kyuubi/build/dist
Kent Yao 74abe8510c
[KYUUBI #1950] Remove ambiguous SPARK_HADOOP_VERSION
<!--
Thanks for sending a pull request!

Here are some tips for you:
  1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html
  2. If the PR is related to an issue in https://github.com/apache/incubator-kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
  3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'.
-->

### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
  1. If you add a feature, you can talk about the use case of it.
  2. If you fix a bug, you can clarify why it is a bug.
-->

SPARK_HADOOP_VERSION was originally intended only for concatenating Spark release names. We now need to remove it because:
- SPARK_HADOOP_VERSION is misunderstood by developers and misused in places, such as for the Hadoop version Kyuubi is compiled with
- Kyuubi now supports multiple engines
- the release names of Spark (or anything else) are very easy to obtain through code in different environments (prod/test/dev)
- a `mvn` job is bundled with `bin/load-kyuubi-env.sh`, which is truly worrisome
- SPARK_HADOOP_VERSION is already broken on the Spark side for Spark 3.2, which is actually bundled with Hadoop 3.3; see https://github.com/apache/spark-website/pull/361#discussion_r730716668

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before make a pull request

Closes #1950 from yaooqinn/hadoop.

Closes #1950

b47be7c6 [Kent Yao] Remove ambiguous SPARK_HADOOP_VERSION
3b33ee56 [Kent Yao] Remove ambiguous SPARK_HADOOP_VERSION

Authored-by: Kent Yao <yao@apache.org>
Signed-off-by: Kent Yao <yao@apache.org>
2022-02-22 11:26:38 +08:00

303 lines
9.7 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Script to create a binary distribution for easy deploys of Kyuubi.
# The distribution directory defaults to dist/ but can be overridden below.
# The distribution contains fat (assembly) jars that include the Scala library,
# so it is completely self contained.
# It does not contain source or *.class files.
# Abort on the first failing command, including a failure inside a pipeline.
set -e -o pipefail
# Root of the Kyuubi checkout: the parent of the directory holding this script.
KYUUBI_HOME="$(cd "$(dirname "$0")/.."; pwd)"
# Defaults for the command-line options.
NAME=none
MAKE_TGZ=false
SPARK_PROVIDED=false
FLINK_PROVIDED=false
MVN="$KYUUBI_HOME/build/mvn"
# Directory the distribution is assembled in.
DISTDIR="$KYUUBI_HOME/dist"
# Print the command-line help for this script to stdout.
usage() {
  # Make sure the help text is not interleaved with xtrace output.
  set +x
  local border="+--------------------------------------------------------------------------------------+"
  printf '%s\n' \
    "./build/dist - Tool for making binary distributions of Kyuubi" \
    "" \
    "Usage:" \
    "$border" \
    "| ./build/dist [--name <custom_name>] [--tgz] [--flink-provided] [--spark-provided] |" \
    "| [--mvn <maven_executable>] <maven build options> |" \
    "$border" \
    "name: - custom binary name, using project version if undefined" \
    "tgz: - whether to make a whole bundled package" \
    "flink-provided: - whether to make a package without Flink binary" \
    "spark-provided: - whether to make a package without Spark binary" \
    "mvn: - external maven executable location" \
    ""
}
# Show the usage text, then terminate the script with exit status 1.
exit_with_usage() {
  usage
  exit 1
}
# Parse arguments
# Options owned by this script are consumed here. Parsing stops at the first
# single-dash argument, so it and everything after it remain in "$@" for the
# Maven invocations further down.
while (( "$#" )); do
  case $1 in
    --tgz)
      MAKE_TGZ=true
      ;;
    --flink-provided)
      FLINK_PROVIDED=true
      ;;
    --spark-provided)
      SPARK_PROVIDED=true
      ;;
    --mvn)
      # Without this check a trailing `--mvn` makes the final `shift` fail and,
      # under `set -e`, the script dies without printing anything.
      if (( $# < 2 )); then
        echo "Error: $1 requires a value"
        exit_with_usage
      fi
      MVN="$2"
      shift
      ;;
    --name)
      # Same guard as for --mvn: the option needs a value after it.
      if (( $# < 2 )); then
        echo "Error: $1 requires a value"
        exit_with_usage
      fi
      NAME="$2"
      shift
      ;;
    --help)
      exit_with_usage
      ;;
    --*)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
    -*)
      # First Maven-style option: leave the remaining arguments untouched.
      break
      ;;
    *)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
  esac
  shift
done
# Setup java
# When JAVA_HOME is not provided, derive it from the `java` found on PATH.
if [[ -z "$JAVA_HOME" ]]; then
  # `|| :` keeps a missing java from tripping `set -e` before the check below.
  java_bin="$(command -v java || :)"
  if [[ -n "$java_bin" ]]; then
    # If java is in /usr/bin/java, we want /usr
    # Every level is quoted so that install paths containing spaces survive.
    JAVA_HOME="$(dirname "$(dirname "$java_bin")")"
  fi
  unset java_bin
fi
if [[ -z "$JAVA_HOME" ]]; then
  echo "Error: JAVA_HOME is not set, cannot proceed." >&2
  exit 1
fi
echo "JAVA_HOME is set to $JAVA_HOME"
# Record the short git revision of the Kyuubi checkout, when there is one.
# GITREVSTRING stays unset if git is unavailable or no revision can be read.
if command -v git >/dev/null 2>&1; then
  # Ask about the repository at $KYUUBI_HOME rather than whatever directory the
  # script was started from. `|| :` turns a failed lookup into an empty result.
  GITREV=$(git -C "$KYUUBI_HOME" rev-parse --short HEAD 2>/dev/null || :)
  if [[ -n "$GITREV" ]]; then
    GITREVSTRING="(git revision $GITREV)"
  fi
  unset GITREV
fi
# Make sure the selected Maven executable can actually be run.
if ! command -v "$MVN" >/dev/null 2>&1; then
  # Plain echo (no -e) so backslashes in a user-supplied path are shown verbatim.
  echo "Could not locate Maven command: '$MVN'." >&2
  echo "Specify the Maven command with the --mvn flag" >&2
  exit 1
fi
echo "MVN is set to $MVN"
# Print the value of one Maven expression for this project.
# Globals:   MVN (read) - Maven executable to run
# Arguments: $1 - expression to evaluate, e.g. project.version
#            $2... - extra Maven options, passed through unchanged
# Outputs:   the last line of Maven's output that contains neither "INFO"
#            nor "WARNING"; Maven's stderr is discarded
# Returns:   the status of the pipeline
function evaluate_maven_property {
  local expression="$1"
  shift
  "$MVN" help:evaluate -Dexpression="$expression" "$@" 2>/dev/null \
    | grep -v "INFO" \
    | grep -v "WARNING" \
    | tail -n 1
}

# "$@" is quoted so that Maven options containing spaces are not re-split.
VERSION=$(evaluate_maven_property project.version "$@")
JAVA_VERSION=$(evaluate_maven_property java.version "$@")
SCALA_VERSION=$(evaluate_maven_property scala.binary.version "$@")
FLINK_VERSION=$(evaluate_maven_property flink.version "$@")
SPARK_VERSION=$(evaluate_maven_property spark.version "$@")
HADOOP_VERSION=$(evaluate_maven_property hadoop.version "$@")
HIVE_VERSION=$(evaluate_maven_property hive.version "$@")
echo "Building Kyuubi package of version $VERSION against Flink $FLINK_VERSION, Spark $SPARK_VERSION"

# Print the "major.minor" part of a version string.
# Arguments: $1 - full version, e.g. 3.2.1 or 3.10.0-SNAPSHOT
# Outputs:   major.minor; falls back to the first three characters when the
#            string does not start with <digits>.<digits>
function spark_feature_version {
  if [[ "$1" =~ ^([0-9]+\.[0-9]+) ]]; then
    echo "${BASH_REMATCH[1]}"
  else
    echo "${1:0:3}"
  fi
}

# Suffix of the package name: the custom --name if one was given, otherwise
# nothing for a spark-provided build or the bundled Spark's major.minor version.
SUFFIX="-$NAME"
if [[ "$NAME" == "none" ]]; then
  if [[ "$SPARK_PROVIDED" == "true" ]]; then
    SUFFIX=""
  else
    SUFFIX="-spark-$(spark_feature_version "$SPARK_VERSION")"
  fi
fi
if [[ "$MAKE_TGZ" == "true" ]]; then
  echo "Making apache-kyuubi-$VERSION-bin$SUFFIX.tgz"
else
  echo "Making distribution for Kyuubi $VERSION in '$DISTDIR'..."
fi
MVN_DIST_OPT="-DskipTests"
if [[ "$SPARK_PROVIDED" == "true" ]]; then
  MVN_DIST_OPT="$MVN_DIST_OPT -Pspark-provided"
fi
if [[ "$FLINK_PROVIDED" == "true" ]]; then
  MVN_DIST_OPT="$MVN_DIST_OPT -Pflink-provided"
fi
# Give Maven a 2g heap unless the caller already chose its own MAVEN_OPTS.
export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g}"
# MVN_DIST_OPT is a space-separated option list and is split on purpose;
# "$@" is quoted so user-supplied Maven options keep embedded spaces.
# shellcheck disable=SC2206
BUILD_COMMAND=("$MVN" clean install $MVN_DIST_OPT "$@")
echo -e "\nBuilding with..."
echo -e "\$ ${BUILD_COMMAND[*]}\n"
"${BUILD_COMMAND[@]}"
# Make directories
# Start from an empty distribution directory and create its skeleton.
rm -rf "$DISTDIR"
for dist_subdir in \
    pid \
    logs \
    work \
    externals/engines/flink \
    externals/engines/flink/lib \
    externals/engines/spark \
    externals/engines/trino \
    beeline-jars; do
  mkdir -p "$DISTDIR/$dist_subdir"
done
# Write the RELEASE file describing what this distribution was built from.
{
  echo "Kyuubi $VERSION $GITREVSTRING built for"
  echo "Java $JAVA_VERSION"
  echo "Scala $SCALA_VERSION"
  echo "Flink $FLINK_VERSION"
  echo "Spark $SPARK_VERSION"
  echo "Kyuubi Hadoop $HADOOP_VERSION"
  echo "Hive $HIVE_VERSION"
  echo "Build flags: $@"
} > "$DISTDIR/RELEASE"
# Copy kyuubi server jars
cp -r "$KYUUBI_HOME/kyuubi-assembly/target/scala-$SCALA_VERSION/jars/" "$DISTDIR/jars"
# Copy kyuubi beeline jars
cp "$KYUUBI_HOME"/kyuubi-hive-beeline/target/*.jar "$DISTDIR/beeline-jars/"
# Copy kyuubi trino client jars
cp -r "$KYUUBI_HOME/externals/kyuubi-trino-engine/target/scala-$SCALA_VERSION/jars/" "$DISTDIR/externals/engines/trino/jars"

# Share the jars between server and beeline to reduce binary size
# Every file in beeline-jars that also exists (by name) in jars is replaced by
# a relative symlink to the copy in jars.
# Globals: DISTDIR (read)
function share_server_jars_with_beeline {
  local server_jar jar
  # Glob instead of parsing `ls`, so names and paths with spaces are handled.
  for server_jar in "$DISTDIR"/jars/*; do
    jar="${server_jar##*/}"
    if [[ -f "$DISTDIR/beeline-jars/$jar" ]]; then
      rm "$DISTDIR/beeline-jars/$jar"
      # The link target is stored as written, so no `cd` is needed for it to
      # resolve relative to beeline-jars.
      ln -sn "../jars/$jar" "$DISTDIR/beeline-jars/$jar"
    fi
  done
}
share_server_jars_with_beeline
# Source trees of the engines and the directory they are installed under.
engines_src="$KYUUBI_HOME/externals"
engines_dst="$DISTDIR/externals/engines"
# Copy flink engines
cp -r "$engines_src/kyuubi-flink-sql-engine/bin/" "$engines_dst/flink/bin/"
chmod a+x "$engines_dst/flink/bin/flink-sql-engine.sh"
cp "$engines_src/kyuubi-flink-sql-engine/target/kyuubi-flink-sql-engine_${SCALA_VERSION}-${VERSION}.jar" \
  "$engines_dst/flink/lib"
# Copy spark engines
cp "$engines_src/kyuubi-spark-sql-engine/target/kyuubi-spark-sql-engine_${SCALA_VERSION}-${VERSION}.jar" \
  "$engines_dst/spark"
# Copy trino engines
cp -r "$engines_src/kyuubi-trino-engine/bin/" "$engines_dst/trino/bin/"
chmod a+x "$engines_dst/trino/bin/trino-engine.sh"
cp "$engines_src/kyuubi-trino-engine/target/kyuubi-trino-engine_${SCALA_VERSION}-${VERSION}.jar" \
  "$engines_dst/trino/jars"
# Copy kyuubi tools
# The spark-block-cleaner tool is packaged only when its jar has been built.
if [[ -f "$KYUUBI_HOME/tools/spark-block-cleaner/target/spark-block-cleaner_${SCALA_VERSION}-${VERSION}.jar" ]]; then
  mkdir -p "$DISTDIR/tools/spark-block-cleaner/kubernetes"
  mkdir -p "$DISTDIR/tools/spark-block-cleaner/jars"
  # Copy the *contents* of kubernetes/ (the trailing "/."): the destination
  # already exists, so copying the directory itself would nest it as
  # kubernetes/kubernetes with GNU cp.
  cp -r "$KYUUBI_HOME/tools/spark-block-cleaner/kubernetes/." "$DISTDIR/tools/spark-block-cleaner/kubernetes/"
  cp "$KYUUBI_HOME/tools/spark-block-cleaner/target/spark-block-cleaner_${SCALA_VERSION}-${VERSION}.jar" "$DISTDIR/tools/spark-block-cleaner/jars/"
fi
# Copy Kyuubi extension
# An extension jar is packaged for each listed Spark line whose jar was built.
SPARK_EXTENSION_VERSIONS=('3-1' '3-2')
for SPARK_EXTENSION_VERSION in "${SPARK_EXTENSION_VERSIONS[@]}"; do
  extension_jar="$KYUUBI_HOME/dev/kyuubi-extension-spark-$SPARK_EXTENSION_VERSION/target/kyuubi-extension-spark-${SPARK_EXTENSION_VERSION}_${SCALA_VERSION}-${VERSION}.jar"
  if [[ -f "$extension_jar" ]]; then
    mkdir -p "$DISTDIR/extension"
    cp "$extension_jar" "$DISTDIR/extension"
  fi
done
unset extension_jar
# Directory searched for the unpacked engine binaries.
download_target="$KYUUBI_HOME/externals/kyuubi-download/target"
# The whole `find` output is passed to cp as a single source path.
# NOTE(review): presumably exactly one matching directory is expected - confirm.
if [[ "$FLINK_PROVIDED" != "true" ]]; then
  # Copy flink binary dist
  FLINK_BUILTIN="$(find "$download_target" -name 'flink-*' -type d)"
  cp -r "$FLINK_BUILTIN" "$DISTDIR/externals/"
fi
if [[ "$SPARK_PROVIDED" != "true" ]]; then
  # Copy spark binary dist
  SPARK_BUILTIN="$(find "$download_target" -name 'spark-*' -type d)"
  cp -r "$SPARK_BUILTIN" "$DISTDIR/externals/"
fi
# Copy license files
cp "$KYUUBI_HOME/DISCLAIMER" "$DISTDIR/DISCLAIMER"
# The binary license set is packaged only when LICENSE-binary exists.
if [[ -f "$KYUUBI_HOME/LICENSE-binary" ]]; then
  cp "$KYUUBI_HOME/LICENSE-binary" "$DISTDIR/LICENSE"
  cp -r "$KYUUBI_HOME/licenses-binary" "$DISTDIR/licenses"
  # Quoted so the helper is still found when the checkout path contains spaces.
  "$KYUUBI_HOME/build/release/collect-licenses.sh" "$DISTDIR" "$DISTDIR"
fi
# Copy the launcher scripts, configuration and docker files.
cp -r "$KYUUBI_HOME/bin" "$DISTDIR"
cp -r "$KYUUBI_HOME/conf" "$DISTDIR"
cp -r "$KYUUBI_HOME/docker" "$DISTDIR"
# Pack the assembled distribution into apache-kyuubi-<version>-bin<suffix>.tgz.
# Globals: KYUUBI_HOME, DISTDIR, VERSION, SUFFIX (read);
#          TARDIR_NAME, TARDIR (written)
# Outputs: a confirmation line on stdout
function make_tarball {
  TARDIR_NAME="apache-kyuubi-$VERSION-bin$SUFFIX"
  TARDIR="$KYUUBI_HOME/$TARDIR_NAME"
  # Stage a copy under the final name so the archive has that top-level directory.
  rm -rf "$TARDIR"
  cp -R "$DISTDIR" "$TARDIR"
  # Write the archive into $KYUUBI_HOME, as the message below states, instead
  # of into whatever directory the script was started from.
  tar czf "$KYUUBI_HOME/$TARDIR_NAME.tgz" -C "$KYUUBI_HOME" "$TARDIR_NAME"
  rm -rf "$TARDIR"
  echo "The Kyuubi tarball $TARDIR_NAME.tgz is successfully generated in $KYUUBI_HOME."
}

if [[ "$MAKE_TGZ" == "true" ]]; then
  make_tarball
fi