### _Why are the changes needed?_ Add the Kyuubi tool cache-file-cleaner on Kubernetes. This tool helps to start a DaemonSet on Kubernetes that cleans shuffle files and Spark cache files. By configuring some environment variables, cache-file-cleaner checks for old files (based on the configuration) and deletes them to free disk space. After the first cleaning pass, if the free capacity is lower than the threshold, the cache file cleaner starts a deep clean to free more disk space. Along with the cache-file-cleaner, add a Dockerfile, an entrypoint, and a daemonSet.yaml to help users adopt this tool. A thread pool is used to delete shuffle files. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before make a pull request Closes #768 from zwangsheng/shuffle-cleaner. Closes #768 3e3d5c65 [Binjie Yang] tini 2f7eab6e [Binjie Yang] dist d19b9d0c [Binjie Yang] dist 3c8b7828 [Binjie Yang] dist 21bf5b4d [Binjie Yang] big change 4d803504 [Binjie Yang] rename to spark-block-cleaner & fix bug ba6515d0 [Binjie Yang] do some change 1084feaa [Binjie Yang] delete empty dir ada682da [Binjie Yang] big change aabbf9ba [Binjie Yang] bugfix a504448a [Binjie Yang] big change 0e0d5811 [Binjie Yang] modify 262fe314 [Binjie Yang] pom ad9c533c [Binjie Yang] dist 0364f6e9 [Binjie Yang] pom 6fbb5cae [Binjie Yang] dist 797bcc39 [Binjie Yang] shuffle cleaner Authored-by: Binjie Yang <2213335496@qq.com> Signed-off-by: ulysses-you <ulyssesyou18@gmail.com>
234 lines
7.1 KiB
Bash
Executable File
234 lines
7.1 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
#
|
|
# Script to create a binary distribution for easy deploys of Spark.
|
|
# The distribution directory defaults to dist/ but can be overridden below.
|
|
# The distribution contains fat (assembly) jars that include the Scala library,
|
|
# so it is completely self contained.
|
|
# It does not contain source or *.class files.
|
|
|
|
set -o pipefail
set -e
set -x

# Root of the Kyuubi checkout; this script lives in build/, so go one level up.
# Use `cd ... && pwd` (not `cd ...; pwd`) so a failed cd yields an empty value
# (and aborts under `set -e`) instead of silently resolving to the current dir.
KYUUBI_HOME="$(cd "$(dirname "$0")/.." && pwd)"
# Staging directory for the binary distribution.
DISTDIR="$KYUUBI_HOME/dist"
# Defaults, overridden by command-line flags parsed below.
MAKE_TGZ=false
SPARK_PROVIDED=false
NAME=none
# Bundled Maven wrapper used for all property lookups and the build itself.
MVN="$KYUUBI_HOME/build/mvn"

# Print the help text for this script to stdout.
# Disables tracing first so the help is not drowned in `set -x` output.
function usage {
  set +x
  cat <<'EOF'
./build/dist - Tool for making binary distributions of Kyuubi

Usage:
+--------------------------------------------------------------------------------------+
| ./build/dist [--name <custom_name>] [--tgz] [--spark-provided] <maven build options> |
+--------------------------------------------------------------------------------------+
name: - custom binary name, using project version if undefined
tgz: - whether to make a whole bundled package
spark-provided: - whether to make a package without Spark binary

EOF
}

# Show the help text, then terminate the script with a non-zero status.
function exit_with_usage {
  usage
  exit 1
}

# Parse arguments.
# Known long options are consumed here; the first unknown short option (-*)
# stops parsing and leaves the remainder in "$@" as extra Maven build options.
while (( "$#" )); do
  case "$1" in
    --tgz)
      MAKE_TGZ=true
      ;;
    --spark-provided)
      SPARK_PROVIDED=true
      ;;
    --name)
      # --name takes a value; fail fast with a clear message if it is missing
      # instead of letting the trailing `shift` blow up under `set -e`.
      if [[ $# -lt 2 ]]; then
        echo "Error: --name requires an argument"
        exit_with_usage
      fi
      NAME="$2"
      shift
      ;;
    --help)
      exit_with_usage
      ;;
    --*)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
    -*)
      # Anything else starting with '-' is forwarded to Maven untouched.
      break
      ;;
    *)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
  esac
  shift
done

# Setup java: derive JAVA_HOME from the java binary on PATH when unset.
if [[ -z "$JAVA_HOME" ]]; then
  # `command -v` is the portable replacement for `which`.
  if command -v java > /dev/null 2>&1; then
    # If java is in /usr/bin/java, we want /usr
    JAVA_HOME="$(dirname "$(dirname "$(command -v java)")")"
  fi
fi

if [[ -z "$JAVA_HOME" ]]; then
  # Diagnostics go to stderr; `exit 1` instead of the non-portable `exit -1`.
  echo "Error: JAVA_HOME is not set, cannot proceed." >&2
  exit 1
fi

echo "JAVA_HOME is set to $JAVA_HOME"

# Capture the current git revision (when building from a git checkout)
# so it can be recorded in the RELEASE file; best-effort, never fatal.
if command -v git > /dev/null; then
  GITREV=$(git rev-parse --short HEAD 2>/dev/null || :)
  if [[ -n "$GITREV" ]]; then
    GITREVSTRING="(git revision $GITREV)"
  fi
  unset GITREV
fi

# Evaluate a single Maven property from the project POM.
#   $1   - the property name (e.g. project.version)
#   $2.. - extra Maven options forwarded from the command line
# Outputs the property value on stdout; Maven log noise is filtered out.
function mvn_property {
  local property="$1"
  shift
  "$MVN" help:evaluate -Dexpression="$property" "$@" 2>/dev/null \
    | grep -v "INFO" \
    | grep -v "WARNING" \
    | tail -n 1
}

# Resolve the versions this distribution is built against.
# "$@" is quoted so Maven options containing spaces survive intact.
VERSION=$(mvn_property project.version "$@")
JAVA_VERSION=$(mvn_property java.version "$@")
SCALA_VERSION=$(mvn_property scala.binary.version "$@")
SPARK_VERSION=$(mvn_property spark.version "$@")
SPARK_HADOOP_VERSION=$(mvn_property hadoop.binary.version "$@")
HADOOP_VERSION=$(mvn_property hadoop.version "$@")
HIVE_VERSION=$(mvn_property hive.version "$@")

echo "Building Kyuubi package of version $VERSION against Spark version - $SPARK_VERSION"

# Derive the binary name when the user did not pass --name:
# "without-spark" for spark-provided builds, otherwise the bundled
# Spark major.minor version plus the Hadoop binary version.
if [[ "$NAME" == "none" ]]; then
  SPARK_HADOOP_VERSION_SUFFIX="-hadoop${SPARK_HADOOP_VERSION}"

  if [[ "$SPARK_PROVIDED" == "true" ]]; then
    NAME="without-spark"
  else
    NAME="spark-${SPARK_VERSION:0:3}${SPARK_HADOOP_VERSION_SUFFIX}"
  fi
fi

# Announce what is about to be produced.
if [[ "$MAKE_TGZ" == "true" ]]; then
  echo "Making kyuubi-$VERSION-bin-$NAME.tar.gz"
else
  echo "Making distribution for Kyuubi $VERSION named $NAME in '$DISTDIR'..."
fi

# Assemble the Maven build command.
# MVN_DIST_OPT is an array (not a space-joined string) and "$@" is quoted,
# so options containing spaces are passed through to Maven intact.
MVN_DIST_OPT=("-DskipTests")
if [[ "$SPARK_PROVIDED" == "true" ]]; then
  MVN_DIST_OPT+=("-Pspark-provided")
fi

BUILD_COMMAND=("$MVN" -T 1C clean package "${MVN_DIST_OPT[@]}" "$@")

# Log the exact command before running it.
echo -e "\nBuilding with..."
echo -e "\$ ${BUILD_COMMAND[@]}\n"

"${BUILD_COMMAND[@]}"

# Make directories: rebuild the staging layout from scratch.
# ${DISTDIR:?} aborts if DISTDIR is ever empty/unset, so the recursive
# delete can never resolve to an unintended path.
rm -rf "${DISTDIR:?}"
mkdir -p "$DISTDIR/pid"
mkdir -p "$DISTDIR/logs"
mkdir -p "$DISTDIR/work"
mkdir -p "$DISTDIR/externals/engines/spark"
mkdir -p "$DISTDIR/tools/kubernetes/docker/spark-block-cleaner/jars"

# Record what this distribution was built against.
echo "Kyuubi $VERSION $GITREVSTRING built for" > "$DISTDIR/RELEASE"
echo "Java $JAVA_VERSION" >> "$DISTDIR/RELEASE"
echo "Scala $SCALA_VERSION" >> "$DISTDIR/RELEASE"
echo "Spark $SPARK_VERSION" >> "$DISTDIR/RELEASE"
echo "Hadoop $SPARK_HADOOP_VERSION" >> "$DISTDIR/RELEASE"
echo "Hive $HIVE_VERSION" >> "$DISTDIR/RELEASE"
echo "Build flags: $@" >> "$DISTDIR/RELEASE"

# Stage the server jars, the Spark SQL engine, and the kubernetes tooling.
ASSEMBLY_JARS_DIR="$KYUUBI_HOME/kyuubi-assembly/target/scala-$SCALA_VERSION/jars/"
BLOCK_CLEANER_DIST="$DISTDIR/tools/kubernetes/docker/spark-block-cleaner"

# Copy kyuubi server jars
cp -r "$ASSEMBLY_JARS_DIR" "$DISTDIR/jars"

# Copy spark engines
cp "$KYUUBI_HOME/externals/kyuubi-spark-sql-engine/target/kyuubi-spark-sql-engine-$VERSION.jar" "$DISTDIR/externals/engines/spark"

# Copy kyuubi tools
cp -r "$KYUUBI_HOME/tools/kubernetes/docker/" "$DISTDIR/tools/kubernetes"
cp -r "$ASSEMBLY_JARS_DIR" "$BLOCK_CLEANER_DIST/"
cp "$KYUUBI_HOME/tools/kubernetes/spark-block-cleaner/target/spark-block-cleaner-$VERSION.jar" "$BLOCK_CLEANER_DIST/jars"

# Copy Kyuubi extension (optional: only present for some Spark versions).
# SPARK_MID_VERSION is the "major.minor" prefix, e.g. 3.1 from 3.1.2.
SPARK_MID_VERSION=${SPARK_VERSION%.*}
EXTENSION_JAR="$KYUUBI_HOME/dev/kyuubi-extension-spark_$SPARK_MID_VERSION/target/kyuubi-extension-spark_$SPARK_MID_VERSION-$VERSION.jar"
# The original test used a stray `$"` (bash locale-translation quoting); a
# plain quoted expansion is what was intended.
if [[ -f "$EXTENSION_JAR" ]]; then
  mkdir -p "$DISTDIR/extension"
  cp "$EXTENSION_JAR" "$DISTDIR/extension"
fi

# Bundle the downloaded Spark binary unless the user opted out.
# NOTE(review): HIVE_VERSION_SUFFIX is not assigned anywhere in this script —
# presumably expected from the environment (or a leftover); confirm.
if [[ "$SPARK_PROVIDED" != "true" ]]; then
  # Copy spark binary dist
  SPARK_DIST_NAME="spark-$SPARK_VERSION-bin-hadoop${SPARK_HADOOP_VERSION}$HIVE_VERSION_SUFFIX"
  cp -r "$KYUUBI_HOME/externals/kyuubi-download/target/$SPARK_DIST_NAME/" \
    "$DISTDIR/externals/$SPARK_DIST_NAME/"
fi

# Copy license files
|
|
if [ -e "$KYUUBI_HOME/LICENSE-binary" ]; then
|
|
cp "$KYUUBI_HOME/LICENSE-binary" "$DISTDIR/LICENSE"
|
|
cp -r "$KYUUBI_HOME/licenses-binary" "$DISTDIR/licenses"
|
|
else
|
|
echo "Skipping copying LICENSE files"
|
|
fi
|
|
|
|
cp -r "$KYUUBI_HOME/bin" "$DISTDIR"
|
|
cp -r "$KYUUBI_HOME/conf" "$DISTDIR"
|
|
|
|
# Optionally bundle the staged distribution into a tarball named after the
# version and the resolved binary name; the staging copy is removed afterwards.
if [[ "$MAKE_TGZ" == "true" ]]; then
  TARDIR_NAME="kyuubi-$VERSION-bin-$NAME"
  TARDIR="$KYUUBI_HOME/$TARDIR_NAME"
  rm -rf "$TARDIR"
  cp -r "$DISTDIR" "$TARDIR"
  tar -czf "$TARDIR_NAME.tar.gz" -C "$KYUUBI_HOME" "$TARDIR_NAME"
  rm -rf "$TARDIR"
fi