kyuubi/build/dist
Binjie Yang 5e53748bb5 [KYUUBI #768] [K8s] [Tool] tools shuffle-files-cleaner
### _Why are the changes needed?_
Add the Kyuubi tool cache-file-cleaner for Kubernetes.
This tool runs as a DaemonSet on Kubernetes to clean up shuffle files and Spark cache files.
Configured through environment variables, cache-file-cleaner checks for old files (as defined by the configuration) and deletes them to free disk space.
After the first cleaning pass, if the free capacity is still below the threshold, cache-file-cleaner starts a deep clean to free more disk space.
Along with cache-file-cleaner, add a Dockerfile, an entrypoint and daemonSet.yaml to help users deploy this tool.
A thread pool is used to delete shuffle files.

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before make a pull request

Closes #768 from zwangsheng/shuffle-cleaner.

Closes #768

3e3d5c65 [Binjie Yang] tini
2f7eab6e [Binjie Yang] dist
d19b9d0c [Binjie Yang] dist
3c8b7828 [Binjie Yang] dist
21bf5b4d [Binjie Yang] big change
4d803504 [Binjie Yang] rename to spark-block-cleaner & fix bug
ba6515d0 [Binjie Yang] do some change
1084feaa [Binjie Yang] delete empty dir
ada682da [Binjie Yang] big change
aabbf9ba [Binjie Yang] bugfix
a504448a [Binjie Yang] big change
0e0d5811 [Binjie Yang] modify
262fe314 [Binjie Yang] pom
ad9c533c [Binjie Yang] dist
0364f6e9 [Binjie Yang] pom
6fbb5cae [Binjie Yang] dist
797bcc39 [Binjie Yang] shuffle cleaner

Authored-by: Binjie Yang <2213335496@qq.com>
Signed-off-by: ulysses-you <ulyssesyou18@gmail.com>
2021-07-12 15:56:10 +08:00

234 lines
7.1 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Script to create a binary distribution for easy deploys of Kyuubi.
# The distribution directory defaults to dist/ but can be overridden below.
# The distribution contains fat (assembly) jars that include the Scala library,
# so it is completely self contained.
# It does not contain source or *.class files.
# Exit on unhandled command failures, make a pipeline fail when any of its
# stages fails, and trace every command as it runs.
set -e -x -o pipefail

# Root of the Kyuubi checkout: the parent of the directory holding this script.
KYUUBI_HOME="$(cd "$(dirname "$0")/.."; pwd)"

# Settings controlled from the command line, with their defaults.
NAME=none
MAKE_TGZ=false
SPARK_PROVIDED=false

# Locations derived from the checkout root.
DISTDIR="${KYUUBI_HOME}/dist"
MVN="${KYUUBI_HOME}/build/mvn"
# Print the command-line synopsis and option descriptions to stdout.
# Command tracing is switched off first (and stays off) so the help text is
# not interleaved with `set -x` output.
usage() {
  set +x
  printf '%s\n' \
    "./build/dist - Tool for making binary distributions of Kyuubi" \
    "" \
    "Usage:" \
    "+--------------------------------------------------------------------------------------+" \
    "| ./build/dist [--name <custom_name>] [--tgz] [--spark-provided] <maven build options> |" \
    "+--------------------------------------------------------------------------------------+" \
    "name: - custom binary name, using project version if undefined" \
    "tgz: - whether to make a whole bundled package" \
    "spark-provided: - whether to make a package without Spark binary" \
    ""
}
# Show the usage text, then terminate the script with exit status 1.
exit_with_usage() {
  usage
  exit 1
}
# Parse arguments
# Leading "--" options are consumed here. Parsing stops at the first argument
# that starts with a single "-": it and everything after it remain in "$@"
# for the Maven invocations further down. Anything else is rejected with the
# usage text.
while (( "$#" )); do
  case "$1" in
    --tgz)
      MAKE_TGZ=true
      ;;
    --spark-provided)
      SPARK_PROVIDED=true
      ;;
    --name)
      # A trailing "--name" used to make the second `shift` below fail, which
      # killed the script under `set -e` without any explanation.
      if (( $# < 2 )); then
        echo "Error: --name requires a value"
        exit_with_usage
      fi
      NAME="$2"
      shift
      ;;
    --help)
      exit_with_usage
      ;;
    --*)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
    -*)
      break
      ;;
    *)
      echo "Error: $1 is not supported"
      exit_with_usage
      ;;
  esac
  shift
done
# Setup java
# When JAVA_HOME is not provided, derive it from the `java` found on PATH.
if [[ -z "$JAVA_HOME" ]]; then
  if command -v java >/dev/null; then
    # If java is in /usr/bin/java, we want /usr
    # Every level is quoted so a path containing whitespace is not split.
    JAVA_HOME="$(dirname "$(dirname "$(command -v java)")")"
  fi
fi
if [[ -z "$JAVA_HOME" ]]; then
  echo "Error: JAVA_HOME is not set, cannot proceed."
  # bash reports `exit -1` to the caller as status 255.
  exit -1
fi
echo "JAVA_HOME is set to $JAVA_HOME"
# When git is installed and reports a short hash for HEAD, remember it in
# GITREVSTRING; otherwise GITREVSTRING is left untouched. The temporary GITREV
# is always discarded afterwards.
if [[ -n "$(command -v git)" ]]; then
  GITREV="$(git rev-parse --short HEAD 2>/dev/null || true)"
  [[ -z "$GITREV" ]] || GITREVSTRING="(git revision ${GITREV})"
  unset GITREV
fi
# Print the value of one Maven project property.
# Globals:   MVN (read) - Maven launcher to execute
# Arguments: $1 - property expression, e.g. project.version
#            remaining - extra Maven options, each forwarded as one argument
# Outputs:   the last line of Maven's stdout that contains neither "INFO"
#            nor "WARNING"; Maven's stderr is discarded
mvn_evaluate() {
  local expression="$1"
  shift
  "$MVN" help:evaluate "-Dexpression=${expression}" "$@" 2>/dev/null \
    | grep -v -e "INFO" -e "WARNING" \
    | tail -n 1
}

# "$@" is quoted so that a build option containing whitespace reaches Maven
# as a single argument instead of being re-split by the shell.
VERSION=$(mvn_evaluate project.version "$@")
JAVA_VERSION=$(mvn_evaluate java.version "$@")
SCALA_VERSION=$(mvn_evaluate scala.binary.version "$@")
SPARK_VERSION=$(mvn_evaluate spark.version "$@")
SPARK_HADOOP_VERSION=$(mvn_evaluate hadoop.binary.version "$@")
HADOOP_VERSION=$(mvn_evaluate hadoop.version "$@")
HIVE_VERSION=$(mvn_evaluate hive.version "$@")

echo "Building Kyuubi package of version $VERSION against Spark version - $SPARK_VERSION"
# Print the "major.minor" prefix of a version string.
# Arguments: $1 - version string, e.g. 3.1.2
# Outputs:   the leading <digits>.<digits> of $1; when $1 does not start that
#            way, its first three characters (the previous behaviour)
spark_binary_version() {
  local version="$1"
  if [[ "$version" =~ ^[0-9]+\.[0-9]+ ]]; then
    echo "${BASH_REMATCH[0]}"
  else
    echo "${version:0:3}"
  fi
}

# Pick a default package name when none was supplied via --name.
if [[ "$NAME" == "none" ]]; then
  SPARK_HADOOP_VERSION_SUFFIX="-hadoop${SPARK_HADOOP_VERSION}"
  if [[ "$SPARK_PROVIDED" == "true" ]]; then
    NAME="without-spark"
  else
    # A fixed three-character prefix would shorten e.g. 3.10.0 to "3.1".
    NAME="spark-$(spark_binary_version "$SPARK_VERSION")${SPARK_HADOOP_VERSION_SUFFIX}"
  fi
fi

# Announce what is about to be produced.
if [[ "$MAKE_TGZ" == "true" ]]; then
  echo "Making kyuubi-$VERSION-bin-$NAME.tar.gz"
else
  echo "Making distribution for Kyuubi $VERSION named $NAME in '$DISTDIR'..."
fi
# Assemble the Maven build command as arrays so that every option, including
# the pass-through arguments in "$@", stays a single word even when it
# contains whitespace.
MVN_DIST_OPT=(-DskipTests)
if [[ "$SPARK_PROVIDED" == "true" ]]; then
  MVN_DIST_OPT+=(-Pspark-provided)
fi

BUILD_COMMAND=("$MVN" -T 1C clean package "${MVN_DIST_OPT[@]}" "$@")

# Show the command before running it.
echo -e "\nBuilding with..."
echo -e "\$ ${BUILD_COMMAND[*]}\n"

"${BUILD_COMMAND[@]}"
# Make directories
# Start from an empty distribution directory and create its fixed skeleton.
rm -rf "$DISTDIR"
for subdir in \
  pid \
  logs \
  work \
  externals/engines/spark \
  tools/kubernetes/docker/spark-block-cleaner/jars; do
  mkdir -p "$DISTDIR/$subdir"
done

# Write the RELEASE file: component versions plus the remaining build flags.
{
  echo "Kyuubi $VERSION $GITREVSTRING built for"
  echo "Java $JAVA_VERSION"
  echo "Scala $SCALA_VERSION"
  echo "Spark $SPARK_VERSION"
  echo "Hadoop $SPARK_HADOOP_VERSION"
  echo "Hive $HIVE_VERSION"
  echo "Build flags: $@"
} > "$DISTDIR/RELEASE"
# Source directories are deliberately written without a trailing slash:
# BSD/macOS cp copies the *contents* of "dir/" rather than the directory
# itself, which yields a different layout whenever the destination exists.

# Copy kyuubi server jars
cp -r "$KYUUBI_HOME/kyuubi-assembly/target/scala-$SCALA_VERSION/jars" "$DISTDIR/jars"

# Copy spark engines
cp "$KYUUBI_HOME/externals/kyuubi-spark-sql-engine/target/kyuubi-spark-sql-engine-$VERSION.jar" "$DISTDIR/externals/engines/spark"

# Copy kyuubi tools
# The docker directory lands in (and merges with) tools/kubernetes/docker; the
# server jars and the spark-block-cleaner jar end up in its
# spark-block-cleaner/jars directory.
cp -r "$KYUUBI_HOME/tools/kubernetes/docker" "$DISTDIR/tools/kubernetes"
cp -r "$KYUUBI_HOME/kyuubi-assembly/target/scala-$SCALA_VERSION/jars" "$DISTDIR/tools/kubernetes/docker/spark-block-cleaner/"
cp "$KYUUBI_HOME/tools/kubernetes/spark-block-cleaner/target/spark-block-cleaner-$VERSION.jar" "$DISTDIR/tools/kubernetes/docker/spark-block-cleaner/jars"
# Copy Kyuubi extension
# SPARK_MID_VERSION is SPARK_VERSION without its last dot-separated component
# (e.g. 3.1.2 -> 3.1). The extension jar is copied only when it exists.
SPARK_MID_VERSION=${SPARK_VERSION%.*}
KYUUBI_EXTENSION_JAR="${KYUUBI_HOME}/dev/kyuubi-extension-spark_${SPARK_MID_VERSION}/target/kyuubi-extension-spark_${SPARK_MID_VERSION}-${VERSION}.jar"
# The path is a plain double-quoted string; the earlier $"..." form is bash's
# locale-translation quoting and was not intended here.
if [[ -f "$KYUUBI_EXTENSION_JAR" ]]; then
  mkdir -p "$DISTDIR/extension"
  cp "$KYUUBI_EXTENSION_JAR" "$DISTDIR/extension"
fi
# Copy spark binary dist, unless the package is built with --spark-provided.
if [[ "$SPARK_PROVIDED" != "true" ]]; then
  # NOTE(review): HIVE_VERSION_SUFFIX is not assigned anywhere in the visible
  # part of this script; presumably it is empty or comes from the
  # environment - confirm.
  SPARK_BINARY_DIR="spark-$SPARK_VERSION-bin-hadoop${SPARK_HADOOP_VERSION}$HIVE_VERSION_SUFFIX"
  cp -r "$KYUUBI_HOME/externals/kyuubi-download/target/$SPARK_BINARY_DIR/" \
    "$DISTDIR/externals/$SPARK_BINARY_DIR/"
fi
# Copy license files
# LICENSE-binary and the licenses-binary directory are bundled only when
# LICENSE-binary exists in the source tree.
if [[ ! -e "${KYUUBI_HOME}/LICENSE-binary" ]]; then
  echo "Skipping copying LICENSE files"
else
  cp "${KYUUBI_HOME}/LICENSE-binary" "${DISTDIR}/LICENSE"
  cp -r "${KYUUBI_HOME}/licenses-binary" "${DISTDIR}/licenses"
fi
# Copy the bin and conf directories into the distribution.
for dir in bin conf; do
  cp -r "$KYUUBI_HOME/$dir" "$DISTDIR"
done
# With --tgz, bundle the distribution as kyuubi-<version>-bin-<name>.tar.gz.
# The dist directory is first copied to a directory carrying the package name
# so the archive has that name as its single top-level entry; the copy is
# removed afterwards. The archive is written to the current working directory.
case "$MAKE_TGZ" in
  true)
    TARDIR_NAME="kyuubi-${VERSION}-bin-${NAME}"
    TARDIR="${KYUUBI_HOME}/${TARDIR_NAME}"
    rm -rf "$TARDIR"
    cp -r "$DISTDIR" "$TARDIR"
    tar czf "${TARDIR_NAME}.tar.gz" -C "$KYUUBI_HOME" "$TARDIR_NAME"
    rm -rf "$TARDIR"
    ;;
esac