diff --git a/.gitignore b/.gitignore
index 5634a434d..7e4b7e52d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,70 +20,11 @@
 .pydevproject
 .scala_dependencies
 .settings
-/lib/
-R-unit-tests.log
-R/unit-tests.out
-R/cran-check.out
-R/pkg/vignettes/sparkr-vignettes.html
-build/*.jar
-build/apache-maven*
-build/scala*
-build/zinc*
-cache
-checkpoint
-conf/*.cmd
-conf/*.conf
-conf/*.properties
-conf/*.sh
-conf/*.xml
-conf/java-opts
-conf/slaves
-dependency-reduced-pom.xml
-derby.log
-dev/create-release/*final
-dev/create-release/*txt
-dist/
-docs/_site
-docs/api
-lib_managed/
-lint-r-report.log
-log/
+
+bin/apache-maven*
+bin/scala*
+
 logs/
-out/
-project/boot/
-project/build/target/
-project/plugins/lib_managed/
-project/plugins/project/build.properties
-project/plugins/src_managed/
-project/plugins/target/
-python/lib/pyspark.zip
-python/deps
-python/pyspark/python
-reports/
-scalastyle-on-compile.generated.xml
-scalastyle-output.xml
-scalastyle.txt
-spark-*-bin-*.tgz
-spark-tests.log
-src_managed/
-streaming-tests.log
+
 target/
-unit-tests.log
-work/
-
-# For Hive
-TempStatsStore/
-metastore/
-metastore_db/
-sql/hive-thriftserver/test_warehouses
-warehouse/
-spark-warehouse/
-
-# For R session data
-.RData
-.RHistory
-.Rhistory
-*.Rproj
-*.Rproj.*
-
-.Rproj.user
+out/
\ No newline at end of file
diff --git a/bin/mvn b/bin/mvn
new file mode 100755
index 000000000..462d066cb
--- /dev/null
+++ b/bin/mvn
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Directory that holds this script; tools are downloaded and unpacked here
+_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+# Preserve the calling directory
+_CALLING_DIR="$(pwd)"
+
+# Installs any application tarball given a URL, the expected tarball name,
+# and, optionally, a checkable binary path to determine if the binary has
+# already been installed. Exits with status 2 when the tarball can be
+# neither downloaded nor extracted.
+## Arg1 - URL
+## Arg2 - Tarball Name
+## Arg3 - Checkable Binary
+install_app() {
+  local remote_tarball="$1/$2"
+  local local_tarball="${_DIR}/$2"
+  local binary="${_DIR}/$3"
+
+  # always show a progress bar while downloading
+  local curl_opts=(--progress-bar -L)
+  local wget_opts=(--progress=bar:force)
+
+  if [ -z "$3" ] || [ ! -f "$binary" ]; then
+    # download with curl unless the tarball is already present; remove a
+    # partial file on failure so the fallback and the final check still fire
+    if [ ! -f "${local_tarball}" ] && command -v curl > /dev/null; then
+      echo "exec: curl ${curl_opts[*]} ${remote_tarball}" 1>&2
+      curl "${curl_opts[@]}" "${remote_tarball}" > "${local_tarball}" \
+        || rm -f "${local_tarball}"
+    fi
+    # if the file still doesn't exist, lets try `wget` and cross our fingers
+    if [ ! -f "${local_tarball}" ] && command -v wget > /dev/null; then
+      echo "exec: wget ${wget_opts[*]} ${remote_tarball}" 1>&2
+      wget "${wget_opts[@]}" -O "${local_tarball}" "${remote_tarball}" \
+        || rm -f "${local_tarball}"
+    fi
+    # if both were unsuccessful, exit
+    if [ ! -f "${local_tarball}" ]; then
+      echo -n "ERROR: Cannot download $2 with cURL or wget; " 1>&2
+      echo "please install manually and try again." 1>&2
+      exit 2
+    fi
+    # unpack next to this script; a corrupt archive is fatal, not ignored
+    if ! tar -xzf "${local_tarball}" -C "${_DIR}"; then
+      rm -f "${local_tarball}"
+      echo "ERROR: Cannot extract ${local_tarball}" 1>&2
+      exit 2
+    fi
+    rm -f "${local_tarball}"
+  fi
+}
+
+# Determine the Maven version from the root pom.xml file and
+# install maven next to this script if the `mvn` on PATH is missing or older.
+# Sets the global MVN_BIN to the `mvn` executable to run.
+install_mvn() {
+  local MVN_VERSION
+  MVN_VERSION="$(grep "<maven.version>" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}')"
+  local MVN_DETECTED_VERSION=""
+  MVN_BIN="$(command -v mvn)"
+  if [ "$MVN_BIN" ]; then
+    MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
+  fi
+  # See simple version normalization: http://stackoverflow.com/questions/16989598/bash-comparing-version-numbers
+  function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'; }
+  if [ "$(version "$MVN_DETECTED_VERSION")" -lt "$(version "$MVN_VERSION")" ]; then
+    local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua?action=download&filename='}
+
+    install_app \
+      "${APACHE_MIRROR}/maven/maven-3/${MVN_VERSION}/binaries" \
+      "apache-maven-${MVN_VERSION}-bin.tar.gz" \
+      "apache-maven-${MVN_VERSION}/bin/mvn"
+
+    MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn"
+  fi
+}
+
+# Determine the Scala version from the root pom.xml file, set the Scala URL,
+# and, with that, download the specific version of Scala necessary next to
+# this script. Sets the globals SCALA_COMPILER and SCALA_LIBRARY.
+install_scala() {
+  # determine the Scala version declared in the root pom.xml
+  local scala_version
+  scala_version="$(grep "scala.version" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}')"
+  local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
+  local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.typesafe.com}
+
+  install_app \
+    "${TYPESAFE_MIRROR}/scala/${scala_version}" \
+    "scala-${scala_version}.tgz" \
+    "scala-${scala_version}/bin/scala"
+
+  SCALA_COMPILER="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-compiler.jar"
+  SCALA_LIBRARY="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-library.jar"
+}
+
+install_scala
+install_mvn
+cd "${_CALLING_DIR}"
+# fail clearly when neither a system nor a downloaded mvn is available
+if [ -z "${MVN_BIN}" ]; then
+  echo "ERROR: no usable \`mvn\` found" 1>&2
+  exit 1
+fi
+echo "Using \`mvn\` from path: $MVN_BIN" 1>&2
+"${MVN_BIN}" "$@"
diff --git a/bin/start-kyuubi.sh
b/bin/start-kyuubi.sh
new file mode 100755
index 000000000..267bdd53a
--- /dev/null
+++ b/bin/start-kyuubi.sh
@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+## Kyuubi Server Main Entrance
+CLASS="yaooqinn.kyuubi.server.KyuubiServer"
+
+# Print the command-line synopsis to stdout.
+function usage {
+  echo "Usage: ./bin/start-kyuubi.sh (start|stop) [spark-submit options]"
+}
+
+# Honor --help/-h in any position, including as the first argument, before
+# the arguments are split into the action and the spark-submit options.
+for arg in "$@"; do
+  if [[ "$arg" == "--help" || "$arg" == "-h" ]]; then
+    usage
+    exit 0
+  fi
+done
+
+# First argument is the action (start|stop); the rest go to spark-submit.
+KYUUBI_OPTIONS=$1
+shift
+# An array keeps every option a separate word and expands to nothing when
+# no options were given (a quoted scalar would pass one merged/empty word).
+SPARK_OPTIONS=("$@")
+
+## Find the Kyuubi Jar
+KYUUBI_JAR_DIR="$(cd "$(dirname "$0")"/..; pwd)/target"
+# Glob instead of parsing `ls`; nullglob makes "no match" an empty array.
+shopt -s nullglob
+KYUUBI_JARS=("${KYUUBI_JAR_DIR}"/*kyuubi-*.jar)
+shopt -u nullglob
+KYUUBI_JAR_NUM=${#KYUUBI_JARS[@]}
+
+if [ "${KYUUBI_JAR_NUM}" = "0" ]; then
+  echo "Kyuubi Server: need to build kyuubi first. Run ./bin/mvn clean package" >&2
+  exit 1
+fi
+
+if [ "${KYUUBI_JAR_NUM}" != "1" ]; then
+  echo "Kyuubi Server: duplicated kyuubi jars found. Run ./bin/mvn clean package" >&2
+  exit 1
+fi
+
+KYUUBI_JAR="${KYUUBI_JARS[0]}"
+
+echo "Kyuubi Server: jar founded:" "${KYUUBI_JAR}" >&2
+
+
+# Shift existing logs up by one suffix ($1.1 -> $1.2, ...) and move $1 to
+# $1.1. $2 is the highest suffix kept (default 5). No-op if $1 is absent.
+function rotate_log {
+  local log=$1
+  local num=5
+  local prev
+  if [ -n "$2" ]; then
+    num=$2
+  fi
+  if [ -f "$log" ]; then # rotate logs
+    while [ "${num}" -gt 1 ]; do
+      prev=$((num - 1))
+      [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
+      num=${prev}
+    done
+    mv "$log" "$log.$num"
+  fi
+}
+
+# get log directory
+if [ "$KYUUBI_LOG_DIR" = "" ]; then
+  export KYUUBI_LOG_DIR="$(cd "$(dirname "$0")"/..; pwd)/logs"
+fi
+mkdir -p "$KYUUBI_LOG_DIR"
+# probe that the directory is writable; otherwise try to take ownership
+if touch "$KYUUBI_LOG_DIR"/.kyuubi_test > /dev/null 2>&1; then
+  rm -f "$KYUUBI_LOG_DIR"/.kyuubi_test
+else
+  chown "$USER" "$KYUUBI_LOG_DIR"
+fi
+
+LOG="$KYUUBI_LOG_DIR/kyuubi-$USER-$CLASS-$HOSTNAME.out"
+PID="$KYUUBI_LOG_DIR/kyuubi-$USER-$CLASS-$HOSTNAME.pid"
+
+# Launch the server through spark-submit in the background, record its pid
+# in $PID and report whether a java process is still alive shortly after.
+# Returns 1 (after removing the stale pid file) when the launch failed.
+function start_server {
+  # Find the spark-submit
+  if [ -n "$SPARK_HOME" ]; then
+    SPARK_SUBMIT="$SPARK_HOME/bin/spark-submit"
+  else
+    echo "Kyuubi Server: SPARK_HOME is not set" >&2
+    exit 1
+  fi
+
+  rotate_log "$LOG"
+  echo "Kyuubi Server: starting and logging to $LOG" >&2
+
+  nohup bash "$SPARK_SUBMIT" --class "$CLASS" "${SPARK_OPTIONS[@]}" "$KYUUBI_JAR" >> "$LOG" 2>&1 < /dev/null &
+
+  local PID_TMP="$!"
+  echo "${PID_TMP}" > "${PID}"
+
+  # Poll for up to 5 seconds for the java process to start
+  local i
+  for i in {1..10}; do
+    if [[ $(ps -p "$PID_TMP" -o comm=) =~ "java" ]]; then
+      break
+    fi
+    sleep 0.5
+  done
+
+  sleep 2
+
+  # Check if the process has died; in that case we'll tail the log so the user can see
+  if [[ ! $(ps -p "$PID_TMP" -o comm=) =~ "java" ]]; then
+    echo "Kyuubi Server: failed to launch: ${SPARK_OPTIONS[*]}" >&2
+    tail -2 "$LOG" | sed 's/^/  /'
+    echo "Kyuubi Server: full log in $LOG" >&2
+    rm -f "${PID}"
+    return 1
+  fi
+}
+
+case "${KYUUBI_OPTIONS}" in
+  (start)
+    start_server
+    ;;
+
+  (stop)
+    if [ -f "${PID}" ]; then
+      TARGET_ID="$(cat "$PID")"
+      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
+        echo "Kyuubi Server: stopping $CLASS"
+        kill "$TARGET_ID" && rm -f "$PID"
+      else
+        echo "Kyuubi Server: no $CLASS to stop"
+      fi
+    else
+      echo "Kyuubi Server: no $CLASS to stop"
+    fi
+    ;;
+
+  (*)
+    usage
+    exit 1
+    ;;
+
+esac
diff --git a/docs/__init__.py b/docs/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/pom.xml b/pom.xml
index 7e1db1a5e..df271cf6c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -367,26 +367,6 @@ - - - org.apache.maven.plugins - maven-assembly-plugin - 3.0.0 - - - jar-with-dependencies - - - - - assemble-all - package - - single - - - -