From 25d2d587b018a6456b6ea78171ce4f84025d7a7a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 4 Mar 2021 18:25:11 +0800 Subject: [PATCH] [KYUUBI #393] Add Support for Hadoop 3.2 ![yaooqinn](https://badgen.net/badge/Hello/yaooqinn/green) [![Closes #393](https://badgen.net/badge/Preview/Closes%20%23393/blue)](https://github.com/yaooqinn/kyuubi/pull/393) ![42](https://badgen.net/badge/%2B/42/red) ![41](https://badgen.net/badge/-/41/green) ![3](https://badgen.net/badge/commits/3/yellow) ![Target Issue](https://badgen.net/badge/Missing/Target%20Issue/ff0000) [❨?❩](https://pullrequestbadge.com/?utm_medium=github&utm_source=yaooqinn&utm_campaign=badge_info) ### _Why are the changes needed?_ This PR adds support for a Hadoop 3.2 profile so that Kyuubi can work with Spark 3.1.1. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before making a pull request Closes #393 from yaooqinn/guava2. 
1f56082 [Kent Yao] nit c8c576c [Kent Yao] nit ccdfd03 [Kent Yao] Add Support for Hadoop 3.2 Authored-by: Kent Yao Signed-off-by: Kent Yao --- .github/workflows/master.yml | 3 +- .github/workflows/release.yml | 15 +++++++--- .travis.yml | 17 ++++------- build/dist | 19 +++++++------ .../engine/spark/SparkProcessBuilder.scala | 28 +++++++++---------- pom.xml | 3 +- 6 files changed, 43 insertions(+), 42 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 110e975fc..fa15a02a5 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -19,6 +19,7 @@ jobs: profiles: - '-Pspark-3.0 -Phadoop-2.7' - '-Pspark-3.1 -Phadoop-2.7' + - '-Pspark-3.1 -Phadoop-3.2' steps: - uses: actions/checkout@v2 - name: Setup JDK 1.8 @@ -51,7 +52,7 @@ jobs: ${{ runner.os }}-maven-io- - name: Build with Maven run: | - mvn clean install ${{ matrix.profiles }} -Dmaven.javadoc.skip=true -B -V + mvn clean install ${{ matrix.profiles }} -Dmaven.javadoc.skip=true -V bash <(curl -s https://codecov.io/bash) - name: Collect unit tests log run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7de5b97b4..4378f564e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,6 +9,15 @@ jobs: build: name: Create and Publish Release runs-on: ubuntu-latest + strategy: + matrix: + profiles: + - '-Pspark-3.0 -Phadoop-2.7' + - '--spark-provided -Pspark-3.0 -Phadoop-2.7' + - '-Pspark-3.1 -Phadoop-2.7' + - '--spark-provided -Pspark-3.1 -Phadoop-2.7' + - '-Pspark-3.1 -Phadoop-3.2' + - '--spark-provided -Pspark-3.1 -Phadoop-3.2' steps: - uses: actions/checkout@master # We split caches because GitHub Action Cache has a 400MB-size limit. 
@@ -34,10 +43,8 @@ jobs: uses: actions/setup-java@v1 with: java-version: '1.8' - - name: Make Distribution with Spark - run: ./build/dist --tgz - - name: Make Distribution without Spark - run: ./build/dist --tgz --spark-provided + - name: Make Distribution + run: ./build/dist --tgz ${{ matrix.profiles }} - name: Create Release id: create_release uses: actions/create-release@v1 diff --git a/.travis.yml b/.travis.yml index ab4b5a34d..2f0829286 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,25 +33,18 @@ matrix: env: - PROFILE="-Pspark-3.0 -Phadoop-2.7" - EXCLUDE_TAGS="" - - name: Tes Kyuubi -Pspark-3.1 -Phadoop-2.7 + - name: Test Kyuubi -Pspark-3.1 -Phadoop-2.7 env: - PROFILE="-Pspark-3.1 -Phadoop-2.7" - EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest" -# - name: Tes Kyuubi -Pspark-3.1 -Phadoop-3.2 -# env: -# - PROFILE="-Pspark-3.1 -Phadoop-3.2" -# - EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest" + - name: Test Kyuubi -Pspark-3.1 -Phadoop-3.2 + env: + - PROFILE="-Pspark-3.1 -Phadoop-3.2" + - EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest" install: - mvn --version -before_script: - - mvn help:evaluate -Dexpression=project.version - - mvn help:evaluate -Dexpression=java.version - - mvn help:evaluate -Dexpression=scala.binary.version - - mvn help:evaluate -Dexpression=hadoop.version - - mvn help:evaluate -Dexpression=hive.version - script: - mvn clean install $PROFILE -Dmaven.plugin.scalatest.exclude.tags=$EXCLUDE_TAGS -Dmaven.javadoc.skip=true -V diff --git a/build/dist b/build/dist index b6210e0c3..3379d8b93 100755 --- a/build/dist +++ b/build/dist @@ -141,10 +141,17 @@ HIVE_VERSION=$("$MVN" help:evaluate -Dexpression=hive.version $@ 2>/dev/null\ echo "Building Kyuubi package of version $VERSION against Spark version - $SPARK_VERSION" if [[ "$NAME" == "none" ]]; then - if [[ "$SPARK_PROVIDED" == "true" ]]; then - NAME="without-spark" + + if [[ ${HADOOP_VERSION:0:3} == "2.7" ]]; then + HADOOP_VERSION_SUFFIX="" else - NAME="spark-"${SPARK_VERSION:0:3} + 
HADOOP_VERSION_SUFFIX="-hadoop${HADOOP_VERSION:0:3}" + fi + + if [[ "$SPARK_PROVIDED" == "true" ]]; then + NAME="without-spark"$HADOOP_VERSION_SUFFIX + else + NAME="spark-"${SPARK_VERSION:0:3}$HADOOP_VERSION_SUFFIX fi fi @@ -184,12 +191,6 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE" cp -r "$KYUUBI_HOME/kyuubi-assembly/target/scala-$SCALA_VERSION/jars/" "$DISTDIR/jars" ## cp engines -if [[ ${HIVE_VERSION:0:3} == "2.3" ]]; then - HIVE_VERSION_SUFFIX="" -else - HIVE_VERSION_SUFFIX="-hive1.2" -fi - if [[ "$SPARK_PROVIDED" != "true" ]]; then cp -r "$KYUUBI_HOME/externals/kyuubi-download/target/spark-$SPARK_VERSION-bin-hadoop${HADOOP_VERSION:0:3}$HIVE_VERSION_SUFFIX/" \ "$DISTDIR/externals/spark-$SPARK_VERSION-bin-hadoop${HADOOP_VERSION:0:3}$HIVE_VERSION_SUFFIX/" diff --git a/kyuubi-main/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-main/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala index 523852e33..1440b21eb 100644 --- a/kyuubi-main/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala +++ b/kyuubi-main/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala @@ -17,7 +17,7 @@ package org.apache.kyuubi.engine.spark -import java.io.IOException +import java.io.{File, FilenameFilter, IOException} import java.nio.file.{Files, Path, Paths} import scala.collection.mutable.ArrayBuffer @@ -38,24 +38,24 @@ class SparkProcessBuilder( import SparkProcessBuilder._ override protected val executable: String = { - val path = env.get("SPARK_HOME").map { sparkHome => - Paths.get(sparkHome, "bin", SPARK_SUBMIT_FILE).toAbsolutePath - } getOrElse { - val sparkVer = SPARK_COMPILE_VERSION - val hadoopVer = HADOOP_COMPILE_VERSION.take(3) + val sparkHomeOpt = env.get("SPARK_HOME").orElse { val kyuubiPattern = "/kyuubi/" val cwd = getClass.getProtectionDomain.getCodeSource.getLocation.getPath val idx = kyuubiPattern.length + cwd.lastIndexOf(kyuubiPattern) val kyuubiDevHome = cwd.substring(0, idx) - 
Paths.get( - kyuubiDevHome, - "externals", - "kyuubi-download", - "target", - s"spark-$sparkVer-bin-hadoop$hadoopVer", - "bin", SPARK_SUBMIT_FILE) + Paths.get(kyuubiDevHome, "externals", "kyuubi-download", "target").toFile + .listFiles(new FilenameFilter { + override def accept(dir: File, name: String): Boolean = { + dir.isDirectory && name.startsWith("spark-") + } + }).headOption.map(_.getAbsolutePath) + } + + sparkHomeOpt.map{ dir => + Paths.get(dir, "bin", SPARK_SUBMIT_FILE).toAbsolutePath.toFile.getCanonicalPath + }.getOrElse { + throw KyuubiSQLException("SPARK_HOME is not set!") } - path.toAbsolutePath.toFile.getCanonicalPath } override def mainClass: String = "org.apache.kyuubi.engine.spark.SparkSQLEngine" diff --git a/pom.xml b/pom.xml index a1f3d6b76..d116f739f 100644 --- a/pom.xml +++ b/pom.xml @@ -63,7 +63,7 @@ 3.10 4.5.6 4.4.12 - 16.0.1 + 24.1.1-jre 2.12.0 2.7.4 2.7 @@ -1373,7 +1373,6 @@ hadoop-3.2 - 3.2.2 3.2