From 25d2d587b018a6456b6ea78171ce4f84025d7a7a Mon Sep 17 00:00:00 2001
From: Kent Yao <yao@apache.org>
Date: Thu, 4 Mar 2021 18:25:11 +0800
Subject: [PATCH] [KYUUBI #393] Add Support for Hadoop 3.2

![yaooqinn](https://badgen.net/badge/Hello/yaooqinn/green) [![Closes #393](https://badgen.net/badge/Preview/Closes%20%23393/blue)](https://github.com/yaooqinn/kyuubi/pull/393) ![42](https://badgen.net/badge/%2B/42/red) ![41](https://badgen.net/badge/-/41/green) ![3](https://badgen.net/badge/commits/3/yellow) ![Target Issue](https://badgen.net/badge/Missing/Target%20Issue/ff0000) [&#10088;?&#10089;](https://pullrequestbadge.com/?utm_medium=github&utm_source=yaooqinn&utm_campaign=badge_info)<!-- PR-BADGE: PLEASE DO NOT REMOVE THIS COMMENT -->

<!--
Thanks for sending a pull request!

Here are some tips for you:
  1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html
  2. If the PR is related to an issue in https://github.com/yaooqinn/kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
  3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'.
-->

### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
  1. If you add a feature, you can talk about the use case of it.
  2. If you fix a bug, you can clarify why it is a bug.
-->

This PR adds support for the Hadoop 3.2 profile so that Kyuubi can be built and tested against Spark 3.1.1.

### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before making a pull request

Closes #393 from yaooqinn/guava2.

1f56082 [Kent Yao] nit
c8c576c [Kent Yao] nit
ccdfd03 [Kent Yao] Add Support for Hadoop 3.2

Authored-by: Kent Yao <yao@apache.org>
Signed-off-by: Kent Yao <yao@apache.org>
---
 .github/workflows/master.yml                  |  3 +-
 .github/workflows/release.yml                 | 15 +++++++---
 .travis.yml                                   | 17 ++++-------
 build/dist                                    | 19 +++++++------
 .../engine/spark/SparkProcessBuilder.scala    | 28 +++++++++----------
 pom.xml                                       |  3 +-
 6 files changed, 43 insertions(+), 42 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 110e975fc..fa15a02a5 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -19,6 +19,7 @@ jobs:
         profiles:
           - '-Pspark-3.0 -Phadoop-2.7'
           - '-Pspark-3.1 -Phadoop-2.7'
+          - '-Pspark-3.1 -Phadoop-3.2'
     steps:
       - uses: actions/checkout@v2
       - name: Setup JDK 1.8
@@ -51,7 +52,7 @@ jobs:
             ${{ runner.os }}-maven-io-
       - name: Build with Maven
         run: |
-          mvn clean install ${{ matrix.profiles }} -Dmaven.javadoc.skip=true -B -V
+          mvn clean install ${{ matrix.profiles }} -Dmaven.javadoc.skip=true -V
           bash <(curl -s https://codecov.io/bash)
       - name: Collect unit tests log
         run: |
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 7de5b97b4..4378f564e 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -9,6 +9,15 @@ jobs:
   build:
     name: Create and Publish Release
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        profiles:
+          - '-Pspark-3.0 -Phadoop-2.7'
+          - '--spark-provided -Pspark-3.0 -Phadoop-2.7'
+          - '-Pspark-3.1 -Phadoop-2.7'
+          - '--spark-provided -Pspark-3.1 -Phadoop-2.7'
+          - '-Pspark-3.1 -Phadoop-3.2'
+          - '--spark-provided -Pspark-3.1 -Phadoop-3.2'
     steps:
       - uses: actions/checkout@master
       # We split caches because GitHub Action Cache has a 400MB-size limit.
@@ -34,10 +43,8 @@ jobs:
         uses: actions/setup-java@v1
         with:
           java-version: '1.8'
-      - name: Make Distribution with Spark
-        run: ./build/dist --tgz
-      - name: Make Distribution without Spark
-        run: ./build/dist --tgz --spark-provided
+      - name: Make Distribution
+        run: ./build/dist --tgz ${{ matrix.profiles }}
       - name: Create Release
         id: create_release
         uses: actions/create-release@v1
diff --git a/.travis.yml b/.travis.yml
index ab4b5a34d..2f0829286 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -33,25 +33,18 @@ matrix:
       env:
         - PROFILE="-Pspark-3.0 -Phadoop-2.7"
         - EXCLUDE_TAGS=""
-    - name: Tes Kyuubi -Pspark-3.1 -Phadoop-2.7
+    - name: Test Kyuubi -Pspark-3.1 -Phadoop-2.7
       env:
         - PROFILE="-Pspark-3.1 -Phadoop-2.7"
         - EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest"
-#    - name: Tes Kyuubi -Pspark-3.1 -Phadoop-3.2
-#      env:
-#        - PROFILE="-Pspark-3.1 -Phadoop-3.2"
-#        - EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest"
+    - name: Test Kyuubi -Pspark-3.1 -Phadoop-3.2
+      env:
+        - PROFILE="-Pspark-3.1 -Phadoop-3.2"
+        - EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest"
 
 install:
   - mvn --version
 
-before_script:
-  - mvn help:evaluate -Dexpression=project.version
-  - mvn help:evaluate -Dexpression=java.version
-  - mvn help:evaluate -Dexpression=scala.binary.version
-  - mvn help:evaluate -Dexpression=hadoop.version
-  - mvn help:evaluate -Dexpression=hive.version
-
 script:
   - mvn clean install $PROFILE -Dmaven.plugin.scalatest.exclude.tags=$EXCLUDE_TAGS -Dmaven.javadoc.skip=true -V
 
diff --git a/build/dist b/build/dist
index b6210e0c3..3379d8b93 100755
--- a/build/dist
+++ b/build/dist
@@ -141,10 +141,17 @@ HIVE_VERSION=$("$MVN" help:evaluate -Dexpression=hive.version $@ 2>/dev/null\
 echo "Building Kyuubi package of version $VERSION against Spark version - $SPARK_VERSION"
 
 if [[ "$NAME" == "none" ]]; then
-  if [[ "$SPARK_PROVIDED" == "true" ]]; then
-    NAME="without-spark"
+
+  if [[ ${HADOOP_VERSION:0:3} == "2.7" ]]; then
+    HADOOP_VERSION_SUFFIX=""
   else
-    NAME="spark-"${SPARK_VERSION:0:3}
+    HADOOP_VERSION_SUFFIX="-hadoop${HADOOP_VERSION:0:3}"
+  fi
+
+  if [[ "$SPARK_PROVIDED" == "true" ]]; then
+    NAME="without-spark"$HADOOP_VERSION_SUFFIX
+  else
+    NAME="spark-"${SPARK_VERSION:0:3}$HADOOP_VERSION_SUFFIX
   fi
 fi
 
@@ -184,12 +191,6 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
 cp -r "$KYUUBI_HOME/kyuubi-assembly/target/scala-$SCALA_VERSION/jars/" "$DISTDIR/jars"
 ## cp engines
 
-if [[ ${HIVE_VERSION:0:3} == "2.3" ]]; then
-  HIVE_VERSION_SUFFIX=""
-else
-  HIVE_VERSION_SUFFIX="-hive1.2"
-fi
-
 if [[ "$SPARK_PROVIDED" != "true" ]]; then
   cp -r "$KYUUBI_HOME/externals/kyuubi-download/target/spark-$SPARK_VERSION-bin-hadoop${HADOOP_VERSION:0:3}$HIVE_VERSION_SUFFIX/" \
         "$DISTDIR/externals/spark-$SPARK_VERSION-bin-hadoop${HADOOP_VERSION:0:3}$HIVE_VERSION_SUFFIX/"
diff --git a/kyuubi-main/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-main/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
index 523852e33..1440b21eb 100644
--- a/kyuubi-main/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
+++ b/kyuubi-main/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala
@@ -17,7 +17,7 @@
 
 package org.apache.kyuubi.engine.spark
 
-import java.io.IOException
+import java.io.{File, FilenameFilter, IOException}
 import java.nio.file.{Files, Path, Paths}
 
 import scala.collection.mutable.ArrayBuffer
@@ -38,24 +38,24 @@ class SparkProcessBuilder(
   import SparkProcessBuilder._
 
   override protected val executable: String = {
-    val path = env.get("SPARK_HOME").map { sparkHome =>
-      Paths.get(sparkHome, "bin", SPARK_SUBMIT_FILE).toAbsolutePath
-    } getOrElse {
-      val sparkVer = SPARK_COMPILE_VERSION
-      val hadoopVer = HADOOP_COMPILE_VERSION.take(3)
+    val sparkHomeOpt = env.get("SPARK_HOME").orElse {
       val kyuubiPattern = "/kyuubi/"
       val cwd = getClass.getProtectionDomain.getCodeSource.getLocation.getPath
       val idx = kyuubiPattern.length + cwd.lastIndexOf(kyuubiPattern)
       val kyuubiDevHome = cwd.substring(0, idx)
-      Paths.get(
-        kyuubiDevHome,
-        "externals",
-        "kyuubi-download",
-        "target",
-        s"spark-$sparkVer-bin-hadoop$hadoopVer",
-        "bin", SPARK_SUBMIT_FILE)
+      Paths.get(kyuubiDevHome, "externals", "kyuubi-download", "target").toFile
+        .listFiles(new FilenameFilter {
+        override def accept(dir: File, name: String): Boolean = {
+          dir.isDirectory && name.startsWith("spark-")
+        }
+      }).headOption.map(_.getAbsolutePath)
+    }
+
+    sparkHomeOpt.map{ dir =>
+      Paths.get(dir, "bin", SPARK_SUBMIT_FILE).toAbsolutePath.toFile.getCanonicalPath
+    }.getOrElse {
+      throw KyuubiSQLException("SPARK_HOME is not set!")
     }
-    path.toAbsolutePath.toFile.getCanonicalPath
   }
 
   override def mainClass: String = "org.apache.kyuubi.engine.spark.SparkSQLEngine"
diff --git a/pom.xml b/pom.xml
index a1f3d6b76..d116f739f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -63,7 +63,7 @@
         <commons-lang3.version>3.10</commons-lang3.version>
         <commons.httpclient.version>4.5.6</commons.httpclient.version>
         <commons.httpcore.version>4.4.12</commons.httpcore.version>
-        <guava.version>16.0.1</guava.version>
+        <guava.version>24.1.1-jre</guava.version>
         <curator.version>2.12.0</curator.version>
         <hadoop.version>2.7.4</hadoop.version>
         <hadoop.binary.version>2.7</hadoop.binary.version>
@@ -1373,7 +1373,6 @@
         <profile>
             <id>hadoop-3.2</id>
             <properties>
-                <!-- TODO: Enable this after sovle guava dep issue and so on -->
                 <hadoop.version>3.2.2</hadoop.version>
                 <hadoop.binary.version>3.2</hadoop.binary.version>
             </properties>