[KYUUBI #393] Add Support for Hadoop 3.2

![yaooqinn](https://badgen.net/badge/Hello/yaooqinn/green) [![Closes #393](https://badgen.net/badge/Preview/Closes%20%23393/blue)](https://github.com/yaooqinn/kyuubi/pull/393) ![42](https://badgen.net/badge/%2B/42/red) ![41](https://badgen.net/badge/-/41/green) ![3](https://badgen.net/badge/commits/3/yellow) ![Target Issue](https://badgen.net/badge/Missing/Target%20Issue/ff0000) [&#10088;?&#10089;](https://pullrequestbadge.com/?utm_medium=github&utm_source=yaooqinn&utm_campaign=badge_info)<!-- PR-BADGE: PLEASE DO NOT REMOVE THIS COMMENT -->

<!--
Thanks for sending a pull request!

Here are some tips for you:
  1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html
  2. If the PR is related to an issue in https://github.com/yaooqinn/kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
  3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'.
-->

### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
  1. If you add a feature, you can talk about the use case of it.
  2. If you fix a bug, you can clarify why it is a bug.
-->

This PR adds support for the Hadoop 3.2 profile to work with Spark 3.1.1.

### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before making a pull request

Closes #393 from yaooqinn/guava2.

1f56082 [Kent Yao] nit
c8c576c [Kent Yao] nit
ccdfd03 [Kent Yao] Add Support for Hadoop 3.2

Authored-by: Kent Yao <yao@apache.org>
Signed-off-by: Kent Yao <yao@apache.org>
This commit is contained in:
Kent Yao 2021-03-04 18:25:11 +08:00
parent 2e3315867b
commit 25d2d587b0
No known key found for this signature in database
GPG Key ID: F7051850A0AF904D
6 changed files with 43 additions and 42 deletions

View File

@ -19,6 +19,7 @@ jobs:
profiles:
- '-Pspark-3.0 -Phadoop-2.7'
- '-Pspark-3.1 -Phadoop-2.7'
- '-Pspark-3.1 -Phadoop-3.2'
steps:
- uses: actions/checkout@v2
- name: Setup JDK 1.8
@ -51,7 +52,7 @@ jobs:
${{ runner.os }}-maven-io-
- name: Build with Maven
run: |
mvn clean install ${{ matrix.profiles }} -Dmaven.javadoc.skip=true -B -V
mvn clean install ${{ matrix.profiles }} -Dmaven.javadoc.skip=true -V
bash <(curl -s https://codecov.io/bash)
- name: Collect unit tests log
run: |

View File

@ -9,6 +9,15 @@ jobs:
build:
name: Create and Publish Release
runs-on: ubuntu-latest
strategy:
matrix:
profiles:
- '-Pspark-3.0 -Phadoop-2.7'
- '--spark-provided -Pspark-3.0 -Phadoop-2.7'
- '-Pspark-3.1 -Phadoop-2.7'
- '--spark-provided -Pspark-3.1 -Phadoop-2.7'
- '-Pspark-3.1 -Phadoop-3.2'
- '--spark-provided -Pspark-3.1 -Phadoop-3.2'
steps:
- uses: actions/checkout@master
# We split caches because GitHub Action Cache has a 400MB-size limit.
@ -34,10 +43,8 @@ jobs:
uses: actions/setup-java@v1
with:
java-version: '1.8'
- name: Make Distribution with Spark
run: ./build/dist --tgz
- name: Make Distribution without Spark
run: ./build/dist --tgz --spark-provided
- name: Make Distribution
run: ./build/dist --tgz ${{ matrix.profiles }}
- name: Create Release
id: create_release
uses: actions/create-release@v1

View File

@ -33,25 +33,18 @@ matrix:
env:
- PROFILE="-Pspark-3.0 -Phadoop-2.7"
- EXCLUDE_TAGS=""
- name: Tes Kyuubi -Pspark-3.1 -Phadoop-2.7
- name: Test Kyuubi -Pspark-3.1 -Phadoop-2.7
env:
- PROFILE="-Pspark-3.1 -Phadoop-2.7"
- EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest"
# - name: Tes Kyuubi -Pspark-3.1 -Phadoop-3.2
# env:
# - PROFILE="-Pspark-3.1 -Phadoop-3.2"
# - EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest"
- name: Test Kyuubi -Pspark-3.1 -Phadoop-3.2
env:
- PROFILE="-Pspark-3.1 -Phadoop-3.2"
- EXCLUDE_TAGS="org.apache.kyuubi.tags.DataLakeTest"
install:
- mvn --version
before_script:
- mvn help:evaluate -Dexpression=project.version
- mvn help:evaluate -Dexpression=java.version
- mvn help:evaluate -Dexpression=scala.binary.version
- mvn help:evaluate -Dexpression=hadoop.version
- mvn help:evaluate -Dexpression=hive.version
script:
- mvn clean install $PROFILE -Dmaven.plugin.scalatest.exclude.tags=$EXCLUDE_TAGS -Dmaven.javadoc.skip=true -V

View File

@ -141,10 +141,17 @@ HIVE_VERSION=$("$MVN" help:evaluate -Dexpression=hive.version $@ 2>/dev/null\
echo "Building Kyuubi package of version $VERSION against Spark version - $SPARK_VERSION"
if [[ "$NAME" == "none" ]]; then
if [[ "$SPARK_PROVIDED" == "true" ]]; then
NAME="without-spark"
if [[ ${HADOOP_VERSION:0:3} == "2.7" ]]; then
HADOOP_VERSION_SUFFIX=""
else
NAME="spark-"${SPARK_VERSION:0:3}
HADOOP_VERSION_SUFFIX="-hadoop${HADOOP_VERSION:0:3}"
fi
if [[ "$SPARK_PROVIDED" == "true" ]]; then
NAME="without-spark"$HADOOP_VERSION_SUFFIX
else
NAME="spark-"${SPARK_VERSION:0:3}$HADOOP_VERSION_SUFFIX
fi
fi
@ -184,12 +191,6 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
cp -r "$KYUUBI_HOME/kyuubi-assembly/target/scala-$SCALA_VERSION/jars/" "$DISTDIR/jars"
## cp engines
if [[ ${HIVE_VERSION:0:3} == "2.3" ]]; then
HIVE_VERSION_SUFFIX=""
else
HIVE_VERSION_SUFFIX="-hive1.2"
fi
if [[ "$SPARK_PROVIDED" != "true" ]]; then
cp -r "$KYUUBI_HOME/externals/kyuubi-download/target/spark-$SPARK_VERSION-bin-hadoop${HADOOP_VERSION:0:3}$HIVE_VERSION_SUFFIX/" \
"$DISTDIR/externals/spark-$SPARK_VERSION-bin-hadoop${HADOOP_VERSION:0:3}$HIVE_VERSION_SUFFIX/"

View File

@ -17,7 +17,7 @@
package org.apache.kyuubi.engine.spark
import java.io.IOException
import java.io.{File, FilenameFilter, IOException}
import java.nio.file.{Files, Path, Paths}
import scala.collection.mutable.ArrayBuffer
@ -38,24 +38,24 @@ class SparkProcessBuilder(
import SparkProcessBuilder._
override protected val executable: String = {
val path = env.get("SPARK_HOME").map { sparkHome =>
Paths.get(sparkHome, "bin", SPARK_SUBMIT_FILE).toAbsolutePath
} getOrElse {
val sparkVer = SPARK_COMPILE_VERSION
val hadoopVer = HADOOP_COMPILE_VERSION.take(3)
val sparkHomeOpt = env.get("SPARK_HOME").orElse {
val kyuubiPattern = "/kyuubi/"
val cwd = getClass.getProtectionDomain.getCodeSource.getLocation.getPath
val idx = kyuubiPattern.length + cwd.lastIndexOf(kyuubiPattern)
val kyuubiDevHome = cwd.substring(0, idx)
Paths.get(
kyuubiDevHome,
"externals",
"kyuubi-download",
"target",
s"spark-$sparkVer-bin-hadoop$hadoopVer",
"bin", SPARK_SUBMIT_FILE)
Paths.get(kyuubiDevHome, "externals", "kyuubi-download", "target").toFile
.listFiles(new FilenameFilter {
override def accept(dir: File, name: String): Boolean = {
dir.isDirectory && name.startsWith("spark-")
}
}).headOption.map(_.getAbsolutePath)
}
sparkHomeOpt.map{ dir =>
Paths.get(dir, "bin", SPARK_SUBMIT_FILE).toAbsolutePath.toFile.getCanonicalPath
}.getOrElse {
throw KyuubiSQLException("SPARK_HOME is not set!")
}
path.toAbsolutePath.toFile.getCanonicalPath
}
override def mainClass: String = "org.apache.kyuubi.engine.spark.SparkSQLEngine"

View File

@ -63,7 +63,7 @@
<commons-lang3.version>3.10</commons-lang3.version>
<commons.httpclient.version>4.5.6</commons.httpclient.version>
<commons.httpcore.version>4.4.12</commons.httpcore.version>
<guava.version>16.0.1</guava.version>
<guava.version>24.1.1-jre</guava.version>
<curator.version>2.12.0</curator.version>
<hadoop.version>2.7.4</hadoop.version>
<hadoop.binary.version>2.7</hadoop.binary.version>
@ -1373,7 +1373,6 @@
<profile>
<id>hadoop-3.2</id>
<properties>
<!-- TODO: Enable this after sovle guava dep issue and so on -->
<hadoop.version>3.2.2</hadoop.version>
<hadoop.binary.version>3.2</hadoop.binary.version>
</properties>