[KYUUBI #4741] Kyuubi Spark Engine/TPC connectors support Spark 3.4

### _Why are the changes needed?_

- Add CI for Spark 3.4
- Kyuubi Spark TPC-DS/H connectors support Spark 3.4

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request

Closes #4741 from pan3793/spark-3.4.

Closes #4741

84a2d6ad7 [Cheng Pan] log
b9b2ec1fb [Cheng Pan] Add spark-3.4 profile

Authored-by: Cheng Pan <chengpan@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
Cheng Pan 2023-04-23 20:17:20 +08:00
parent 19d5a9a371
commit ab1f67cb31
No known key found for this signature in database
GPG Key ID: 8001952629BCC75D
11 changed files with 123 additions and 96 deletions

View File

@ -50,6 +50,7 @@ jobs:
- '3.1'
- '3.2'
- '3.3'
- '3.4'
spark-archive: [""]
exclude-tags: [""]
comment: ["normal"]
@ -64,6 +65,11 @@ jobs:
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.2.4 -Dspark.archive.name=spark-3.2.4-bin-hadoop3.2.tgz'
exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
comment: 'verify-on-spark-3.2-binary'
- java: 8
spark: '3.3'
spark-archive: '-Dspark.archive.mirror=https://archive.apache.org/dist/spark/spark-3.4.0 -Dspark.archive.name=spark-3.4.0-bin-hadoop3.tgz'
exclude-tags: '-Dmaven.plugin.scalatest.exclude.tags=org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest'
comment: 'verify-on-spark-3.4-binary'
env:
SPARK_LOCAL_IP: localhost
steps:
@ -88,6 +94,12 @@ jobs:
- name: Build and test Kyuubi and Spark with maven w/o linters
run: |
TEST_MODULES="dev/kyuubi-codecov"
if [[ "${{ matrix.spark }}" == "3.4" ]]; then
# FIXME: remove this exclusion once the authz plugin supports Spark 3.4
TEST_MODULES="$TEST_MODULES,!extensions/spark/kyuubi-spark-authz"
# FIXME: remove this exclusion once the lineage plugin supports Spark 3.4
TEST_MODULES="$TEST_MODULES,!extensions/spark/kyuubi-spark-lineage"
fi
./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \
-Pspark-${{ matrix.spark }} ${{ matrix.spark-archive }} ${{ matrix.exclude-tags }}
- name: Code coverage

View File

@ -125,10 +125,21 @@
<artifactId>jakarta.xml.bind-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>

View File

@ -125,10 +125,21 @@
<artifactId>jakarta.xml.bind-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>

View File

@ -110,10 +110,21 @@
<artifactId>jakarta.xml.bind-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.antlr</groupId>

View File

@ -34,6 +34,7 @@ build/mvn clean package -pl :kyuubi-spark-authz_2.12 -Dspark.version=3.2.1 -Dran
`-Dspark.version=`
- [x] master
- [ ] 3.4.x
- [x] 3.3.x (default)
- [x] 3.2.x
- [x] 3.1.x

View File

@ -87,10 +87,21 @@
<artifactId>scalacheck-1-17_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>

View File

@ -170,7 +170,8 @@ class TPCDSCatalogSuite extends KyuubiFunSuite {
val exception = intercept[AnalysisException] {
spark.table("tpcds.sf1.nonexistent_table")
}
assert(exception.message === "Table or view not found: tpcds.sf1.nonexistent_table")
assert(exception.message.contains("Table or view not found")
|| exception.message.contains("TABLE_OR_VIEW_NOT_FOUND"))
}
}
}

View File

@ -158,7 +158,8 @@ class TPCHCatalogSuite extends KyuubiFunSuite {
val exception = intercept[AnalysisException] {
spark.table("tpch.sf1.nonexistent_table")
}
assert(exception.message === "Table or view not found: tpch.sf1.nonexistent_table")
assert(exception.message.contains("Table or view not found")
|| exception.message.contains("TABLE_OR_VIEW_NOT_FOUND"))
}
}
}

View File

@ -34,6 +34,7 @@ build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -am -Dspark.version=3.2.1
`-Dspark.version=`
- [x] master
- [ ] 3.4.x
- [x] 3.3.x (default)
- [x] 3.2.x
- [x] 3.1.x

View File

@ -85,10 +85,14 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<testResources>
<testResource>
<directory>${project.basedir}/src/test/resources</directory>

141
pom.xml
View File

@ -196,6 +196,8 @@
<swagger.version>2.2.1</swagger.version>
<swagger-ui.version>4.9.1</swagger-ui.version>
<testcontainers-scala.version>0.40.12</testcontainers-scala.version>
<!-- https://github.com/ThreeTen/threeten-extra/issues/226 -->
<threeten.version>1.7.0</threeten.version>
<thrift.version>0.9.3</thrift.version>
<trino.client.version>363</trino.client.version>
<trino.tpcds.version>1.4</trino.tpcds.version>
@ -403,19 +405,15 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<!--
Use Hadoop Shaded Client to gain more clean transitive dependencies
-->
<!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<!--
Use log4j2
-->
<!-- Use log4j2 -->
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
@ -424,33 +422,24 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<!-- SPARK-40511 upgrade SLF4J2, which is not compatible w/ SLF4J1 -->
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<!--
Use Hadoop Shaded Client to gain more clean transitive dependencies
-->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<!--
Use log4j2
-->
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
@ -458,28 +447,11 @@
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<!--
Use Hadoop Shaded Client to gain more clean transitive dependencies
-->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</exclusion>
<!--
Use log4j2
-->
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -489,17 +461,25 @@
<version>${spark.version}</version>
<type>test-jar</type>
<exclusions>
<!--
Use Hadoop Shaded Client to gain more clean transitive dependencies
-->
<!-- Use Hadoop Shaded Client to gain more clean transitive dependencies -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<!--
The module is only used in Kyuubi Spark Extensions, we should respect
the Spark bundled log4j.
-->
<!-- Use log4j2 -->
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<!-- SPARK-40511 upgrade SLF4J2, which is not compatible w/ SLF4J1 -->
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
@ -508,19 +488,6 @@
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<type>test-jar</type>
<exclusions>
<!--
Use Hadoop Shaded Client to gain more clean transitive dependencies
-->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<!--
The module is only used in Kyuubi Spark Extensions, so we don't care about which
version of Log4j it depends on.
-->
</exclusions>
</dependency>
<dependency>
@ -528,19 +495,6 @@
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<type>test-jar</type>
<exclusions>
<!--
Use Hadoop Shaded Client to gain more clean transitive dependencies
-->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</exclusion>
<!--
The module is only used in Kyuubi Spark Extensions and Engine Spark SQL, so we
don't care about which version of Log4j it depends on.
-->
</exclusions>
</dependency>
<dependency>
@ -1524,6 +1478,12 @@
<artifactId>service</artifactId>
<version>${openai.java.version}</version>
</dependency>
<dependency>
<groupId>org.threeten</groupId>
<artifactId>threeten-extra</artifactId>
<version>${threeten.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
@ -2210,23 +2170,26 @@
</properties>
</profile>
<profile>
<id>spark-3.4</id>
<modules>
<module>extensions/spark/kyuubi-spark-connector-hive</module>
<module>extensions/spark/kyuubi-spark-connector-kudu</module>
</modules>
<properties>
<spark.version>3.4.0</spark.version>
<!-- FIXME: used for constructing Iceberg artifact name, correct it once Iceberg supports Spark 3.4 -->
<spark.binary.version>3.3</spark.binary.version>
<maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.HudiTest,org.apache.kyuubi.tags.IcebergTest</maven.plugin.scalatest.exclude.tags>
</properties>
</profile>
<profile>
<id>spark-master</id>
<properties>
<spark.version>3.5.0-SNAPSHOT</spark.version>
<!-- https://github.com/ThreeTen/threeten-extra/issues/226 -->
<threeten.version>1.7.0</threeten.version>
<maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.PySparkTest</maven.plugin.scalatest.exclude.tags>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.threeten</groupId>
<artifactId>threeten-extra</artifactId>
<version>${threeten.version}</version>
</dependency>
</dependencies>
</dependencyManagement>
<repositories>
<repository>
<releases>