[KYUUBI #6091] Deprecate and remove building support for Spark 3.1
# 🔍 Description ## Issue References 🔗 This pull request aims to remove building support for Spark 3.1, while still keeping the engine support for Spark 3.1. - VOTE: https://lists.apache.org/thread/670fx1qx7rm0vpvk8k8094q2d0fthw5b - VOTE RESULT: https://lists.apache.org/thread/0zdxg5zjnc1wpxmw9mgtsxp1ywqt6qvb The next step is to clean up code in Spark extensions to drop 3.1-related code. ## Describe Your Solution 🔧 - Remove Maven profile `spark-3.1`, and references on docs, release scripts, etc. - Keep the cross-version verification to ensure that the Spark SQL engine built on the default Spark version (3.4) still works well on Spark 3.1 runtime. ## Types of changes 🔖 - [ ] Bugfix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [x] Breaking change (fix or feature that would cause existing functionality to change) ## Test Plan 🧪 Pass GA. --- # Checklist 📝 - [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html) **Be nice. Be informative.** Closes #6091 from pan3793/remove-spark-3.1-profile. Closes #6091 ce2983284 [Cheng Pan] nit 5887c808b [Cheng Pan] migration guide cf28096d3 [Cheng Pan] Log deprecation message on Spark SQL engine with 3.1 a467e618d [Cheng Pan] nit e11c0fb31 [Cheng Pan] Remove building support for Spark 3.1 Authored-by: Cheng Pan <chengpan@apache.org> Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
parent
a9b90c7100
commit
e0d706e696
2
.github/workflows/license.yml
vendored
2
.github/workflows/license.yml
vendored
@ -45,7 +45,7 @@ jobs:
|
||||
- run: >-
|
||||
build/mvn org.apache.rat:apache-rat-plugin:check
|
||||
-Ptpcds -Pkubernetes-it
|
||||
-Pspark-3.1 -Pspark-3.2 -Pspark-3.3 -Pspark-3.4 -Pspark-3.5
|
||||
-Pspark-3.2 -Pspark-3.3 -Pspark-3.4 -Pspark-3.5
|
||||
- name: Upload rat report
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v3
|
||||
|
||||
5
.github/workflows/publish-snapshot-nexus.yml
vendored
5
.github/workflows/publish-snapshot-nexus.yml
vendored
@ -33,14 +33,17 @@ jobs:
|
||||
- branch-1.7
|
||||
- branch-1.8
|
||||
profiles:
|
||||
- -Pflink-provided,spark-provided,hive-provided,spark-3.1
|
||||
- -Pflink-provided,spark-provided,hive-provided,spark-3.2
|
||||
- -Pflink-provided,spark-provided,hive-provided,spark-3.3,tpcds
|
||||
include:
|
||||
- branch: master
|
||||
profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.4
|
||||
- branch: master
|
||||
profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.5
|
||||
- branch: branch-1.8
|
||||
profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.4
|
||||
- branch: branch-1.8
|
||||
profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.5
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
|
||||
5
.github/workflows/style.yml
vendored
5
.github/workflows/style.yml
vendored
@ -66,17 +66,14 @@ jobs:
|
||||
run: |
|
||||
MVN_OPT="-DskipTests -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip"
|
||||
build/mvn clean install ${MVN_OPT} -Pflink-provided,hive-provided,spark-provided,spark-3.2,tpcds
|
||||
build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-1 -Pspark-3.1
|
||||
build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-3,extensions/spark/kyuubi-spark-connector-hive -Pspark-3.3
|
||||
build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-4 -Pspark-3.4
|
||||
build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-5 -Pspark-3.5
|
||||
|
||||
- name: Scalastyle with maven
|
||||
id: scalastyle-check
|
||||
# Check with Spark 3.1 profile separately as it use Iceberg 1.3.1 which is not compatible with Spark 3.5+
|
||||
run: |
|
||||
build/mvn scalastyle:check ${{ matrix.profiles }}
|
||||
build/mvn scalastyle:check -Pflink-provided,hive-provided,spark-provided,spark-3.1
|
||||
- name: Print scalastyle error report
|
||||
if: failure() && steps.scalastyle-check.outcome != 'success'
|
||||
run: >-
|
||||
@ -90,7 +87,7 @@ jobs:
|
||||
run: |
|
||||
SPOTLESS_BLACK_VERSION=$(build/mvn help:evaluate -Dexpression=spotless.python.black.version -q -DforceStdout)
|
||||
pip install black==$SPOTLESS_BLACK_VERSION
|
||||
build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python,spark-3.1
|
||||
build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python
|
||||
- name: setup npm
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
# Usage:
|
||||
# Run the docker command below
|
||||
# docker build \
|
||||
# --build-arg MVN_ARG="-Pspark-3.1,spark-hadoop-3.2" \
|
||||
# --build-arg MVN_ARG="-Pspark-3.5" \
|
||||
# --file build/Dockerfile.CI \
|
||||
# --tag apache/kyuubi:<tag> \
|
||||
# .
|
||||
|
||||
@ -110,11 +110,6 @@ upload_svn_staging() {
|
||||
}
|
||||
|
||||
upload_nexus_staging() {
|
||||
# Spark Extension Plugin for Spark 3.1
|
||||
${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.1 \
|
||||
-s "${KYUUBI_DIR}/build/release/asf-settings.xml" \
|
||||
-pl extensions/spark/kyuubi-extension-spark-3-1 -am
|
||||
|
||||
# Spark Extension Plugin for Spark 3.2
|
||||
${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.2 \
|
||||
-s "${KYUUBI_DIR}/build/release/asf-settings.xml" \
|
||||
|
||||
@ -161,16 +161,6 @@
|
||||
</build>
|
||||
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>spark-3.1</id>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.kyuubi</groupId>
|
||||
<artifactId>kyuubi-extension-spark-3-1_${scala.binary.version}</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</profile>
|
||||
<profile>
|
||||
<id>spark-3.2</id>
|
||||
<dependencies>
|
||||
|
||||
@ -20,7 +20,7 @@ set -x
|
||||
|
||||
KYUUBI_HOME="$(cd "`dirname "$0"`/.."; pwd)"
|
||||
|
||||
PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-3.5,spark-3.4,spark-3.3,spark-3.2,spark-3.1,tpcds,kubernetes-it"
|
||||
PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-3.5,spark-3.4,spark-3.3,spark-3.2,tpcds,kubernetes-it"
|
||||
|
||||
# python style checks rely on `black` in path
|
||||
if ! command -v black &> /dev/null
|
||||
|
||||
@ -63,7 +63,6 @@ Since v1.1.0, Kyuubi support building with different Spark profiles,
|
||||
|
||||
| Profile | Default | Since |
|
||||
|-------------|---------|-------|
|
||||
| -Pspark-3.1 | | 1.1.0 |
|
||||
| -Pspark-3.2 | | 1.4.0 |
|
||||
| -Pspark-3.3 | | 1.6.0 |
|
||||
| -Pspark-3.4 | ✓ | 1.8.0 |
|
||||
|
||||
@ -21,6 +21,8 @@
|
||||
|
||||
* Since Kyuubi 1.9.0, `kyuubi.session.conf.advisor` can be set as a sequence, and Kyuubi supports chaining SessionConfAdvisors.
|
||||
* Since Kyuubi 1.9.0, the support of Derby is removed for Kyuubi metastore.
|
||||
* Since Kyuubi 1.9.0, the support of Spark SQL engine for Spark 3.1 is deprecated, and will be removed in the future.
|
||||
* Since Kyuubi 1.9.0, the support of Spark extensions for Spark 3.1 is removed, please use Spark 3.2 or higher versions.
|
||||
|
||||
## Upgrading from Kyuubi 1.8.0 to 1.8.1
|
||||
|
||||
|
||||
@ -33,7 +33,7 @@ Hive Dialect helps to solve failures access Kyuubi. It fails and unexpected resu
|
||||
1. Get the Kyuubi Hive Dialect Extension jar
|
||||
1. compile the extension by executing `build/mvn clean package -pl :kyuubi-extension-spark-jdbc-dialect_2.12 -DskipTests`
|
||||
2. get the extension jar under `extensions/spark/kyuubi-extension-spark-jdbc-dialect/target`
|
||||
3. If you like, you can compile the extension jar with the corresponding Maven's profile on you compile command, i.e. you can get extension jar for Spark 3.2 by compiling with `-Pspark-3.1`
|
||||
3. If you like, you can compile the extension jar with the corresponding Maven profile in your compile command, e.g. you can get the extension jar for Spark 3.5 by compiling with `-Pspark-3.5`
|
||||
2. Put the Kyuubi Hive Dialect Extension jar `kyuubi-extension-spark-jdbc-dialect_-*.jar` into `$SPARK_HOME/jars`
|
||||
3. Enable `KyuubiSparkJdbcDialectExtension`, by setting `spark.sql.extensions=org.apache.spark.sql.dialect.KyuubiSparkJdbcDialectExtension`, i.e.
|
||||
- add a config into `$SPARK_HOME/conf/spark-defaults.conf`
|
||||
|
||||
@ -43,17 +43,19 @@ And don't worry, Kyuubi will support the new Apache Spark version in the future.
|
||||
|
||||
## Usage
|
||||
|
||||
| Kyuubi Spark SQL extension | Supported Spark version(s) | Available since | EOL | Bundled in Binary release tarball | Maven profile |
|
||||
|----------------------------|----------------------------|------------------|-----|-----------------------------------|---------------|
|
||||
| kyuubi-extension-spark-3-1 | 3.1.x | 1.3.0-incubating | N/A | 1.3.0-incubating | spark-3.1 |
|
||||
| kyuubi-extension-spark-3-2 | 3.2.x | 1.4.0-incubating | N/A | 1.4.0-incubating | spark-3.2 |
|
||||
| kyuubi-extension-spark-3-3 | 3.3.x | 1.6.0-incubating | N/A | 1.6.0-incubating | spark-3.3 |
|
||||
| Kyuubi Spark SQL extension | Supported Spark version(s) | Available since | EOL | Bundled in Binary release tarball | Maven profile |
|
||||
|----------------------------|----------------------------|------------------|-------|-----------------------------------|---------------|
|
||||
| kyuubi-extension-spark-3-1 | 3.1.x | 1.3.0-incubating | 1.8.0 | 1.3.0-incubating | spark-3.1 |
|
||||
| kyuubi-extension-spark-3-2 | 3.2.x | 1.4.0-incubating | N/A | 1.4.0-incubating | spark-3.2 |
|
||||
| kyuubi-extension-spark-3-3 | 3.3.x | 1.6.0-incubating | N/A | 1.6.0-incubating | spark-3.3 |
|
||||
| kyuubi-extension-spark-3-4 | 3.4.x | 1.8.0 | N/A | 1.8.0 | spark-3.4 |
|
||||
| kyuubi-extension-spark-3-5 | 3.5.x | 1.8.0 | N/A | N/A | spark-3.5 |
|
||||
|
||||
1. Check the matrix to see whether you are using a supported Spark version, and find the corresponding Kyuubi Spark SQL Extension jar
|
||||
2. Get the Kyuubi Spark SQL Extension jar
|
||||
1. Each Kyuubi binary release tarball only contains one default version of Kyuubi Spark SQL Extension jar, if you are looking for such version, you can find it under `$KYUUBI_HOME/extension`
|
||||
2. All supported versions of Kyuubi Spark SQL Extension jar will be deployed to [Maven Central](https://search.maven.org/search?q=kyuubi-extension-spark)
|
||||
3. If you like, you can compile Kyuubi Spark SQL Extension jar by yourself, please activate the corresponding Maven's profile on you compile command, i.e. you can get Kyuubi Spark SQL Extension jar for Spark 3.1 under `extensions/spark/kyuubi-extension-spark-3-1/target` when compile with `-Pspark-3.1`
|
||||
3. If you like, you can compile the Kyuubi Spark SQL Extension jar yourself; please activate the corresponding Maven profile in your compile command, e.g. you can get the Kyuubi Spark SQL Extension jar for Spark 3.5 under `extensions/spark/kyuubi-extension-spark-3-5/target` when compiling with `-Pspark-3.5`
|
||||
3. Put the Kyuubi Spark SQL extension jar `kyuubi-extension-spark-*.jar` into `$SPARK_HOME/jars`
|
||||
4. Enable `KyuubiSparkSQLExtension`, i.e. add a config into `$SPARK_HOME/conf/spark-defaults.conf`, `spark.sql.extensions=org.apache.kyuubi.sql.KyuubiSparkSQLExtension`
|
||||
|
||||
|
||||
@ -75,10 +75,10 @@ Due to the extra sort, the upstream job will run a little slower than before
|
||||
|
||||
This feature is inside Kyuubi extension, so you should apply the extension to Spark by following steps.
|
||||
|
||||
- add extension jar: `copy $KYUUBI_HOME/extension/kyuubi-extension-spark-3-1* $SPARK_HOME/jars/`
|
||||
- add extension jar: `copy $KYUUBI_HOME/extension/kyuubi-extension-spark-3-5* $SPARK_HOME/jars/`
|
||||
- add config into `spark-defaults.conf`: `spark.sql.extensions=org.apache.kyuubi.sql.KyuubiSparkSQLExtension`
|
||||
|
||||
Due to the extension, z-order only works with Spark-3.1 and higher version.
|
||||
Due to the extension, z-order only works with Spark 3.2 and higher versions.
|
||||
|
||||
### Optimize history data
|
||||
|
||||
|
||||
@ -245,7 +245,7 @@ Error operating EXECUTE_STATEMENT: org.apache.spark.sql.AnalysisException: Can n
|
||||
If you get this exception when creating a function, you can check your JDK version.
|
||||
You should update JDK to JDK1.8.0_121 or later, since JDK1.8.0_121 fixes a security issue [Additional access restrictions for URLClassLoader.newInstance](https://www.oracle.com/java/technologies/javase/8u121-relnotes.html).
|
||||
|
||||
### Failed to start Spark 3.1 with error msg 'Cannot modify the value of a Spark config'
|
||||
### Failed to start Spark 3.1 or above version with error msg 'Cannot modify the value of a Spark config'
|
||||
|
||||
Here is the error message
|
||||
|
||||
@ -260,6 +260,6 @@ Caused by: org.apache.spark.sql.AnalysisException: Cannot modify the value of a
|
||||
... 12 more
|
||||
```
|
||||
|
||||
This is because Spark-3.1 will check the config which you set and throw exception if the config is static or used in other module (e.g. yarn/core).
|
||||
This is because, since Spark 3.1, Spark checks the config which you set and throws an exception if the config is static or used in another module (e.g. yarn/core).
|
||||
|
||||
You can add a config `spark.sql.legacy.setCommandRejectsSparkCoreConfs=false` in `spark-defaults.conf` to disable this behavior.
|
||||
|
||||
@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils
|
||||
*
|
||||
* {{{
|
||||
* RUN_BENCHMARK=1 ./build/mvn clean test \
|
||||
* -pl extensions/spark/kyuubi-extension-spark-3-1 -am \
|
||||
* -Pspark-3.1,kyuubi-extension-spark-3-1 \
|
||||
* -pl extensions/spark/kyuubi-extension-spark-3-4 -am \
|
||||
* -Pspark-3.4,kyuubi-extension-spark-3-4 \
|
||||
* -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark
|
||||
* }}}
|
||||
*/
|
||||
|
||||
@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils
|
||||
*
|
||||
* {{{
|
||||
* RUN_BENCHMARK=1 ./build/mvn clean test \
|
||||
* -pl extensions/spark/kyuubi-extension-spark-3-1 -am \
|
||||
* -Pspark-3.1,kyuubi-extension-spark-3-1 \
|
||||
* -pl extensions/spark/kyuubi-extension-spark-3-5 -am \
|
||||
* -Pspark-3.5,kyuubi-extension-spark-3-5 \
|
||||
* -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark
|
||||
* }}}
|
||||
*/
|
||||
|
||||
@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils
|
||||
*
|
||||
* {{{
|
||||
* RUN_BENCHMARK=1 ./build/mvn clean test \
|
||||
* -pl extensions/spark/kyuubi-extension-spark-3-1 -am \
|
||||
* -Pspark-3.1,kyuubi-extension-spark-3-1 \
|
||||
* -pl extensions/spark/kyuubi-extension-spark-3-3 -am \
|
||||
* -Pspark-3.3,kyuubi-extension-spark-3-3 \
|
||||
* -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark
|
||||
* }}}
|
||||
*/
|
||||
|
||||
@ -381,6 +381,9 @@ object SparkSQLEngine extends Logging {
|
||||
}
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
if (KyuubiSparkUtil.SPARK_ENGINE_RUNTIME_VERSION === "3.1") {
|
||||
warn("The support for Spark 3.1 is deprecated, and will be removed in the next version.")
|
||||
}
|
||||
val startedTime = System.currentTimeMillis()
|
||||
val submitTime = kyuubiConf.getOption(KYUUBI_ENGINE_SUBMIT_TIME_KEY) match {
|
||||
case Some(t) => t.toLong
|
||||
|
||||
@ -159,7 +159,7 @@ object KyuubiArrowConverters extends SQLConfHelper with Logging {
|
||||
partsScanned.until(math.min(partsScanned + numPartsToTry, totalParts))
|
||||
|
||||
// TODO: SparkPlan.session introduced in SPARK-35798, replace with SparkPlan.session once we
|
||||
// drop Spark-3.1.x support.
|
||||
// drop Spark 3.1 support.
|
||||
val sc = SparkSession.active.sparkContext
|
||||
val res = sc.runJob(
|
||||
childRDD,
|
||||
@ -347,6 +347,6 @@ object KyuubiArrowConverters extends SQLConfHelper with Logging {
|
||||
largeVarTypes)
|
||||
}
|
||||
|
||||
// IpcOption.DEFAULT was introduced in ARROW-11081(ARROW-4.0.0), add this for adapt Spark-3.1/3.2
|
||||
// IpcOption.DEFAULT was introduced in ARROW-11081 (Arrow 4.0.0); add this to adapt to Spark 3.1/3.2
|
||||
final private val ARROW_IPC_OPTION_DEFAULT = new IpcOption()
|
||||
}
|
||||
|
||||
@ -161,7 +161,7 @@ object SparkDatasetHelper extends Logging {
|
||||
|
||||
private def doCollectLimit(collectLimit: CollectLimitExec): Array[Array[Byte]] = {
|
||||
// TODO: SparkPlan.session introduced in SPARK-35798, replace with SparkPlan.session once we
|
||||
// drop Spark-3.1.x support.
|
||||
// drop Spark 3.1 support.
|
||||
val timeZoneId = SparkSession.active.sessionState.conf.sessionLocalTimeZone
|
||||
val maxRecordsPerBatch = SparkSession.active.sessionState.conf.arrowMaxRecordsPerBatch
|
||||
|
||||
|
||||
@ -523,9 +523,9 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp
|
||||
|
||||
/**
|
||||
* This method provides a reflection-based implementation of [[SQLConf.isStaticConfigKey]] to
|
||||
* adapt Spark-3.1.x
|
||||
* adapt Spark 3.1
|
||||
*
|
||||
* TODO: Once we drop support for Spark 3.1.x, we can directly call
|
||||
* TODO: Once we drop support for Spark 3.1, we can directly call
|
||||
* [[SQLConf.isStaticConfigKey()]].
|
||||
*/
|
||||
private def isStaticConfigKey(key: String): Boolean =
|
||||
|
||||
18
pom.xml
18
pom.xml
@ -2231,24 +2231,6 @@
|
||||
</build>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>spark-3.1</id>
|
||||
<modules>
|
||||
<module>extensions/spark/kyuubi-extension-spark-common</module>
|
||||
<module>extensions/spark/kyuubi-extension-spark-3-1</module>
|
||||
</modules>
|
||||
<properties>
|
||||
<spark.version>3.1.3</spark.version>
|
||||
<spark.binary.version>3.1</spark.binary.version>
|
||||
<delta.artifact>delta-core</delta.artifact>
|
||||
<delta.version>1.0.1</delta.version>
|
||||
<!-- Iceberg removed the support for Spark 3.1 in apache/iceberg#8661 since 1.4.0 -->
|
||||
<iceberg.version>1.3.1</iceberg.version>
|
||||
<spark.archive.name>spark-${spark.version}-bin-hadoop3.2.tgz</spark.archive.name>
|
||||
<maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow</maven.plugin.scalatest.exclude.tags>
|
||||
</properties>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>spark-3.2</id>
|
||||
<modules>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user