[KYUUBI #6091] Deprecate and remove building support for Spark 3.1

# 🔍 Description
## Issue References 🔗

This pull request aims to remove building support for Spark 3.1, while still keeping the engine support for Spark 3.1.

- VOTE: https://lists.apache.org/thread/670fx1qx7rm0vpvk8k8094q2d0fthw5b
- VOTE RESULT: https://lists.apache.org/thread/0zdxg5zjnc1wpxmw9mgtsxp1ywqt6qvb

The next step is to clean up code in Spark extensions to drop 3.1-related code.

## Describe Your Solution 🔧

- Remove Maven profile `spark-3.1`, and references on docs, release scripts, etc.
- Keep the cross-version verification to ensure that the Spark SQL engine built on the default Spark version (3.4) still works well on Spark 3.1 runtime.

## Types of changes 🔖

- [ ] Bugfix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [x] Breaking change (fix or feature that would cause existing functionality to change)

## Test Plan 🧪

Pass GA.

---

# Checklist 📝

- [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html)

**Be nice. Be informative.**

Closes #6091 from pan3793/remove-spark-3.1-profile.

Closes #6091

ce2983284 [Cheng Pan] nit
5887c808b [Cheng Pan] migration guide
cf28096d3 [Cheng Pan] Log deprecation message on Spark SQL engine with 3.1
a467e618d [Cheng Pan] nit
e11c0fb31 [Cheng Pan] Remove building support for Spark 3.1

Authored-by: Cheng Pan <chengpan@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
Cheng Pan 2024-03-04 20:23:06 +08:00
parent a9b90c7100
commit e0d706e696
No known key found for this signature in database
GPG Key ID: 8001952629BCC75D
21 changed files with 37 additions and 64 deletions

View File

@ -45,7 +45,7 @@ jobs:
- run: >-
build/mvn org.apache.rat:apache-rat-plugin:check
-Ptpcds -Pkubernetes-it
-Pspark-3.1 -Pspark-3.2 -Pspark-3.3 -Pspark-3.4 -Pspark-3.5
-Pspark-3.2 -Pspark-3.3 -Pspark-3.4 -Pspark-3.5
- name: Upload rat report
if: failure()
uses: actions/upload-artifact@v3

View File

@ -33,14 +33,17 @@ jobs:
- branch-1.7
- branch-1.8
profiles:
- -Pflink-provided,spark-provided,hive-provided,spark-3.1
- -Pflink-provided,spark-provided,hive-provided,spark-3.2
- -Pflink-provided,spark-provided,hive-provided,spark-3.3,tpcds
include:
- branch: master
profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.4
- branch: master
profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.5
- branch: branch-1.8
profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.4
- branch: branch-1.8
profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.5
steps:
- uses: actions/checkout@v4
with:

View File

@ -66,17 +66,14 @@ jobs:
run: |
MVN_OPT="-DskipTests -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip"
build/mvn clean install ${MVN_OPT} -Pflink-provided,hive-provided,spark-provided,spark-3.2,tpcds
build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-1 -Pspark-3.1
build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-3,extensions/spark/kyuubi-spark-connector-hive -Pspark-3.3
build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-4 -Pspark-3.4
build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-5 -Pspark-3.5
- name: Scalastyle with maven
id: scalastyle-check
# Check with Spark 3.1 profile separately as it use Iceberg 1.3.1 which is not compatible with Spark 3.5+
run: |
build/mvn scalastyle:check ${{ matrix.profiles }}
build/mvn scalastyle:check -Pflink-provided,hive-provided,spark-provided,spark-3.1
- name: Print scalastyle error report
if: failure() && steps.scalastyle-check.outcome != 'success'
run: >-
@ -90,7 +87,7 @@ jobs:
run: |
SPOTLESS_BLACK_VERSION=$(build/mvn help:evaluate -Dexpression=spotless.python.black.version -q -DforceStdout)
pip install black==$SPOTLESS_BLACK_VERSION
build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python,spark-3.1
build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python
- name: setup npm
uses: actions/setup-node@v4
with:

View File

@ -20,7 +20,7 @@
# Usage:
# Run the docker command below
# docker build \
# --build-arg MVN_ARG="-Pspark-3.1,spark-hadoop-3.2" \
# --build-arg MVN_ARG="-Pspark-3.5" \
# --file build/Dockerfile.CI \
# --tag apache/kyuubi:<tag> \
# .

View File

@ -110,11 +110,6 @@ upload_svn_staging() {
}
upload_nexus_staging() {
# Spark Extension Plugin for Spark 3.1
${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.1 \
-s "${KYUUBI_DIR}/build/release/asf-settings.xml" \
-pl extensions/spark/kyuubi-extension-spark-3-1 -am
# Spark Extension Plugin for Spark 3.2
${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.2 \
-s "${KYUUBI_DIR}/build/release/asf-settings.xml" \

View File

@ -161,16 +161,6 @@
</build>
<profiles>
<profile>
<id>spark-3.1</id>
<dependencies>
<dependency>
<groupId>org.apache.kyuubi</groupId>
<artifactId>kyuubi-extension-spark-3-1_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>
<profile>
<id>spark-3.2</id>
<dependencies>

View File

@ -20,7 +20,7 @@ set -x
KYUUBI_HOME="$(cd "`dirname "$0"`/.."; pwd)"
PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-3.5,spark-3.4,spark-3.3,spark-3.2,spark-3.1,tpcds,kubernetes-it"
PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-3.5,spark-3.4,spark-3.3,spark-3.2,tpcds,kubernetes-it"
# python style checks rely on `black` in path
if ! command -v black &> /dev/null

View File

@ -63,7 +63,6 @@ Since v1.1.0, Kyuubi support building with different Spark profiles,
| Profile | Default | Since |
|-------------|---------|-------|
| -Pspark-3.1 | | 1.1.0 |
| -Pspark-3.2 | | 1.4.0 |
| -Pspark-3.3 | | 1.6.0 |
| -Pspark-3.4 | ✓ | 1.8.0 |

View File

@ -21,6 +21,8 @@
* Since Kyuubi 1.9.0, `kyuubi.session.conf.advisor` can be set as a sequence, Kyuubi supported chaining SessionConfAdvisors.
* Since Kyuubi 1.9.0, the support of Derby is removed for Kyuubi metastore.
* Since Kyuubi 1.9.0, the support of Spark SQL engine for Spark 3.1 is deprecated, and will be removed in the future.
* Since Kyuubi 1.9.0, the support of Spark extensions for Spark 3.1 is removed, please use Spark 3.2 or higher versions.
## Upgrading from Kyuubi 1.8.0 to 1.8.1

View File

@ -33,7 +33,7 @@ Hive Dialect helps to solve failures access Kyuubi. It fails and unexpected resu
1. Get the Kyuubi Hive Dialect Extension jar
1. compile the extension by executing `build/mvn clean package -pl :kyuubi-extension-spark-jdbc-dialect_2.12 -DskipTests`
2. get the extension jar under `extensions/spark/kyuubi-extension-spark-jdbc-dialect/target`
3. If you like, you can compile the extension jar with the corresponding Maven's profile on you compile command, i.e. you can get extension jar for Spark 3.2 by compiling with `-Pspark-3.1`
3. If you like, you can compile the extension jar with the corresponding Maven profile in your compile command, e.g. you can get the extension jar for Spark 3.5 by compiling with `-Pspark-3.5`
2. Put the Kyuubi Hive Dialect Extension jar `kyuubi-extension-spark-jdbc-dialect_-*.jar` into `$SPARK_HOME/jars`
3. Enable `KyuubiSparkJdbcDialectExtension`, by setting `spark.sql.extensions=org.apache.spark.sql.dialect.KyuubiSparkJdbcDialectExtension`, i.e.
- add a config into `$SPARK_HOME/conf/spark-defaults.conf`

View File

@ -43,17 +43,19 @@ And don't worry, Kyuubi will support the new Apache Spark version in the future.
## Usage
| Kyuubi Spark SQL extension | Supported Spark version(s) | Available since | EOL | Bundled in Binary release tarball | Maven profile |
|----------------------------|----------------------------|------------------|-----|-----------------------------------|---------------|
| kyuubi-extension-spark-3-1 | 3.1.x | 1.3.0-incubating | N/A | 1.3.0-incubating | spark-3.1 |
| kyuubi-extension-spark-3-2 | 3.2.x | 1.4.0-incubating | N/A | 1.4.0-incubating | spark-3.2 |
| kyuubi-extension-spark-3-3 | 3.3.x | 1.6.0-incubating | N/A | 1.6.0-incubating | spark-3.3 |
| Kyuubi Spark SQL extension | Supported Spark version(s) | Available since | EOL | Bundled in Binary release tarball | Maven profile |
|----------------------------|----------------------------|------------------|-------|-----------------------------------|---------------|
| kyuubi-extension-spark-3-1 | 3.1.x | 1.3.0-incubating | 1.8.0 | 1.3.0-incubating | spark-3.1 |
| kyuubi-extension-spark-3-2 | 3.2.x | 1.4.0-incubating | N/A | 1.4.0-incubating | spark-3.2 |
| kyuubi-extension-spark-3-3 | 3.3.x | 1.6.0-incubating | N/A | 1.6.0-incubating | spark-3.3 |
| kyuubi-extension-spark-3-4 | 3.4.x | 1.8.0 | N/A | 1.8.0 | spark-3.4 |
| kyuubi-extension-spark-3-5 | 3.5.x | 1.8.0 | N/A | N/A | spark-3.5 |
1. Check the matrix to see whether you are using a supported Spark version, and find the corresponding Kyuubi Spark SQL Extension jar
2. Get the Kyuubi Spark SQL Extension jar
1. Each Kyuubi binary release tarball only contains one default version of Kyuubi Spark SQL Extension jar, if you are looking for such version, you can find it under `$KYUUBI_HOME/extension`
2. All supported versions of Kyuubi Spark SQL Extension jar will be deployed to [Maven Central](https://search.maven.org/search?q=kyuubi-extension-spark)
3. If you like, you can compile Kyuubi Spark SQL Extension jar by yourself, please activate the corresponding Maven's profile on you compile command, i.e. you can get Kyuubi Spark SQL Extension jar for Spark 3.1 under `extensions/spark/kyuubi-extension-spark-3-1/target` when compile with `-Pspark-3.1`
3. If you like, you can compile the Kyuubi Spark SQL Extension jar by yourself. Please activate the corresponding Maven profile in your compile command, e.g. you can get the Kyuubi Spark SQL Extension jar for Spark 3.5 under `extensions/spark/kyuubi-extension-spark-3-5/target` when compiling with `-Pspark-3.5`
3. Put the Kyuubi Spark SQL extension jar `kyuubi-extension-spark-*.jar` into `$SPARK_HOME/jars`
4. Enable `KyuubiSparkSQLExtension`, i.e. add a config into `$SPARK_HOME/conf/spark-defaults.conf`, `spark.sql.extensions=org.apache.kyuubi.sql.KyuubiSparkSQLExtension`

View File

@ -75,10 +75,10 @@ Due to the extra sort, the upstream job will run a little slower than before
This feature is inside Kyuubi extension, so you should apply the extension to Spark by following steps.
- add extension jar: `copy $KYUUBI_HOME/extension/kyuubi-extension-spark-3-1* $SPARK_HOME/jars/`
- add extension jar: `copy $KYUUBI_HOME/extension/kyuubi-extension-spark-3-5* $SPARK_HOME/jars/`
- add config into `spark-defaults.conf`: `spark.sql.extensions=org.apache.kyuubi.sql.KyuubiSparkSQLExtension`
Due to the extension, z-order only works with Spark-3.1 and higher version.
Due to the extension, z-order only works with Spark 3.2 and higher versions.
### Optimize history data

View File

@ -245,7 +245,7 @@ Error operating EXECUTE_STATEMENT: org.apache.spark.sql.AnalysisException: Can n
If you get this exception when creating a function, you can check your JDK version.
You should update JDK to JDK1.8.0_121 and later, since JDK1.8.0_121 fix a security issue [Additional access restrictions for URLClassLoader.newInstance](https://www.oracle.com/java/technologies/javase/8u121-relnotes.html).
### Failed to start Spark 3.1 with error msg 'Cannot modify the value of a Spark config'
### Failed to start Spark 3.1 or above version with error msg 'Cannot modify the value of a Spark config'
Here is the error message
@ -260,6 +260,6 @@ Caused by: org.apache.spark.sql.AnalysisException: Cannot modify the value of a
... 12 more
```
This is because Spark-3.1 will check the config which you set and throw exception if the config is static or used in other module (e.g. yarn/core).
This is because, since Spark 3.1, Spark checks the configs you set and throws an exception if a config is static or used in another module (e.g. yarn/core).
You can add a config `spark.sql.legacy.setCommandRejectsSparkCoreConfs=false` in `spark-defaults.conf` to disable this behavior.

View File

@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils
*
* {{{
* RUN_BENCHMARK=1 ./build/mvn clean test \
* -pl extensions/spark/kyuubi-extension-spark-3-1 -am \
* -Pspark-3.1,kyuubi-extension-spark-3-1 \
* -pl extensions/spark/kyuubi-extension-spark-3-4 -am \
* -Pspark-3.4,kyuubi-extension-spark-3-4 \
* -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark
* }}}
*/

View File

@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils
*
* {{{
* RUN_BENCHMARK=1 ./build/mvn clean test \
* -pl extensions/spark/kyuubi-extension-spark-3-1 -am \
* -Pspark-3.1,kyuubi-extension-spark-3-1 \
* -pl extensions/spark/kyuubi-extension-spark-3-5 -am \
* -Pspark-3.5,kyuubi-extension-spark-3-5 \
* -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark
* }}}
*/

View File

@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils
*
* {{{
* RUN_BENCHMARK=1 ./build/mvn clean test \
* -pl extensions/spark/kyuubi-extension-spark-3-1 -am \
* -Pspark-3.1,kyuubi-extension-spark-3-1 \
* -pl extensions/spark/kyuubi-extension-spark-3-3 -am \
* -Pspark-3.3,kyuubi-extension-spark-3-3 \
* -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark
* }}}
*/

View File

@ -381,6 +381,9 @@ object SparkSQLEngine extends Logging {
}
def main(args: Array[String]): Unit = {
if (KyuubiSparkUtil.SPARK_ENGINE_RUNTIME_VERSION === "3.1") {
warn("The support for Spark 3.1 is deprecated, and will be removed in the next version.")
}
val startedTime = System.currentTimeMillis()
val submitTime = kyuubiConf.getOption(KYUUBI_ENGINE_SUBMIT_TIME_KEY) match {
case Some(t) => t.toLong

View File

@ -159,7 +159,7 @@ object KyuubiArrowConverters extends SQLConfHelper with Logging {
partsScanned.until(math.min(partsScanned + numPartsToTry, totalParts))
// TODO: SparkPlan.session introduced in SPARK-35798, replace with SparkPlan.session once we
// drop Spark-3.1.x support.
// drop Spark 3.1 support.
val sc = SparkSession.active.sparkContext
val res = sc.runJob(
childRDD,
@ -347,6 +347,6 @@ object KyuubiArrowConverters extends SQLConfHelper with Logging {
largeVarTypes)
}
// IpcOption.DEFAULT was introduced in ARROW-11081(ARROW-4.0.0), add this for adapt Spark-3.1/3.2
// IpcOption.DEFAULT was introduced in ARROW-11081 (ARROW-4.0.0); define it here to adapt to Spark 3.1/3.2
final private val ARROW_IPC_OPTION_DEFAULT = new IpcOption()
}

View File

@ -161,7 +161,7 @@ object SparkDatasetHelper extends Logging {
private def doCollectLimit(collectLimit: CollectLimitExec): Array[Array[Byte]] = {
// TODO: SparkPlan.session introduced in SPARK-35798, replace with SparkPlan.session once we
// drop Spark-3.1.x support.
// drop Spark 3.1 support.
val timeZoneId = SparkSession.active.sessionState.conf.sessionLocalTimeZone
val maxRecordsPerBatch = SparkSession.active.sessionState.conf.arrowMaxRecordsPerBatch

View File

@ -523,9 +523,9 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp
/**
* This method provides a reflection-based implementation of [[SQLConf.isStaticConfigKey]] to
* adapt Spark-3.1.x
* adapt Spark 3.1
*
* TODO: Once we drop support for Spark 3.1.x, we can directly call
* TODO: Once we drop support for Spark 3.1, we can directly call
* [[SQLConf.isStaticConfigKey()]].
*/
private def isStaticConfigKey(key: String): Boolean =

18
pom.xml
View File

@ -2231,24 +2231,6 @@
</build>
</profile>
<profile>
<id>spark-3.1</id>
<modules>
<module>extensions/spark/kyuubi-extension-spark-common</module>
<module>extensions/spark/kyuubi-extension-spark-3-1</module>
</modules>
<properties>
<spark.version>3.1.3</spark.version>
<spark.binary.version>3.1</spark.binary.version>
<delta.artifact>delta-core</delta.artifact>
<delta.version>1.0.1</delta.version>
<!-- Iceberg removed the support for Spark 3.1 in apache/iceberg#8661 since 1.4.0 -->
<iceberg.version>1.3.1</iceberg.version>
<spark.archive.name>spark-${spark.version}-bin-hadoop3.2.tgz</spark.archive.name>
<maven.plugin.scalatest.exclude.tags>org.scalatest.tags.Slow</maven.plugin.scalatest.exclude.tags>
</properties>
</profile>
<profile>
<id>spark-3.2</id>
<modules>