diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml index 0aef73441..6b894dbc3 100644 --- a/.github/workflows/license.yml +++ b/.github/workflows/license.yml @@ -45,7 +45,7 @@ jobs: - run: >- build/mvn org.apache.rat:apache-rat-plugin:check -Ptpcds -Pkubernetes-it - -Pspark-3.1 -Pspark-3.2 -Pspark-3.3 -Pspark-3.4 -Pspark-3.5 + -Pspark-3.2 -Pspark-3.3 -Pspark-3.4 -Pspark-3.5 - name: Upload rat report if: failure() uses: actions/upload-artifact@v3 diff --git a/.github/workflows/publish-snapshot-nexus.yml b/.github/workflows/publish-snapshot-nexus.yml index 2be08301f..4af12e94c 100644 --- a/.github/workflows/publish-snapshot-nexus.yml +++ b/.github/workflows/publish-snapshot-nexus.yml @@ -33,14 +33,17 @@ jobs: - branch-1.7 - branch-1.8 profiles: - - -Pflink-provided,spark-provided,hive-provided,spark-3.1 - -Pflink-provided,spark-provided,hive-provided,spark-3.2 - -Pflink-provided,spark-provided,hive-provided,spark-3.3,tpcds include: - branch: master profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.4 + - branch: master + profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.5 - branch: branch-1.8 profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.4 + - branch: branch-1.8 + profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.5 steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 0c3dd1e60..9669bbdcb 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -66,17 +66,14 @@ jobs: run: | MVN_OPT="-DskipTests -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip" build/mvn clean install ${MVN_OPT} -Pflink-provided,hive-provided,spark-provided,spark-3.2,tpcds - build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-1 -Pspark-3.1 build/mvn clean install ${MVN_OPT} -pl 
extensions/spark/kyuubi-extension-spark-3-3,extensions/spark/kyuubi-spark-connector-hive -Pspark-3.3 build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-4 -Pspark-3.4 build/mvn clean install ${MVN_OPT} -pl extensions/spark/kyuubi-extension-spark-3-5 -Pspark-3.5 - name: Scalastyle with maven id: scalastyle-check - # Check with Spark 3.1 profile separately as it use Iceberg 1.3.1 which is not compatible with Spark 3.5+ run: | build/mvn scalastyle:check ${{ matrix.profiles }} - build/mvn scalastyle:check -Pflink-provided,hive-provided,spark-provided,spark-3.1 - name: Print scalastyle error report if: failure() && steps.scalastyle-check.outcome != 'success' run: >- @@ -90,7 +87,7 @@ jobs: run: | SPOTLESS_BLACK_VERSION=$(build/mvn help:evaluate -Dexpression=spotless.python.black.version -q -DforceStdout) pip install black==$SPOTLESS_BLACK_VERSION - build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python,spark-3.1 + build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python - name: setup npm uses: actions/setup-node@v4 with: diff --git a/build/Dockerfile.CI b/build/Dockerfile.CI index 2ce8191fa..3209a143f 100644 --- a/build/Dockerfile.CI +++ b/build/Dockerfile.CI @@ -20,7 +20,7 @@ # Usage: # Run the docker command below # docker build \ -# --build-arg MVN_ARG="-Pspark-3.1,spark-hadoop-3.2" \ +# --build-arg MVN_ARG="-Pspark-3.5" \ # --file build/Dockerfile.CI \ # --tag apache/kyuubi: \ # . 
diff --git a/build/release/release.sh b/build/release/release.sh index 95a9e7471..de32a492e 100755 --- a/build/release/release.sh +++ b/build/release/release.sh @@ -110,11 +110,6 @@ upload_svn_staging() { } upload_nexus_staging() { - # Spark Extension Plugin for Spark 3.1 - ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.1 \ - -s "${KYUUBI_DIR}/build/release/asf-settings.xml" \ - -pl extensions/spark/kyuubi-extension-spark-3-1 -am - # Spark Extension Plugin for Spark 3.2 ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.2 \ -s "${KYUUBI_DIR}/build/release/asf-settings.xml" \ diff --git a/dev/kyuubi-codecov/pom.xml b/dev/kyuubi-codecov/pom.xml index cdf798273..4161d04e7 100644 --- a/dev/kyuubi-codecov/pom.xml +++ b/dev/kyuubi-codecov/pom.xml @@ -161,16 +161,6 @@ - - spark-3.1 - - - org.apache.kyuubi - kyuubi-extension-spark-3-1_${scala.binary.version} - ${project.version} - - - spark-3.2 diff --git a/dev/reformat b/dev/reformat index fe05408cc..eca7b9f5d 100755 --- a/dev/reformat +++ b/dev/reformat @@ -20,7 +20,7 @@ set -x KYUUBI_HOME="$(cd "`dirname "$0"`/.."; pwd)" -PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-3.5,spark-3.4,spark-3.3,spark-3.2,spark-3.1,tpcds,kubernetes-it" +PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-3.5,spark-3.4,spark-3.3,spark-3.2,tpcds,kubernetes-it" # python style checks rely on `black` in path if ! 
command -v black &> /dev/null diff --git a/docs/contributing/code/building.md b/docs/contributing/code/building.md index 049d7afb0..a42c5d2f9 100644 --- a/docs/contributing/code/building.md +++ b/docs/contributing/code/building.md @@ -63,7 +63,6 @@ Since v1.1.0, Kyuubi support building with different Spark profiles, | Profile | Default | Since | |-------------|---------|-------| -| -Pspark-3.1 | | 1.1.0 | | -Pspark-3.2 | | 1.4.0 | | -Pspark-3.3 | | 1.6.0 | | -Pspark-3.4 | ✓ | 1.8.0 | diff --git a/docs/deployment/migration-guide.md b/docs/deployment/migration-guide.md index 1f73d7342..3b3541b7e 100644 --- a/docs/deployment/migration-guide.md +++ b/docs/deployment/migration-guide.md @@ -21,6 +21,8 @@ * Since Kyuubi 1.9.0, `kyuubi.session.conf.advisor` can be set as a sequence, Kyuubi supported chaining SessionConfAdvisors. * Since Kyuubi 1.9.0, the support of Derby is removal for Kyuubi metastore. +* Since Kyuubi 1.9.0, the support of Spark SQL engine for Spark 3.1 is deprecated, and will be removed in the future. +* Since Kyuubi 1.9.0, the support of Spark extensions for Spark 3.1 is removed, please use Spark 3.2 or higher versions. ## Upgrading from Kyuubi 1.8.0 to 1.8.1 diff --git a/docs/extensions/engines/spark/jdbc-dialect.md b/docs/extensions/engines/spark/jdbc-dialect.md index e22c33926..a04a6df45 100644 --- a/docs/extensions/engines/spark/jdbc-dialect.md +++ b/docs/extensions/engines/spark/jdbc-dialect.md @@ -33,7 +33,7 @@ Hive Dialect helps to solve failures access Kyuubi. It fails and unexpected resu 1. Get the Kyuubi Hive Dialect Extension jar 1. compile the extension by executing `build/mvn clean package -pl :kyuubi-extension-spark-jdbc-dialect_2.12 -DskipTests` 2. get the extension jar under `extensions/spark/kyuubi-extension-spark-jdbc-dialect/target` - 3. If you like, you can compile the extension jar with the corresponding Maven's profile on you compile command, i.e. you can get extension jar for Spark 3.2 by compiling with `-Pspark-3.1` + 3. 
If you like, you can compile the extension jar with the corresponding Maven profile in your compile command, e.g. you can get the extension jar for Spark 3.5 by compiling with `-Pspark-3.5` 2. Put the Kyuubi Hive Dialect Extension jar `kyuubi-extension-spark-jdbc-dialect_-*.jar` into `$SPARK_HOME/jars` 3. Enable `KyuubiSparkJdbcDialectExtension`, by setting `spark.sql.extensions=org.apache.spark.sql.dialect.KyuubiSparkJdbcDialectExtension`, i.e. - add a config into `$SPARK_HOME/conf/spark-defaults.conf` diff --git a/docs/extensions/engines/spark/rules.md b/docs/extensions/engines/spark/rules.md index 55357e46d..34781a894 100644 --- a/docs/extensions/engines/spark/rules.md +++ b/docs/extensions/engines/spark/rules.md @@ -43,17 +43,19 @@ And don't worry, Kyuubi will support the new Apache Spark version in the future. ## Usage -| Kyuubi Spark SQL extension | Supported Spark version(s) | Available since | EOL | Bundled in Binary release tarball | Maven profile | -|----------------------------|----------------------------|------------------|-----|-----------------------------------|---------------| -| kyuubi-extension-spark-3-1 | 3.1.x | 1.3.0-incubating | N/A | 1.3.0-incubating | spark-3.1 | -| kyuubi-extension-spark-3-2 | 3.2.x | 1.4.0-incubating | N/A | 1.4.0-incubating | spark-3.2 | -| kyuubi-extension-spark-3-3 | 3.3.x | 1.6.0-incubating | N/A | 1.6.0-incubating | spark-3.3 | +| Kyuubi Spark SQL extension | Supported Spark version(s) | Available since | EOL | Bundled in Binary release tarball | Maven profile | +|----------------------------|----------------------------|------------------|-------|-----------------------------------|---------------| +| kyuubi-extension-spark-3-1 | 3.1.x | 1.3.0-incubating | 1.8.0 | 1.3.0-incubating | spark-3.1 | +| kyuubi-extension-spark-3-2 | 3.2.x | 1.4.0-incubating | N/A | 1.4.0-incubating | spark-3.2 | +| kyuubi-extension-spark-3-3 | 3.3.x | 1.6.0-incubating | N/A | 1.6.0-incubating | spark-3.3 | +| kyuubi-extension-spark-3-4 |
3.4.x | 1.8.0 | N/A | 1.8.0 | spark-3.4 | +| kyuubi-extension-spark-3-5 | 3.5.x | 1.8.0 | N/A | N/A | spark-3.5 | 1. Check the matrix that if you are using the supported Spark version, and find the corresponding Kyuubi Spark SQL Extension jar 2. Get the Kyuubi Spark SQL Extension jar 1. Each Kyuubi binary release tarball only contains one default version of Kyuubi Spark SQL Extension jar, if you are looking for such version, you can find it under `$KYUUBI_HOME/extension` 2. All supported versions of Kyuubi Spark SQL Extension jar will be deployed to [Maven Central](https://search.maven.org/search?q=kyuubi-extension-spark) - 3. If you like, you can compile Kyuubi Spark SQL Extension jar by yourself, please activate the corresponding Maven's profile on you compile command, i.e. you can get Kyuubi Spark SQL Extension jar for Spark 3.1 under `extensions/spark/kyuubi-extension-spark-3-1/target` when compile with `-Pspark-3.1` + 3. If you like, you can compile Kyuubi Spark SQL Extension jar by yourself, please activate the corresponding Maven profile in your compile command, e.g. you can get Kyuubi Spark SQL Extension jar for Spark 3.5 under `extensions/spark/kyuubi-extension-spark-3-5/target` when compiling with `-Pspark-3.5` 3. Put the Kyuubi Spark SQL extension jar `kyuubi-extension-spark-*.jar` into `$SPARK_HOME/jars` 4. Enable `KyuubiSparkSQLExtension`, i.e. add a config into `$SPARK_HOME/conf/spark-defaults.conf`, `spark.sql.extensions=org.apache.kyuubi.sql.KyuubiSparkSQLExtension` diff --git a/docs/extensions/engines/spark/z-order.md b/docs/extensions/engines/spark/z-order.md index d04ca3e0c..c7e6e049b 100644 --- a/docs/extensions/engines/spark/z-order.md +++ b/docs/extensions/engines/spark/z-order.md @@ -75,10 +75,10 @@ Due to the extra sort, the upstream job will run a little slower than before This feature is inside Kyuubi extension, so you should apply the extension to Spark by following steps.
-- add extension jar: `copy $KYUUBI_HOME/extension/kyuubi-extension-spark-3-1* $SPARK_HOME/jars/` +- add extension jar: `copy $KYUUBI_HOME/extension/kyuubi-extension-spark-3-5* $SPARK_HOME/jars/` - add config into `spark-defaults.conf`: `spark.sql.extensions=org.apache.kyuubi.sql.KyuubiSparkSQLExtension` -Due to the extension, z-order only works with Spark-3.1 and higher version. +Due to the extension, z-order only works with Spark 3.2 and higher versions. ### Optimize history data diff --git a/docs/monitor/trouble_shooting.md b/docs/monitor/trouble_shooting.md index e6ba5ea1a..9c1bab71b 100644 --- a/docs/monitor/trouble_shooting.md +++ b/docs/monitor/trouble_shooting.md @@ -245,7 +245,7 @@ Error operating EXECUTE_STATEMENT: org.apache.spark.sql.AnalysisException: Can n If you get this exception when creating a function, you can check your JDK version. You should update JDK to JDK1.8.0_121 and later, since JDK1.8.0_121 fix a security issue [Additional access restrictions for URLClassLoader.newInstance](https://www.oracle.com/java/technologies/javase/8u121-relnotes.html). -### Failed to start Spark 3.1 with error msg 'Cannot modify the value of a Spark config' +### Failed to start Spark 3.1 or above with error msg 'Cannot modify the value of a Spark config' Here is the error message @@ -260,6 +260,6 @@ Caused by: org.apache.spark.sql.AnalysisException: Cannot modify the value of a ... 12 more ``` -This is because Spark-3.1 will check the config which you set and throw exception if the config is static or used in other module (e.g. yarn/core). +This is because, since Spark 3.1, Spark checks the configs you set and throws an exception if a config is static or used in another module (e.g. yarn/core). You can add a config `spark.sql.legacy.setCommandRejectsSparkCoreConfs=false` in `spark-defaults.conf` to disable this behavior.
diff --git a/extensions/spark/kyuubi-extension-spark-3-4/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala b/extensions/spark/kyuubi-extension-spark-3-4/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala index 9b1614fce..b2ce305e4 100644 --- a/extensions/spark/kyuubi-extension-spark-3-4/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala +++ b/extensions/spark/kyuubi-extension-spark-3-4/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala @@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils * * {{{ * RUN_BENCHMARK=1 ./build/mvn clean test \ - * -pl extensions/spark/kyuubi-extension-spark-3-1 -am \ - * -Pspark-3.1,kyuubi-extension-spark-3-1 \ + * -pl extensions/spark/kyuubi-extension-spark-3-4 -am \ + * -Pspark-3.4,kyuubi-extension-spark-3-4 \ * -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark * }}} */ diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala index 9b1614fce..7af1ca048 100644 --- a/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala @@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils * * {{{ * RUN_BENCHMARK=1 ./build/mvn clean test \ - * -pl extensions/spark/kyuubi-extension-spark-3-1 -am \ - * -Pspark-3.1,kyuubi-extension-spark-3-1 \ + * -pl extensions/spark/kyuubi-extension-spark-3-5 -am \ + * -Pspark-3.5,kyuubi-extension-spark-3-5 \ * -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark * }}} */ diff --git a/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala 
b/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala index 9b1614fce..d6c52b3a5 100644 --- a/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala +++ b/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala @@ -29,8 +29,8 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils * * {{{ * RUN_BENCHMARK=1 ./build/mvn clean test \ - * -pl extensions/spark/kyuubi-extension-spark-3-1 -am \ - * -Pspark-3.1,kyuubi-extension-spark-3-1 \ + * -pl extensions/spark/kyuubi-extension-spark-3-3 -am \ + * -Pspark-3.3,kyuubi-extension-spark-3-3 \ * -Dtest=none -DwildcardSuites=org.apache.spark.sql.ZorderCoreBenchmark * }}} */ diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala index d1331cd02..6dd438ffd 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala @@ -381,6 +381,9 @@ object SparkSQLEngine extends Logging { } def main(args: Array[String]): Unit = { + if (KyuubiSparkUtil.SPARK_ENGINE_RUNTIME_VERSION === "3.1") { + warn("The support for Spark 3.1 is deprecated, and will be removed in the next version.") + } val startedTime = System.currentTimeMillis() val submitTime = kyuubiConf.getOption(KYUUBI_ENGINE_SUBMIT_TIME_KEY) match { case Some(t) => t.toLong diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/execution/arrow/KyuubiArrowConverters.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/execution/arrow/KyuubiArrowConverters.scala index 5c4d7086f..4a54180cc 100644 --- 
a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/execution/arrow/KyuubiArrowConverters.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/execution/arrow/KyuubiArrowConverters.scala @@ -159,7 +159,7 @@ object KyuubiArrowConverters extends SQLConfHelper with Logging { partsScanned.until(math.min(partsScanned + numPartsToTry, totalParts)) // TODO: SparkPlan.session introduced in SPARK-35798, replace with SparkPlan.session once we - // drop Spark-3.1.x support. + // drop Spark 3.1 support. val sc = SparkSession.active.sparkContext val res = sc.runJob( childRDD, @@ -347,6 +347,6 @@ object KyuubiArrowConverters extends SQLConfHelper with Logging { largeVarTypes) } - // IpcOption.DEFAULT was introduced in ARROW-11081(ARROW-4.0.0), add this for adapt Spark-3.1/3.2 + // IpcOption.DEFAULT was introduced in ARROW-11081(ARROW-4.0.0), add this for adapt Spark 3.1/3.2 final private val ARROW_IPC_OPTION_DEFAULT = new IpcOption() } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala index e8f90ecee..7af51abfe 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala @@ -161,7 +161,7 @@ object SparkDatasetHelper extends Logging { private def doCollectLimit(collectLimit: CollectLimitExec): Array[Array[Byte]] = { // TODO: SparkPlan.session introduced in SPARK-35798, replace with SparkPlan.session once we - // drop Spark-3.1.x support. + // drop Spark 3.1 support. 
val timeZoneId = SparkSession.active.sessionState.conf.sessionLocalTimeZone val maxRecordsPerBatch = SparkSession.active.sessionState.conf.arrowMaxRecordsPerBatch diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala index d3d4a56d7..4e0414824 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkArrowbasedOperationSuite.scala @@ -523,9 +523,9 @@ class SparkArrowbasedOperationSuite extends WithSparkSQLEngine with SparkDataTyp /** * This method provides a reflection-based implementation of [[SQLConf.isStaticConfigKey]] to - * adapt Spark-3.1.x + * adapt Spark 3.1 * - * TODO: Once we drop support for Spark 3.1.x, we can directly call + * TODO: Once we drop support for Spark 3.1, we can directly call * [[SQLConf.isStaticConfigKey()]]. */ private def isStaticConfigKey(key: String): Boolean = diff --git a/pom.xml b/pom.xml index d6e5e2dd0..747df8fd5 100644 --- a/pom.xml +++ b/pom.xml @@ -2231,24 +2231,6 @@ - - spark-3.1 - - extensions/spark/kyuubi-extension-spark-common - extensions/spark/kyuubi-extension-spark-3-1 - - - 3.1.3 - 3.1 - delta-core - 1.0.1 - - 1.3.1 - spark-${spark.version}-bin-hadoop3.2.tgz - org.scalatest.tags.Slow - - - spark-3.2