[KYUUBI #6939] Bump Spark 3.5.5

### Why are the changes needed?

Test Spark 3.5.5 Release Notes

https://spark.apache.org/releases/spark-release-3-5-5.html

### How was this patch tested?

Pass GHA.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #6939 from pan3793/spark-3.5.5.

Closes #6939

8c0288ae5 [Cheng Pan] ga
78b0e72db [Cheng Pan] nit
686a7b0a9 [Cheng Pan] fix
d40cc5bba [Cheng Pan] Bump Spark 3.5.5

Authored-by: Cheng Pan <chengpan@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
Cheng Pan 2025-03-03 13:42:09 +08:00
parent bfcf2e708f
commit d5b01fa3e2
No known key found for this signature in database
GPG Key ID: 8001952629BCC75D
8 changed files with 75 additions and 75 deletions

View File

@ -444,8 +444,8 @@ jobs:
# https://minikube.sigs.k8s.io/docs/handbook/pushing/#7-loading-directly-to-in-cluster-container-runtime
minikube image load apache/kyuubi:latest
# pre-install spark into minikube
docker pull apache/spark:3.5.4
minikube image load apache/spark:3.5.4
docker pull apache/spark:3.5.5
minikube image load apache/spark:3.5.5
- name: kubectl pre-check
run: |
kubectl get nodes

View File

@ -181,8 +181,8 @@ Examples:
$0 -r docker.io/myrepo -t v1.8.1 build
$0 -r docker.io/myrepo -t v1.8.1 push
- Build and push with tag "v1.8.1" and Spark-3.5.4 as base image to docker.io/myrepo
$0 -r docker.io/myrepo -t v1.8.1 -b BASE_IMAGE=repo/spark:3.5.4 build
- Build and push with tag "v1.8.1" and Spark-3.5.5 as base image to docker.io/myrepo
$0 -r docker.io/myrepo -t v1.8.1 -b BASE_IMAGE=repo/spark:3.5.5 build
$0 -r docker.io/myrepo -t v1.8.1 push
- Build and push for multiple archs to docker.io/myrepo

View File

@ -42,8 +42,8 @@ Examples:
$0 -r docker.io/myrepo -t v1.8.1 build
$0 -r docker.io/myrepo -t v1.8.1 push
- Build and push with tag "v1.8.1" and Spark-3.5.4 as base image to docker.io/myrepo
$0 -r docker.io/myrepo -t v1.8.1 -b BASE_IMAGE=repo/spark:3.5.4 build
- Build and push with tag "v1.8.1" and Spark-3.5.5 as base image to docker.io/myrepo
$0 -r docker.io/myrepo -t v1.8.1 -b BASE_IMAGE=repo/spark:3.5.5 build
$0 -r docker.io/myrepo -t v1.8.1 push
- Build and push for multiple archs to docker.io/myrepo

View File

@ -117,7 +117,7 @@ Sometimes, it may be incompatible with other Spark distributions, then you may n
For example,
```shell
build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -am -DskipTests -Dspark.version=3.5.4
build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -am -DskipTests -Dspark.version=3.5.5
```
The available `spark.version`s are shown in the following table.

View File

@ -19,15 +19,16 @@ package org.apache.kyuubi.spark.connector.hive
import java.lang.{Boolean => JBoolean, Long => JLong}
import scala.util.Try
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.hadoop.hive.ql.plan.{FileSinkDesc, TableDesc}
import org.apache.spark.SPARK_VERSION
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTablePartition}
import org.apache.spark.sql.connector.catalog.TableChange
import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, First, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType}
import org.apache.spark.sql.connector.catalog.TableChange._
import org.apache.spark.sql.execution.command.CommandUtils
import org.apache.spark.sql.execution.command.CommandUtils.{calculateMultipleLocationSizes, calculateSingleLocationSize}
import org.apache.spark.sql.execution.datasources.{PartitionDirectory, PartitionedFile}
@ -35,20 +36,18 @@ import org.apache.spark.sql.hive.execution.HiveFileFormat
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType}
import org.apache.kyuubi.spark.connector.common.SparkUtils.SPARK_RUNTIME_VERSION
import org.apache.kyuubi.util.reflect.{DynClasses, DynConstructors, DynMethods}
import org.apache.kyuubi.util.reflect.ReflectUtils.invokeAs
object HiveConnectorUtils extends Logging {
// SPARK-43186
def getHiveFileFormat(fileSinkConf: FileSinkDesc): HiveFileFormat = {
if (SPARK_RUNTIME_VERSION >= "3.5") {
def getHiveFileFormat(fileSinkConf: FileSinkDesc): HiveFileFormat =
Try { // SPARK-43186: 3.5.0
DynConstructors.builder()
.impl(classOf[HiveFileFormat], classOf[FileSinkDesc])
.build[HiveFileFormat]()
.newInstance(fileSinkConf)
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
}.recover { case _: Exception =>
val shimFileSinkDescClz = DynClasses.builder()
.impl("org.apache.spark.sql.hive.HiveShim$ShimFileSinkDesc")
.build()
@ -67,34 +66,26 @@ object HiveConnectorUtils extends Logging {
.impl(classOf[HiveFileFormat], shimFileSinkDescClz)
.build[HiveFileFormat]()
.newInstance(shimFileSinkDesc)
} else {
throw unsupportedSparkVersion()
}
}
}.get
// SPARK-41970
def partitionedFilePath(file: PartitionedFile): String = {
if (SPARK_RUNTIME_VERSION >= "3.4") {
def partitionedFilePath(file: PartitionedFile): String =
Try { // SPARK-41970: 3.4.0
invokeAs[String](file, "urlEncodedPath")
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
}.recover { case _: Exception =>
invokeAs[String](file, "filePath")
} else {
throw unsupportedSparkVersion()
}
}
}.get
def splitFiles(
sparkSession: SparkSession,
file: AnyRef,
filePath: Path,
isSplitable: Boolean,
maxSplitBytes: Long,
partitionValues: InternalRow): Seq[PartitionedFile] = {
if (SPARK_RUNTIME_VERSION >= "4.0") { // SPARK-42821
isSplitable: JBoolean,
maxSplitBytes: JLong,
partitionValues: InternalRow): Seq[PartitionedFile] =
Try { // SPARK-42821: 4.0.0-preview2
val fileStatusWithMetadataClz = DynClasses.builder()
.impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
.build()
.buildChecked()
DynMethods
.builder("splitFiles")
.impl(
@ -103,17 +94,40 @@ object HiveConnectorUtils extends Logging {
classOf[Boolean],
classOf[Long],
classOf[InternalRow])
.build()
.invoke[Seq[PartitionedFile]](
.buildChecked()
.invokeChecked[Seq[PartitionedFile]](
null,
file,
isSplitable.asInstanceOf[JBoolean],
maxSplitBytes.asInstanceOf[JLong],
isSplitable,
maxSplitBytes,
partitionValues)
} else if (SPARK_RUNTIME_VERSION >= "3.5") { // SPARK-43039
}.recover { case _: Exception => // SPARK-51185: Spark 3.5.5
val fileStatusWithMetadataClz = DynClasses.builder()
.impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
.build()
.buildChecked()
DynMethods
.builder("splitFiles")
.impl(
"org.apache.spark.sql.execution.PartitionedFileUtil",
classOf[SparkSession],
fileStatusWithMetadataClz,
classOf[Path],
classOf[Boolean],
classOf[Long],
classOf[InternalRow])
.buildChecked()
.invokeChecked[Seq[PartitionedFile]](
null,
sparkSession,
file,
filePath,
isSplitable,
maxSplitBytes,
partitionValues)
}.recover { case _: Exception => // SPARK-43039: 3.5.0
val fileStatusWithMetadataClz = DynClasses.builder()
.impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
.buildChecked()
DynMethods
.builder("splitFiles")
.impl(
@ -123,15 +137,15 @@ object HiveConnectorUtils extends Logging {
classOf[Boolean],
classOf[Long],
classOf[InternalRow])
.build()
.invoke[Seq[PartitionedFile]](
.buildChecked()
.invokeChecked[Seq[PartitionedFile]](
null,
sparkSession,
file,
isSplitable.asInstanceOf[JBoolean],
maxSplitBytes.asInstanceOf[JLong],
isSplitable,
maxSplitBytes,
partitionValues)
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
}.recover { case _: Exception =>
DynMethods
.builder("splitFiles")
.impl(
@ -142,55 +156,41 @@ object HiveConnectorUtils extends Logging {
classOf[Boolean],
classOf[Long],
classOf[InternalRow])
.build()
.invoke[Seq[PartitionedFile]](
.buildChecked()
.invokeChecked[Seq[PartitionedFile]](
null,
sparkSession,
file,
filePath,
isSplitable.asInstanceOf[JBoolean],
maxSplitBytes.asInstanceOf[JLong],
isSplitable,
maxSplitBytes,
partitionValues)
} else {
throw unsupportedSparkVersion()
}
}
}.get
def createPartitionDirectory(values: InternalRow, files: Seq[FileStatus]): PartitionDirectory = {
if (SPARK_RUNTIME_VERSION >= "3.5") {
def createPartitionDirectory(values: InternalRow, files: Seq[FileStatus]): PartitionDirectory =
Try { // SPARK-43039: 3.5.0
new DynMethods.Builder("apply")
.impl(classOf[PartitionDirectory], classOf[InternalRow], classOf[Array[FileStatus]])
.buildChecked()
.asStatic()
.invoke[PartitionDirectory](values, files.toArray)
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
}.recover { case _: Exception =>
new DynMethods.Builder("apply")
.impl(classOf[PartitionDirectory], classOf[InternalRow], classOf[Seq[FileStatus]])
.buildChecked()
.asStatic()
.invoke[PartitionDirectory](values, files)
} else {
throw unsupportedSparkVersion()
}
}
}.get
def getPartitionFilePath(file: AnyRef): Path = {
if (SPARK_RUNTIME_VERSION >= "3.5") {
def getPartitionFilePath(file: AnyRef): Path =
Try { // SPARK-43039: 3.5.0
new DynMethods.Builder("getPath")
.impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
.build()
.invoke[Path](file)
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
}.recover { case _: Exception =>
file.asInstanceOf[FileStatus].getPath
} else {
throw unsupportedSparkVersion()
}
}
private def unsupportedSparkVersion(): KyuubiHiveConnectorException = {
KyuubiHiveConnectorException(s"Spark version $SPARK_VERSION " +
"is not supported by Kyuubi spark hive connector.")
}
}.get
def calculateTotalSize(
spark: SparkSession,

View File

@ -56,7 +56,7 @@ class KyuubiOnKubernetesWithSparkTestsBase extends WithKyuubiServerOnKubernetes
Map(
"spark.master" -> s"k8s://$miniKubeApiMaster",
// We should update the Spark docker image in .github/workflows/master.yml at the same time
"spark.kubernetes.container.image" -> "apache/spark:3.5.4",
"spark.kubernetes.container.image" -> "apache/spark:3.5.5",
"spark.kubernetes.container.image.pullPolicy" -> "IfNotPresent",
"spark.executor.memory" -> "512M",
"spark.driver.memory" -> "1024M",

View File

@ -51,7 +51,7 @@ abstract class SparkOnKubernetesSuiteBase
// TODO Support more Spark version
// Spark official docker image: https://hub.docker.com/r/apache/spark/tags
KyuubiConf().set("spark.master", s"k8s://$apiServerAddress")
.set("spark.kubernetes.container.image", "apache/spark:3.5.4")
.set("spark.kubernetes.container.image", "apache/spark:3.5.5")
.set("spark.kubernetes.container.image.pullPolicy", "IfNotPresent")
.set("spark.executor.instances", "1")
.set("spark.executor.memory", "512M")

View File

@ -200,7 +200,7 @@
DO NOT forget to change the following properties when changing the minor version of Spark:
`delta.version`, `delta.artifact`, `maven.plugin.scalatest.exclude.tags`
-->
<spark.version>3.5.4</spark.version>
<spark.version>3.5.5</spark.version>
<spark.binary.version>3.5</spark.binary.version>
<spark.archive.scala.suffix></spark.archive.scala.suffix>
<spark.archive.name>spark-${spark.version}-bin-hadoop3${spark.archive.scala.suffix}.tgz</spark.archive.name>
@ -1932,7 +1932,7 @@
<java.version>8</java.version>
<!--
Iceberg drops support for Java 8 since 1.7.0.
And it may have compatible issue with Spark 3.5.4, see Iceberg #11731
And it may have compatibility issues with Spark 3.5.4+, see Iceberg #11731
-->
<iceberg.version>1.6.1</iceberg.version>
</properties>
@ -2044,7 +2044,7 @@
<module>extensions/spark/kyuubi-spark-connector-hive</module>
</modules>
<properties>
<spark.version>3.5.4</spark.version>
<spark.version>3.5.5</spark.version>
<spark.binary.version>3.5</spark.binary.version>
<delta.version>3.3.0</delta.version>
<delta.artifact>delta-spark_${scala.binary.version}</delta.artifact>