[KYUUBI #6939] Bump Spark 3.5.5
### Why are the changes needed? Test Spark 3.5.5 Release Notes https://spark.apache.org/releases/spark-release-3-5-5.html ### How was this patch tested? Pass GHA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #6939 from pan3793/spark-3.5.5. Closes #6939 8c0288ae5 [Cheng Pan] ga 78b0e72db [Cheng Pan] nit 686a7b0a9 [Cheng Pan] fix d40cc5bba [Cheng Pan] Bump Spark 3.5.5 Authored-by: Cheng Pan <chengpan@apache.org> Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
parent
bfcf2e708f
commit
d5b01fa3e2
4
.github/workflows/master.yml
vendored
4
.github/workflows/master.yml
vendored
@ -444,8 +444,8 @@ jobs:
|
||||
# https://minikube.sigs.k8s.io/docs/handbook/pushing/#7-loading-directly-to-in-cluster-container-runtime
|
||||
minikube image load apache/kyuubi:latest
|
||||
# pre-install spark into minikube
|
||||
docker pull apache/spark:3.5.4
|
||||
minikube image load apache/spark:3.5.4
|
||||
docker pull apache/spark:3.5.5
|
||||
minikube image load apache/spark:3.5.5
|
||||
- name: kubectl pre-check
|
||||
run: |
|
||||
kubectl get nodes
|
||||
|
||||
@ -181,8 +181,8 @@ Examples:
|
||||
$0 -r docker.io/myrepo -t v1.8.1 build
|
||||
$0 -r docker.io/myrepo -t v1.8.1 push
|
||||
|
||||
- Build and push with tag "v1.8.1" and Spark-3.5.4 as base image to docker.io/myrepo
|
||||
$0 -r docker.io/myrepo -t v1.8.1 -b BASE_IMAGE=repo/spark:3.5.4 build
|
||||
- Build and push with tag "v1.8.1" and Spark-3.5.5 as base image to docker.io/myrepo
|
||||
$0 -r docker.io/myrepo -t v1.8.1 -b BASE_IMAGE=repo/spark:3.5.5 build
|
||||
$0 -r docker.io/myrepo -t v1.8.1 push
|
||||
|
||||
- Build and push for multiple archs to docker.io/myrepo
|
||||
|
||||
@ -42,8 +42,8 @@ Examples:
|
||||
$0 -r docker.io/myrepo -t v1.8.1 build
|
||||
$0 -r docker.io/myrepo -t v1.8.1 push
|
||||
|
||||
- Build and push with tag "v1.8.1" and Spark-3.5.4 as base image to docker.io/myrepo
|
||||
$0 -r docker.io/myrepo -t v1.8.1 -b BASE_IMAGE=repo/spark:3.5.4 build
|
||||
- Build and push with tag "v1.8.1" and Spark-3.5.5 as base image to docker.io/myrepo
|
||||
$0 -r docker.io/myrepo -t v1.8.1 -b BASE_IMAGE=repo/spark:3.5.5 build
|
||||
$0 -r docker.io/myrepo -t v1.8.1 push
|
||||
|
||||
- Build and push for multiple archs to docker.io/myrepo
|
||||
|
||||
@ -117,7 +117,7 @@ Sometimes, it may be incompatible with other Spark distributions, then you may n
|
||||
For example,
|
||||
|
||||
```shell
|
||||
build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -am -DskipTests -Dspark.version=3.5.4
|
||||
build/mvn clean package -pl :kyuubi-spark-lineage_2.12 -am -DskipTests -Dspark.version=3.5.5
|
||||
```
|
||||
|
||||
The available `spark.version`s are shown in the following table.
|
||||
|
||||
@ -19,15 +19,16 @@ package org.apache.kyuubi.spark.connector.hive
|
||||
|
||||
import java.lang.{Boolean => JBoolean, Long => JLong}
|
||||
|
||||
import scala.util.Try
|
||||
|
||||
import org.apache.hadoop.fs.{FileStatus, Path}
|
||||
import org.apache.hadoop.hive.ql.plan.{FileSinkDesc, TableDesc}
|
||||
import org.apache.spark.SPARK_VERSION
|
||||
import org.apache.spark.internal.Logging
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.apache.spark.sql.catalyst.InternalRow
|
||||
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTablePartition}
|
||||
import org.apache.spark.sql.connector.catalog.TableChange
|
||||
import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, First, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType}
|
||||
import org.apache.spark.sql.connector.catalog.TableChange._
|
||||
import org.apache.spark.sql.execution.command.CommandUtils
|
||||
import org.apache.spark.sql.execution.command.CommandUtils.{calculateMultipleLocationSizes, calculateSingleLocationSize}
|
||||
import org.apache.spark.sql.execution.datasources.{PartitionDirectory, PartitionedFile}
|
||||
@ -35,20 +36,18 @@ import org.apache.spark.sql.hive.execution.HiveFileFormat
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
import org.apache.spark.sql.types.{ArrayType, MapType, StructField, StructType}
|
||||
|
||||
import org.apache.kyuubi.spark.connector.common.SparkUtils.SPARK_RUNTIME_VERSION
|
||||
import org.apache.kyuubi.util.reflect.{DynClasses, DynConstructors, DynMethods}
|
||||
import org.apache.kyuubi.util.reflect.ReflectUtils.invokeAs
|
||||
|
||||
object HiveConnectorUtils extends Logging {
|
||||
|
||||
// SPARK-43186
|
||||
def getHiveFileFormat(fileSinkConf: FileSinkDesc): HiveFileFormat = {
|
||||
if (SPARK_RUNTIME_VERSION >= "3.5") {
|
||||
def getHiveFileFormat(fileSinkConf: FileSinkDesc): HiveFileFormat =
|
||||
Try { // SPARK-43186: 3.5.0
|
||||
DynConstructors.builder()
|
||||
.impl(classOf[HiveFileFormat], classOf[FileSinkDesc])
|
||||
.build[HiveFileFormat]()
|
||||
.newInstance(fileSinkConf)
|
||||
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
|
||||
}.recover { case _: Exception =>
|
||||
val shimFileSinkDescClz = DynClasses.builder()
|
||||
.impl("org.apache.spark.sql.hive.HiveShim$ShimFileSinkDesc")
|
||||
.build()
|
||||
@ -67,34 +66,26 @@ object HiveConnectorUtils extends Logging {
|
||||
.impl(classOf[HiveFileFormat], shimFileSinkDescClz)
|
||||
.build[HiveFileFormat]()
|
||||
.newInstance(shimFileSinkDesc)
|
||||
} else {
|
||||
throw unsupportedSparkVersion()
|
||||
}
|
||||
}
|
||||
}.get
|
||||
|
||||
// SPARK-41970
|
||||
def partitionedFilePath(file: PartitionedFile): String = {
|
||||
if (SPARK_RUNTIME_VERSION >= "3.4") {
|
||||
def partitionedFilePath(file: PartitionedFile): String =
|
||||
Try { // SPARK-41970: 3.4.0
|
||||
invokeAs[String](file, "urlEncodedPath")
|
||||
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
|
||||
}.recover { case _: Exception =>
|
||||
invokeAs[String](file, "filePath")
|
||||
} else {
|
||||
throw unsupportedSparkVersion()
|
||||
}
|
||||
}
|
||||
}.get
|
||||
|
||||
def splitFiles(
|
||||
sparkSession: SparkSession,
|
||||
file: AnyRef,
|
||||
filePath: Path,
|
||||
isSplitable: Boolean,
|
||||
maxSplitBytes: Long,
|
||||
partitionValues: InternalRow): Seq[PartitionedFile] = {
|
||||
|
||||
if (SPARK_RUNTIME_VERSION >= "4.0") { // SPARK-42821
|
||||
isSplitable: JBoolean,
|
||||
maxSplitBytes: JLong,
|
||||
partitionValues: InternalRow): Seq[PartitionedFile] =
|
||||
Try { // SPARK-42821: 4.0.0-preview2
|
||||
val fileStatusWithMetadataClz = DynClasses.builder()
|
||||
.impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
|
||||
.build()
|
||||
.buildChecked()
|
||||
DynMethods
|
||||
.builder("splitFiles")
|
||||
.impl(
|
||||
@ -103,17 +94,40 @@ object HiveConnectorUtils extends Logging {
|
||||
classOf[Boolean],
|
||||
classOf[Long],
|
||||
classOf[InternalRow])
|
||||
.build()
|
||||
.invoke[Seq[PartitionedFile]](
|
||||
.buildChecked()
|
||||
.invokeChecked[Seq[PartitionedFile]](
|
||||
null,
|
||||
file,
|
||||
isSplitable.asInstanceOf[JBoolean],
|
||||
maxSplitBytes.asInstanceOf[JLong],
|
||||
isSplitable,
|
||||
maxSplitBytes,
|
||||
partitionValues)
|
||||
} else if (SPARK_RUNTIME_VERSION >= "3.5") { // SPARK-43039
|
||||
}.recover { case _: Exception => // SPARK-51185: Spark 3.5.5
|
||||
val fileStatusWithMetadataClz = DynClasses.builder()
|
||||
.impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
|
||||
.build()
|
||||
.buildChecked()
|
||||
DynMethods
|
||||
.builder("splitFiles")
|
||||
.impl(
|
||||
"org.apache.spark.sql.execution.PartitionedFileUtil",
|
||||
classOf[SparkSession],
|
||||
fileStatusWithMetadataClz,
|
||||
classOf[Path],
|
||||
classOf[Boolean],
|
||||
classOf[Long],
|
||||
classOf[InternalRow])
|
||||
.buildChecked()
|
||||
.invokeChecked[Seq[PartitionedFile]](
|
||||
null,
|
||||
sparkSession,
|
||||
file,
|
||||
filePath,
|
||||
isSplitable,
|
||||
maxSplitBytes,
|
||||
partitionValues)
|
||||
}.recover { case _: Exception => // SPARK-43039: 3.5.0
|
||||
val fileStatusWithMetadataClz = DynClasses.builder()
|
||||
.impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
|
||||
.buildChecked()
|
||||
DynMethods
|
||||
.builder("splitFiles")
|
||||
.impl(
|
||||
@ -123,15 +137,15 @@ object HiveConnectorUtils extends Logging {
|
||||
classOf[Boolean],
|
||||
classOf[Long],
|
||||
classOf[InternalRow])
|
||||
.build()
|
||||
.invoke[Seq[PartitionedFile]](
|
||||
.buildChecked()
|
||||
.invokeChecked[Seq[PartitionedFile]](
|
||||
null,
|
||||
sparkSession,
|
||||
file,
|
||||
isSplitable.asInstanceOf[JBoolean],
|
||||
maxSplitBytes.asInstanceOf[JLong],
|
||||
isSplitable,
|
||||
maxSplitBytes,
|
||||
partitionValues)
|
||||
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
|
||||
}.recover { case _: Exception =>
|
||||
DynMethods
|
||||
.builder("splitFiles")
|
||||
.impl(
|
||||
@ -142,55 +156,41 @@ object HiveConnectorUtils extends Logging {
|
||||
classOf[Boolean],
|
||||
classOf[Long],
|
||||
classOf[InternalRow])
|
||||
.build()
|
||||
.invoke[Seq[PartitionedFile]](
|
||||
.buildChecked()
|
||||
.invokeChecked[Seq[PartitionedFile]](
|
||||
null,
|
||||
sparkSession,
|
||||
file,
|
||||
filePath,
|
||||
isSplitable.asInstanceOf[JBoolean],
|
||||
maxSplitBytes.asInstanceOf[JLong],
|
||||
isSplitable,
|
||||
maxSplitBytes,
|
||||
partitionValues)
|
||||
} else {
|
||||
throw unsupportedSparkVersion()
|
||||
}
|
||||
}
|
||||
}.get
|
||||
|
||||
def createPartitionDirectory(values: InternalRow, files: Seq[FileStatus]): PartitionDirectory = {
|
||||
if (SPARK_RUNTIME_VERSION >= "3.5") {
|
||||
def createPartitionDirectory(values: InternalRow, files: Seq[FileStatus]): PartitionDirectory =
|
||||
Try { // SPARK-43039: 3.5.0
|
||||
new DynMethods.Builder("apply")
|
||||
.impl(classOf[PartitionDirectory], classOf[InternalRow], classOf[Array[FileStatus]])
|
||||
.buildChecked()
|
||||
.asStatic()
|
||||
.invoke[PartitionDirectory](values, files.toArray)
|
||||
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
|
||||
}.recover { case _: Exception =>
|
||||
new DynMethods.Builder("apply")
|
||||
.impl(classOf[PartitionDirectory], classOf[InternalRow], classOf[Seq[FileStatus]])
|
||||
.buildChecked()
|
||||
.asStatic()
|
||||
.invoke[PartitionDirectory](values, files)
|
||||
} else {
|
||||
throw unsupportedSparkVersion()
|
||||
}
|
||||
}
|
||||
}.get
|
||||
|
||||
def getPartitionFilePath(file: AnyRef): Path = {
|
||||
if (SPARK_RUNTIME_VERSION >= "3.5") {
|
||||
def getPartitionFilePath(file: AnyRef): Path =
|
||||
Try { // SPARK-43039: 3.5.0
|
||||
new DynMethods.Builder("getPath")
|
||||
.impl("org.apache.spark.sql.execution.datasources.FileStatusWithMetadata")
|
||||
.build()
|
||||
.invoke[Path](file)
|
||||
} else if (SPARK_RUNTIME_VERSION >= "3.3") {
|
||||
}.recover { case _: Exception =>
|
||||
file.asInstanceOf[FileStatus].getPath
|
||||
} else {
|
||||
throw unsupportedSparkVersion()
|
||||
}
|
||||
}
|
||||
|
||||
private def unsupportedSparkVersion(): KyuubiHiveConnectorException = {
|
||||
KyuubiHiveConnectorException(s"Spark version $SPARK_VERSION " +
|
||||
"is not supported by Kyuubi spark hive connector.")
|
||||
}
|
||||
}.get
|
||||
|
||||
def calculateTotalSize(
|
||||
spark: SparkSession,
|
||||
|
||||
@ -56,7 +56,7 @@ class KyuubiOnKubernetesWithSparkTestsBase extends WithKyuubiServerOnKubernetes
|
||||
Map(
|
||||
"spark.master" -> s"k8s://$miniKubeApiMaster",
|
||||
// We should update the Spark docker image in .github/workflows/master.yml at the same time
|
||||
"spark.kubernetes.container.image" -> "apache/spark:3.5.4",
|
||||
"spark.kubernetes.container.image" -> "apache/spark:3.5.5",
|
||||
"spark.kubernetes.container.image.pullPolicy" -> "IfNotPresent",
|
||||
"spark.executor.memory" -> "512M",
|
||||
"spark.driver.memory" -> "1024M",
|
||||
|
||||
@ -51,7 +51,7 @@ abstract class SparkOnKubernetesSuiteBase
|
||||
// TODO Support more Spark versions
|
||||
// Spark official docker image: https://hub.docker.com/r/apache/spark/tags
|
||||
KyuubiConf().set("spark.master", s"k8s://$apiServerAddress")
|
||||
.set("spark.kubernetes.container.image", "apache/spark:3.5.4")
|
||||
.set("spark.kubernetes.container.image", "apache/spark:3.5.5")
|
||||
.set("spark.kubernetes.container.image.pullPolicy", "IfNotPresent")
|
||||
.set("spark.executor.instances", "1")
|
||||
.set("spark.executor.memory", "512M")
|
||||
|
||||
6
pom.xml
6
pom.xml
@ -200,7 +200,7 @@
|
||||
DO NOT forget to change the following properties when changing the minor version of Spark:
|
||||
`delta.version`, `delta.artifact`, `maven.plugin.scalatest.exclude.tags`
|
||||
-->
|
||||
<spark.version>3.5.4</spark.version>
|
||||
<spark.version>3.5.5</spark.version>
|
||||
<spark.binary.version>3.5</spark.binary.version>
|
||||
<spark.archive.scala.suffix></spark.archive.scala.suffix>
|
||||
<spark.archive.name>spark-${spark.version}-bin-hadoop3${spark.archive.scala.suffix}.tgz</spark.archive.name>
|
||||
@ -1932,7 +1932,7 @@
|
||||
<java.version>8</java.version>
|
||||
<!--
|
||||
Iceberg dropped support for Java 8 in 1.7.0.
|
||||
And it may have compatible issue with Spark 3.5.4, see Iceberg #11731
|
||||
And it may have compatibility issues with Spark 3.5.4+, see Iceberg #11731
|
||||
-->
|
||||
<iceberg.version>1.6.1</iceberg.version>
|
||||
</properties>
|
||||
@ -2044,7 +2044,7 @@
|
||||
<module>extensions/spark/kyuubi-spark-connector-hive</module>
|
||||
</modules>
|
||||
<properties>
|
||||
<spark.version>3.5.4</spark.version>
|
||||
<spark.version>3.5.5</spark.version>
|
||||
<spark.binary.version>3.5</spark.binary.version>
|
||||
<delta.version>3.3.0</delta.version>
|
||||
<delta.artifact>delta-spark_${scala.binary.version}</delta.artifact>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user