[KYUUBI #6413] SPARK_HOME detection supports Spark 4

# Description

When `SPARK_HOME` is not set explicitly, the Kyuubi server supports detecting it based on the Scala version, but those detection rules are not applicable to Spark 4.

This PR enhances the SPARK_HOME detection logic to make it support both Spark 3 and Spark 4.

The above logic is mainly used for testing purposes; the change does not affect users who configure `SPARK_HOME` in `kyuubi-env.sh`.

## Types of changes

- [ ] Bugfix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)

## Test Plan

#### Related Unit Tests

- `SparkProcessBuilderSuite`

---

# Checklist 📝

- [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html)

**Be nice. Be informative.**

Closes #6413 from pan3793/spark4-home.

Closes #6413

20e71fd7d [Cheng Pan] SPARK_HOME detection supports Spark 4

Authored-by: Cheng Pan <chengpan@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
Cheng Pan 2024-05-23 14:26:38 +08:00
parent a95ff125fc
commit b89c185eec
No known key found for this signature in database
GPG Key ID: 8001952629BCC75D
2 changed files with 38 additions and 28 deletions

View File

@@ -117,11 +117,11 @@ class SparkProcessBuilder(
} }
override protected lazy val engineHomeDirFilter: FileFilter = file => { override protected lazy val engineHomeDirFilter: FileFilter = file => {
val r = SCALA_COMPILE_VERSION match { val patterns = SCALA_COMPILE_VERSION match {
case "2.12" => SPARK_HOME_REGEX_SCALA_212 case "2.12" => Seq(SPARK3_HOME_REGEX_SCALA_212)
case "2.13" => SPARK_HOME_REGEX_SCALA_213 case "2.13" => Seq(SPARK3_HOME_REGEX_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
} }
file.isDirectory && r.findFirstMatchIn(file.getName).isDefined file.isDirectory && patterns.exists(_.findFirstMatchIn(file.getName).isDefined)
} }
override protected[kyuubi] lazy val commands: Iterable[String] = { override protected[kyuubi] lazy val commands: Iterable[String] = {
@@ -364,11 +364,14 @@ object SparkProcessBuilder {
final private val SPARK_CONF_FILE_NAME = "spark-defaults.conf" final private val SPARK_CONF_FILE_NAME = "spark-defaults.conf"
final private[kyuubi] val SPARK_CORE_SCALA_VERSION_REGEX = final private[kyuubi] val SPARK_CORE_SCALA_VERSION_REGEX =
"""^spark-core_(\d\.\d+).*.jar$""".r """^spark-core_(\d\.\d+)-.*\.jar$""".r
final private[kyuubi] val SPARK_HOME_REGEX_SCALA_212 = final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_212 =
"""^spark-\d+\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r """^spark-3\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
final private[kyuubi] val SPARK_HOME_REGEX_SCALA_213 = final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_213 =
"""^spark-\d+\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala\d+(\.\d+)?$""".r """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r
final private[kyuubi] val SPARK4_HOME_REGEX_SCALA_213 =
"""^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r
} }

View File

@@ -403,36 +403,43 @@ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar {
"spark-core_2.13-3.5.0-abc-20230921.jar", "spark-core_2.13-3.5.0-abc-20230921.jar",
"spark-core_2.13-3.5.0-xyz-1.2.3.jar", "spark-core_2.13-3.5.0-xyz-1.2.3.jar",
"spark-core_2.13-3.5.0.1.jar", "spark-core_2.13-3.5.0.1.jar",
"spark-core_2.13.2-3.5.0.jar").foreach { f => "spark-core_2.13-4.0.0-preview1.jar",
"spark-core_2.13-4.0.0.jar").foreach { f =>
assertResult("2.13")(builder.extractSparkCoreScalaVersion(Seq(f))) assertResult("2.13")(builder.extractSparkCoreScalaVersion(Seq(f)))
} }
Seq( Seq(
"spark-dummy_2.13-3.5.0.jar", "spark-dummy_2.13-3.5.0.jar",
"spark-core_2.13-3.5.0.1.zip", "spark-core_2.13-3.5.0.1.zip",
"yummy-spark-core_2.13-3.5.0.jar").foreach { f => "yummy-spark-core_2.13-3.5.0.jar",
"spark-core_2.13.2-3.5.0.jar").foreach { f =>
assertThrows[KyuubiException](builder.extractSparkCoreScalaVersion(Seq(f))) assertThrows[KyuubiException](builder.extractSparkCoreScalaVersion(Seq(f)))
} }
} }
test("match scala version of spark home") { test("match scala version of spark home") {
SCALA_COMPILE_VERSION match { Seq(
case "2.12" => Seq( "spark-3.2.4-bin-hadoop3.2",
"spark-3.2.4-bin-hadoop3.2", "spark-3.2.4-bin-hadoop2.7",
"spark-3.2.4-bin-hadoop2.7", "spark-3.4.1-bin-hadoop3").foreach { SPARK3_HOME_SCALA_212 =>
"spark-3.4.1-bin-hadoop3") assertMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_212)
.foreach { sparkHome => assertNotMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_213)
assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212) assertNotMatches(SPARK3_HOME_SCALA_212, SPARK4_HOME_REGEX_SCALA_213)
assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213) }
} Seq(
case "2.13" => Seq( "spark-3.2.4-bin-hadoop3.2-scala2.13",
"spark-3.2.4-bin-hadoop3.2-scala2.13", "spark-3.4.1-bin-hadoop3-scala2.13",
"spark-3.4.1-bin-hadoop3-scala2.13", "spark-3.5.0-bin-hadoop3-scala2.13").foreach { SPARK3_HOME_SCALA_213 =>
"spark-3.5.0-bin-hadoop3-scala2.13") assertMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
.foreach { sparkHome => assertNotMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213) assertNotMatches(SPARK3_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212) }
} Seq(
"spark-4.0.0-preview1-bin-hadoop3",
"spark-4.0.0-bin-hadoop3").foreach { SPARK4_HOME_SCALA_213 =>
assertMatches(SPARK4_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
} }
} }