From b89c185eec330143c204eb84ddb60f24624d1079 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Thu, 23 May 2024 14:26:38 +0800 Subject: [PATCH] [KYUUBI #6413] SPARK_HOME detection supports Spark 4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description When `SPARK_HOME` is not set explicitly, the Kyuubi server can detect it based on the Scala version, but the existing rules do not apply to Spark 4. This PR enhances the SPARK_HOME detection logic to make it support both Spark 3 and Spark 4. The above logic is mainly used for testing purposes; the change does not affect users who configure `SPARK_HOME` in `kyuubi-env.sh`. ## Types of changes - [ ] Bugfix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) ## Test Plan #### Related Unit Tests - `SparkProcessBuilderSuite` --- # Checklist 📝 - [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html) **Be nice. Be informative.** Closes #6413 from pan3793/spark4-home.
Closes #6413 20e71fd7d [Cheng Pan] SPARK_HOME detection supports Spark 4 Authored-by: Cheng Pan Signed-off-by: Cheng Pan --- .../engine/spark/SparkProcessBuilder.scala | 21 +++++---- .../spark/SparkProcessBuilderSuite.scala | 45 +++++++++++-------- 2 files changed, 38 insertions(+), 28 deletions(-) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala index a651e99ef..fcf24b930 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala @@ -117,11 +117,11 @@ class SparkProcessBuilder( } override protected lazy val engineHomeDirFilter: FileFilter = file => { - val r = SCALA_COMPILE_VERSION match { - case "2.12" => SPARK_HOME_REGEX_SCALA_212 - case "2.13" => SPARK_HOME_REGEX_SCALA_213 + val patterns = SCALA_COMPILE_VERSION match { + case "2.12" => Seq(SPARK3_HOME_REGEX_SCALA_212) + case "2.13" => Seq(SPARK3_HOME_REGEX_SCALA_213, SPARK4_HOME_REGEX_SCALA_213) } - file.isDirectory && r.findFirstMatchIn(file.getName).isDefined + file.isDirectory && patterns.exists(_.findFirstMatchIn(file.getName).isDefined) } override protected[kyuubi] lazy val commands: Iterable[String] = { @@ -364,11 +364,14 @@ object SparkProcessBuilder { final private val SPARK_CONF_FILE_NAME = "spark-defaults.conf" final private[kyuubi] val SPARK_CORE_SCALA_VERSION_REGEX = - """^spark-core_(\d\.\d+).*.jar$""".r + """^spark-core_(\d\.\d+)-.*\.jar$""".r - final private[kyuubi] val SPARK_HOME_REGEX_SCALA_212 = - """^spark-\d+\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r + final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_212 = + """^spark-3\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r - final private[kyuubi] val SPARK_HOME_REGEX_SCALA_213 = - """^spark-\d+\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala\d+(\.\d+)?$""".r + final private[kyuubi] val 
SPARK3_HOME_REGEX_SCALA_213 = + """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r + + final private[kyuubi] val SPARK4_HOME_REGEX_SCALA_213 = + """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala index 4ee98a080..5f3bae124 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala @@ -403,36 +403,43 @@ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar { "spark-core_2.13-3.5.0-abc-20230921.jar", "spark-core_2.13-3.5.0-xyz-1.2.3.jar", "spark-core_2.13-3.5.0.1.jar", - "spark-core_2.13.2-3.5.0.jar").foreach { f => + "spark-core_2.13-4.0.0-preview1.jar", + "spark-core_2.13-4.0.0.jar").foreach { f => assertResult("2.13")(builder.extractSparkCoreScalaVersion(Seq(f))) } Seq( "spark-dummy_2.13-3.5.0.jar", "spark-core_2.13-3.5.0.1.zip", - "yummy-spark-core_2.13-3.5.0.jar").foreach { f => + "yummy-spark-core_2.13-3.5.0.jar", + "spark-core_2.13.2-3.5.0.jar").foreach { f => assertThrows[KyuubiException](builder.extractSparkCoreScalaVersion(Seq(f))) } } test("match scala version of spark home") { - SCALA_COMPILE_VERSION match { - case "2.12" => Seq( - "spark-3.2.4-bin-hadoop3.2", - "spark-3.2.4-bin-hadoop2.7", - "spark-3.4.1-bin-hadoop3") - .foreach { sparkHome => - assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212) - assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213) - } - case "2.13" => Seq( - "spark-3.2.4-bin-hadoop3.2-scala2.13", - "spark-3.4.1-bin-hadoop3-scala2.13", - "spark-3.5.0-bin-hadoop3-scala2.13") - .foreach { sparkHome => - assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213) - assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212) - } + Seq( + "spark-3.2.4-bin-hadoop3.2", + 
"spark-3.2.4-bin-hadoop2.7", + "spark-3.4.1-bin-hadoop3").foreach { SPARK3_HOME_SCALA_212 => + assertMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_212) + assertNotMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_213) + assertNotMatches(SPARK3_HOME_SCALA_212, SPARK4_HOME_REGEX_SCALA_213) + } + Seq( + "spark-3.2.4-bin-hadoop3.2-scala2.13", + "spark-3.4.1-bin-hadoop3-scala2.13", + "spark-3.5.0-bin-hadoop3-scala2.13").foreach { SPARK3_HOME_SCALA_213 => + assertMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213) + assertNotMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212) + assertNotMatches(SPARK3_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213) + } + Seq( + "spark-4.0.0-preview1-bin-hadoop3", + "spark-4.0.0-bin-hadoop3").foreach { SPARK4_HOME_SCALA_213 => + assertMatches(SPARK4_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213) + assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212) + assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213) } }