[KYUUBI #5414][KSHC] Reader should not pollute the global hiveConf instance

### _Why are the changes needed?_

This pr aims to fix https://github.com/apache/kyuubi/issues/5414.

`HiveReader` initialization incorrectly uses the global hadoopConf as the hiveConf, which causes the reader to pollute the global hadoopConf and causes job read failures.

### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request

### _Was this patch authored or co-authored using generative AI tooling?_

No

Closes #5424 from Yikf/orc-read.

Closes #5414

d6bdf7be4 [yikaifei] [KYUUBI #5414] Reader should not polluted the global hiveconf instance

Authored-by: yikaifei <yikaifei@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
yikaifei 2023-10-17 13:09:18 +08:00 committed by Cheng Pan
parent dcaacc3ed5
commit 47555eb900
No known key found for this signature in database
GPG Key ID: 8001952629BCC75D
2 changed files with 18 additions and 1 deletions

View File

@ -64,7 +64,7 @@ case class HiveScan(
}
override def createReaderFactory(): PartitionReaderFactory = {
val hiveConf = fileIndex.hiveCatalog.hadoopConfiguration()
val hiveConf = new Configuration(fileIndex.hiveCatalog.hadoopConfiguration())
addCatalogTableConfToConf(hiveConf, catalogTable)
val table = HiveClientImpl.toHiveTable(catalogTable)

View File

@ -175,6 +175,23 @@ class HiveQuerySuite extends KyuubiHiveTest {
}
}
// Regression test for KYUUBI #5414: a read through the KSHC reader must not
// mutate the shared Hadoop/Hive configuration. Before the fix, HiveScan passed
// the catalog's global hadoopConfiguration() directly to the reader factory,
// so table-specific settings leaked into it and broke subsequent reads.
test("[KYUUBI #5414] Reader should not polluted the global hiveconf instance") {
withSparkSession() { spark =>
val table = "hive.default.hiveconf_test"
// ORC-format partitioned Hive table; `withTempPartitionedTable` presumably
// creates and drops it around the body — TODO confirm helper semantics.
withTempPartitionedTable(spark, table, "ORC", hiveTable = true) {
spark.sql(
s"""
| INSERT OVERWRITE
| $table PARTITION(year = '2022')
| VALUES("yi", "08")
|""".stripMargin).collect()
// Two reads back-to-back: with a polluted global conf the second query
// (or any later read) would fail or return wrong results.
// NOTE(review): expected row order ("yi", "2022", "08") implies the
// partition column sits between the data columns — verify table schema.
checkQueryResult(s"select * from $table", spark, Array(Row.apply("yi", "2022", "08")))
checkQueryResult(s"select count(*) as c from $table", spark, Array(Row.apply(1)))
}
}
}
test("Partitioned table insert and static partition value is empty string") {
withSparkSession() { spark =>
val table = "hive.default.employee"