From 47555eb90098d63f5b0be28f33fc23911edfa0c8 Mon Sep 17 00:00:00 2001 From: yikaifei Date: Tue, 17 Oct 2023 13:09:18 +0800 Subject: [PATCH] [KYUUBI #5414][KSHC] Reader should not pollute the global hiveConf instance ### _Why are the changes needed?_ This PR aims to fix https://github.com/apache/kyuubi/issues/5414. `HiveReader` initialization incorrectly uses the global hadoopConf as hiveconf, which causes the reader to pollute the global hadoopConf and causes job read failures. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before making a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5424 from Yikf/orc-read. Closes #5414 d6bdf7be4 [yikaifei] [KYUUBI #5414] Reader should not polluted the global hiveconf instance Authored-by: yikaifei Signed-off-by: Cheng Pan --- .../spark/connector/hive/read/HiveScan.scala | 2 +- .../spark/connector/hive/HiveQuerySuite.scala | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala index 0b79d7307..ecdfc76c5 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala @@ -64,7 +64,7 @@ case class HiveScan( } override def createReaderFactory(): PartitionReaderFactory = { - val hiveConf = fileIndex.hiveCatalog.hadoopConfiguration() + val hiveConf = new 
Configuration(fileIndex.hiveCatalog.hadoopConfiguration()) addCatalogTableConfToConf(hiveConf, catalogTable) val table = HiveClientImpl.toHiveTable(catalogTable) diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala index 1d3d5ae10..0dd1efdec 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala @@ -175,6 +175,23 @@ class HiveQuerySuite extends KyuubiHiveTest { } } + test("[KYUUBI #5414] Reader should not polluted the global hiveconf instance") { + withSparkSession() { spark => + val table = "hive.default.hiveconf_test" + withTempPartitionedTable(spark, table, "ORC", hiveTable = true) { + spark.sql( + s""" + | INSERT OVERWRITE + | $table PARTITION(year = '2022') + | VALUES("yi", "08") + |""".stripMargin).collect() + + checkQueryResult(s"select * from $table", spark, Array(Row.apply("yi", "2022", "08"))) + checkQueryResult(s"select count(*) as c from $table", spark, Array(Row.apply(1))) + } + } + } + test("Partitioned table insert and static partition value is empty string") { withSparkSession() { spark => val table = "hive.default.employee"