From 47555eb90098d63f5b0be28f33fc23911edfa0c8 Mon Sep 17 00:00:00 2001 From: yikaifei Date: Tue, 17 Oct 2023 13:09:18 +0800 Subject: [PATCH] [KYUUBI #5414][KSHC] Reader should not pollute the global hiveConf instance ### _Why are the changes needed?_ This PR aims to fix https://github.com/apache/kyuubi/issues/5414. `HiveReader` initialization incorrectly uses the global hadoopConf as hiveconf, which causes the reader to pollute the global hadoopConf and causes job read failures. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before making a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5424 from Yikf/orc-read. Closes #5414 d6bdf7be4 [yikaifei] [KYUUBI #5414] Reader should not polluted the global hiveconf instance Authored-by: yikaifei Signed-off-by: Cheng Pan --- .../spark/connector/hive/read/HiveScan.scala | 2 +- .../spark/connector/hive/HiveQuerySuite.scala | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala index 0b79d7307..ecdfc76c5 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScan.scala @@ -64,7 +64,7 @@ case class HiveScan( } override def createReaderFactory(): PartitionReaderFactory = { - val hiveConf = fileIndex.hiveCatalog.hadoopConfiguration() + val hiveConf = new 
Configuration(fileIndex.hiveCatalog.hadoopConfiguration()) addCatalogTableConfToConf(hiveConf, catalogTable) val table = HiveClientImpl.toHiveTable(catalogTable) diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala index 1d3d5ae10..0dd1efdec 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala @@ -175,6 +175,23 @@ class HiveQuerySuite extends KyuubiHiveTest { } } + test("[KYUUBI #5414] Reader should not polluted the global hiveconf instance") { + withSparkSession() { spark => + val table = "hive.default.hiveconf_test" + withTempPartitionedTable(spark, table, "ORC", hiveTable = true) { + spark.sql( + s""" + | INSERT OVERWRITE + | $table PARTITION(year = '2022') + | VALUES("yi", "08") + |""".stripMargin).collect() + + checkQueryResult(s"select * from $table", spark, Array(Row.apply("yi", "2022", "08"))) + checkQueryResult(s"select count(*) as c from $table", spark, Array(Row.apply(1))) + } + } + } + test("Partitioned table insert and static partition value is empty string") { withSparkSession() { spark => val table = "hive.default.employee"