[KYUUBI #5414][KSHC] Reader should not pollute the global hiveConf instance

### _Why are the changes needed?_

This pr aims to fix https://github.com/apache/kyuubi/issues/5414.

`HiveReader` initialization incorrectly uses the global hadoopConf as the hiveConf, which causes the reader to pollute the global hadoopConf and causes job read failures.

### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request

### _Was this patch authored or co-authored using generative AI tooling?_

No

Closes #5424 from Yikf/orc-read.

Closes #5414

d6bdf7be4 [yikaifei] [KYUUBI #5414] Reader should not polluted the global hiveconf instance

Authored-by: yikaifei <yikaifei@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
yikaifei 2023-10-17 13:09:18 +08:00 committed by Cheng Pan
parent dcaacc3ed5
commit 47555eb900
No known key found for this signature in database
GPG Key ID: 8001952629BCC75D
2 changed files with 18 additions and 1 deletions

View File

@ -64,7 +64,7 @@ case class HiveScan(
}
override def createReaderFactory(): PartitionReaderFactory = {
val hiveConf = fileIndex.hiveCatalog.hadoopConfiguration()
val hiveConf = new Configuration(fileIndex.hiveCatalog.hadoopConfiguration())
addCatalogTableConfToConf(hiveConf, catalogTable)
val table = HiveClientImpl.toHiveTable(catalogTable)

View File

@ -175,6 +175,23 @@ class HiveQuerySuite extends KyuubiHiveTest {
}
}
// Regression test for KYUUBI #5414: a read through the KSHC reader must not
// mutate the shared Hadoop/Hive configuration. Before the fix, HiveScan passed
// the catalog's global hadoopConfiguration() directly to the reader factory,
// so table-specific settings leaked into it and broke subsequent reads.
test("[KYUUBI #5414] Reader should not polluted the global hiveconf instance") {
withSparkSession() { spark =>
val table = "hive.default.hiveconf_test"
// ORC-format partitioned Hive table; `withTempPartitionedTable` presumably
// creates and drops it around the body — TODO confirm helper semantics.
withTempPartitionedTable(spark, table, "ORC", hiveTable = true) {
spark.sql(
s"""
| INSERT OVERWRITE
| $table PARTITION(year = '2022')
| VALUES("yi", "08")
|""".stripMargin).collect()
// Two reads back-to-back: with a polluted global conf the second query
// (or any later read) would fail or return wrong results.
// NOTE(review): expected row order ("yi", "2022", "08") implies the
// partition column sits between the data columns — verify table schema.
checkQueryResult(s"select * from $table", spark, Array(Row.apply("yi", "2022", "08")))
checkQueryResult(s"select count(*) as c from $table", spark, Array(Row.apply(1)))
}
}
}
test("Partitioned table insert and static partition value is empty string") {
withSparkSession() { spark =>
val table = "hive.default.employee"