[KYUUBI #4525][KSHC] Partitioning predicates should take effect to filter data

### _Why are the changes needed?_

This PR aims to close https://github.com/apache/kyuubi/issues/4525.

The root cause of this problem is that Apache Spark does predicate push-down in `V2ScanRelationPushDown`, but the spark-hive-connector does not apply push-down predicates for data filtering.

### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request

Closes #4528 from Yikf/KYUUBI-4525.

Closes #4525

a65a1873f [Yikf] Partitioning predicates should take effect to filter data

Authored-by: Yikf <yikaifei@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
Yikf 2023-03-16 10:12:44 +08:00 committed by Cheng Pan
parent 0f45f26a35
commit 41e9505722
No known key found for this signature in database
GPG Key ID: 8001952629BCC75D
2 changed files with 27 additions and 1 deletions

View File

@@ -37,6 +37,8 @@ case class HiveScanBuilder(
catalogTable = table,
dataSchema = dataSchema,
readDataSchema = readDataSchema(),
readPartitionSchema = readPartitionSchema())
readPartitionSchema = readPartitionSchema(),
partitionFilters = partitionFilters,
dataFilters = dataFilters)
}
}

View File

@@ -107,6 +107,30 @@ class HiveQuerySuite extends KyuubiHiveTest {
}
}
// Regression test for KYUUBI #4525: pushed-down partition predicates must
// actually filter the rows returned by the Hive connector scan.
test("[KYUUBI #4525] Partitioning predicates should take effect to filter data") {
  withSparkSession(Map("hive.exec.dynamic.partition.mode" -> "nonstrict")) { spark =>
    val table = "hive.default.employee"
    withTempPartitionedTable(spark, table) {
      // Seed one row per partition ("year"), then query each partition
      // individually and verify only that partition's row comes back.
      spark.sql(
        s"""
           | INSERT OVERWRITE
           | $table
           | VALUES("yi", "2022", "0808"),("yi", "2023", "0316")
           |""".stripMargin).collect()
      val expectedByYear = Seq(
        ("2022", Row.apply("yi", "2022", "0808")),
        ("2023", Row.apply("yi", "2023", "0316")))
      expectedByYear.foreach { case (year, expectedRow) =>
        checkQueryResult(
          s"select * from $table where year = '$year'",
          spark,
          Array(expectedRow))
      }
    }
  }
}
test("Partitioned table insert and all static insert") {
withSparkSession() { spark =>
val table = "hive.default.employee"