From 41e9505722ffe69a83fe43cce60cfbbb445e2a35 Mon Sep 17 00:00:00 2001 From: Yikf Date: Thu, 16 Mar 2023 10:12:44 +0800 Subject: [PATCH] [KYUUBI #4525][KSHC] Partitioning predicates should take effect to filter data ### _Why are the changes needed?_ This PR aims to close https://github.com/apache/kyuubi/issues/4525. The root cause of this problem is that Apache Spark does predicate push-down in `V2ScanRelationPushDown`, but the spark-hive-connector does not apply the pushed-down predicates when filtering data. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request Closes #4528 from Yikf/KYUUBI-4525. Closes #4525 a65a1873f [Yikf] Partitioning predicates should take effect to filter data Authored-by: Yikf Signed-off-by: Cheng Pan --- .../connector/hive/read/HiveScanBuilder.scala | 4 +++- .../spark/connector/hive/HiveQuerySuite.scala | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala index 8e90cc3ab..89836e712 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala @@ -37,6 +37,8 @@ case class HiveScanBuilder( catalogTable = table, dataSchema = dataSchema, readDataSchema = readDataSchema(), - readPartitionSchema = readPartitionSchema()) + readPartitionSchema = readPartitionSchema(), + partitionFilters = 
partitionFilters, + dataFilters = dataFilters) } } diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala index e61325647..16ea03234 100644 --- a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala +++ b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala @@ -107,6 +107,30 @@ class HiveQuerySuite extends KyuubiHiveTest { } } + test("[KYUUBI #4525] Partitioning predicates should take effect to filter data") { + withSparkSession(Map("hive.exec.dynamic.partition.mode" -> "nonstrict")) { spark => + val table = "hive.default.employee" + withTempPartitionedTable(spark, table) { + spark.sql( + s""" + | INSERT OVERWRITE + | $table + | VALUES("yi", "2022", "0808"),("yi", "2023", "0316") + |""".stripMargin).collect() + + checkQueryResult( + s"select * from $table where year = '2022'", + spark, + Array(Row.apply("yi", "2022", "0808"))) + + checkQueryResult( + s"select * from $table where year = '2023'", + spark, + Array(Row.apply("yi", "2023", "0316"))) + } + } + } + test("Partitioned table insert and all static insert") { withSparkSession() { spark => val table = "hive.default.employee"