From 41e9505722ffe69a83fe43cce60cfbbb445e2a35 Mon Sep 17 00:00:00 2001
From: Yikf <yikaifei@apache.org>
Date: Thu, 16 Mar 2023 10:12:44 +0800
Subject: [PATCH] [KYUUBI #4525][KSHC] Partitioning predicates should take
 effect to filter data

### _Why are the changes needed?_

This PR aims to close https://github.com/apache/kyuubi/issues/4525.

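The root cause of this problem is that Apache Spark performs predicate push-down in `V2ScanRelationPushDown`, but the Spark Hive connector did not apply the pushed-down predicates when filtering data. This patch passes `partitionFilters` and `dataFilters` from `HiveScanBuilder` to the scan it builds.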

### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request

Closes #4528 from Yikf/KYUUBI-4525.

Closes #4525

a65a1873f [Yikf] Partitioning predicates should take effect to filter data

Authored-by: Yikf <yikaifei@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
---
 .../connector/hive/read/HiveScanBuilder.scala |  4 +++-
 .../spark/connector/hive/HiveQuerySuite.scala | 24 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
index 8e90cc3ab..89836e712 100644
--- a/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
+++ b/extensions/spark/kyuubi-spark-connector-hive/src/main/scala/org/apache/kyuubi/spark/connector/hive/read/HiveScanBuilder.scala
@@ -37,6 +37,8 @@ case class HiveScanBuilder(
       catalogTable = table,
       dataSchema = dataSchema,
       readDataSchema = readDataSchema(),
-      readPartitionSchema = readPartitionSchema())
+      readPartitionSchema = readPartitionSchema(),
+      partitionFilters = partitionFilters,
+      dataFilters = dataFilters)
   }
 }
diff --git a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
index e61325647..16ea03234 100644
--- a/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
+++ b/extensions/spark/kyuubi-spark-connector-hive/src/test/scala/org/apache/kyuubi/spark/connector/hive/HiveQuerySuite.scala
@@ -107,6 +107,30 @@ class HiveQuerySuite extends KyuubiHiveTest {
     }
   }
 
+  test("[KYUUBI #4525] Partitioning predicates should take effect to filter data") {
+    withSparkSession(Map("hive.exec.dynamic.partition.mode" -> "nonstrict")) { spark =>
+      val table = "hive.default.employee"
+      withTempPartitionedTable(spark, table) {
+        spark.sql(
+          s"""
+             | INSERT OVERWRITE
+             | $table
+             | VALUES("yi", "2022", "0808"),("yi", "2023", "0316")
+             |""".stripMargin).collect()
+
+        checkQueryResult(
+          s"select * from $table where year = '2022'",
+          spark,
+          Array(Row.apply("yi", "2022", "0808")))
+
+        checkQueryResult(
+          s"select * from $table where year = '2023'",
+          spark,
+          Array(Row.apply("yi", "2023", "0316")))
+      }
+    }
+  }
+
   test("Partitioned table insert and all static insert") {
     withSparkSession() { spark =>
       val table = "hive.default.employee"