From d17effbe8c657a94ddd04769e294ec8962a104a3 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 25 Jun 2021 20:48:51 +0800 Subject: [PATCH] [KYUUBI #715] [SQL] Check if need add repartition through project ### _Why are the changes needed?_ Avoid unnecessary shuffle ### _How was this patch tested?_ Add test Closes #715 from ulysses-you/through-project. Closes #715 5942320a [ulysses-you] through-project Authored-by: ulysses-you Signed-off-by: Cheng Pan <379377944@qq.com> --- .../main/scala/org/apache/kyuubi/sql/KyuubiAnalysis.scala | 1 + .../scala/org/apache/spark/sql/KyuubiExtensionSuite.scala | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/dev/kyuubi-extension-spark_3.1/src/main/scala/org/apache/kyuubi/sql/KyuubiAnalysis.scala b/dev/kyuubi-extension-spark_3.1/src/main/scala/org/apache/kyuubi/sql/KyuubiAnalysis.scala index c9a1d0bb2..3d9b408d2 100644 --- a/dev/kyuubi-extension-spark_3.1/src/main/scala/org/apache/kyuubi/sql/KyuubiAnalysis.scala +++ b/dev/kyuubi-extension-spark_3.1/src/main/scala/org/apache/kyuubi/sql/KyuubiAnalysis.scala @@ -153,6 +153,7 @@ case class RepartitionBeforeWriteHive(session: SparkSession) extends Rule[Logica object RepartitionBeforeWriteHelper { def canInsertRepartitionByExpression(plan: LogicalPlan): Boolean = plan match { + case Project(_, child) => canInsertRepartitionByExpression(child) case Limit(_, _) => false case _: Sort => false case _: RepartitionByExpression => false diff --git a/dev/kyuubi-extension-spark_3.1/src/test/scala/org/apache/spark/sql/KyuubiExtensionSuite.scala b/dev/kyuubi-extension-spark_3.1/src/test/scala/org/apache/spark/sql/KyuubiExtensionSuite.scala index d65455fa6..9afe6c43c 100644 --- a/dev/kyuubi-extension-spark_3.1/src/test/scala/org/apache/spark/sql/KyuubiExtensionSuite.scala +++ b/dev/kyuubi-extension-spark_3.1/src/test/scala/org/apache/spark/sql/KyuubiExtensionSuite.scala @@ -110,6 +110,11 @@ class KyuubiExtensionSuite extends QueryTest with SQLTestUtils with AdaptiveSpar sql(s"CREATE TABLE tmp1 $storage PARTITIONED BY(c2) AS " + s"SELECT * FROM VALUES(1, 'a'),(2, 'b') AS t(c1, c2)") } + + withTable("tmp1") { + sql(s"CREATE TABLE tmp1 $storage PARTITIONED BY(c2) AS " + + s"SELECT * FROM VALUES(1, 'a'),(2, 'b') AS t(c1, c2) DISTRIBUTE BY rand()") + } } } }