[KYUUBI #1321] Add config to control if zorder using global sort
<!-- Thanks for sending a pull request! Here are some tips for you: 1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html 2. If the PR is related to an issue in https://github.com/apache/incubator-kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'. 3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'. --> ### _Why are the changes needed?_ <!-- Please clarify why the changes are needed. For instance, 1. If you add a feature, you can talk about the use case of it. 2. If you fix a bug, you can clarify why it is a bug. --> Add a new config `spark.sql.optimizer.zorderGlobalSort.enabled` to control if we do a global sort using zorder. This is a trade-off with data skew if the zorder columns has low cardinality. ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before make a pull request Closes #1321 from ulysses-you/zorder-config. Closes #1321 422599e4 [ulysses-you] delete table c11e2f4f [ulysses-you] doc b984b537 [ulysses-you] Add config to control if zorder using global sort Authored-by: ulysses-you <ulyssesyou18@gmail.com> Signed-off-by: ulysses-you <ulyssesyou@apache.org>
This commit is contained in:
parent
22ad269dfe
commit
1003f5296c
@ -88,6 +88,15 @@ object KyuubiSQLConf {
|
||||
.booleanConf
|
||||
.createWithDefault(true)
|
||||
|
||||
val ZORDER_GLOBAL_SORT_ENABLED =
|
||||
buildConf("spark.sql.optimizer.zorderGlobalSort.enabled")
|
||||
.doc("When true, we do a global sort using zorder. Note that, it can cause data skew " +
|
||||
"issue if the zorder columns have less cardinality. When false, we only do local sort " +
|
||||
"using zorder.")
|
||||
.version("1.4.0")
|
||||
.booleanConf
|
||||
.createWithDefault(true)
|
||||
|
||||
val WATCHDOG_MAX_HIVEPARTITION =
|
||||
buildConf("spark.sql.watchdog.maxHivePartitions")
|
||||
.doc("Add maxHivePartitions Strategy to avoid scan excessive " +
|
||||
|
||||
@ -146,7 +146,7 @@ trait InsertZorderHelper extends Rule[LogicalPlan] with ZorderBuilder {
|
||||
}
|
||||
Sort(
|
||||
SortOrder(orderExpr, Ascending, NullsLast, Seq.empty) :: Nil,
|
||||
true,
|
||||
conf.getConf(KyuubiSQLConf.ZORDER_GLOBAL_SORT_ENABLED),
|
||||
plan
|
||||
)
|
||||
}
|
||||
|
||||
@ -40,6 +40,7 @@ import org.apache.spark.sql.internal.SQLConf
|
||||
import org.apache.spark.sql.types._
|
||||
import org.apache.spark.unsafe.types.UTF8String
|
||||
|
||||
import org.apache.kyuubi.sql.KyuubiSQLConf
|
||||
import org.apache.kyuubi.sql.zorder.ZorderSqlExtensionsParser.{BigDecimalLiteralContext, BigIntLiteralContext, BooleanLiteralContext, DecimalLiteralContext, DoubleLiteralContext, IntegerLiteralContext, LogicalBinaryContext, MultipartIdentifierContext, NullLiteralContext, NumberContext, OptimizeZorderContext, PassThroughContext, QueryContext, SingleStatementContext, SmallIntLiteralContext, StringLiteralContext, TinyIntLiteralContext, TypeConstructorContext, ZorderClauseContext}
|
||||
|
||||
abstract class ZorderSqlAstBuilderBase extends ZorderSqlExtensionsBaseVisitor[AnyRef] {
|
||||
@ -89,7 +90,7 @@ abstract class ZorderSqlAstBuilderBase extends ZorderSqlExtensionsBaseVisitor[An
|
||||
val query =
|
||||
Sort(
|
||||
SortOrder(orderExpr, Ascending, NullsLast, Seq.empty) :: Nil,
|
||||
true,
|
||||
conf.getConf(KyuubiSQLConf.ZORDER_GLOBAL_SORT_ENABLED),
|
||||
Project(Seq(UnresolvedStar(None)), tableWithFilter))
|
||||
|
||||
buildOptimizeZorderStatement(tableIdent, query)
|
||||
|
||||
@ -537,6 +537,28 @@ trait ZorderSuite extends KyuubiSparkSQLExtensionTest with ExpressionEvalHelper
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("Add config to control if zorder using global sort") {
|
||||
withTable("t") {
|
||||
withSQLConf(KyuubiSQLConf.ZORDER_GLOBAL_SORT_ENABLED.key -> "false") {
|
||||
sql(
|
||||
"""
|
||||
|CREATE TABLE t (c1 int, c2 string) TBLPROPERTIES (
|
||||
|'kyuubi.zorder.enabled'= 'true',
|
||||
|'kyuubi.zorder.cols'= 'c1,c2')
|
||||
|""".stripMargin)
|
||||
val p1 = sql("OPTIMIZE t ZORDER BY c1, c2").queryExecution.analyzed
|
||||
assert(p1.collect {
|
||||
case shuffle: Sort if !shuffle.global => shuffle
|
||||
}.size == 1)
|
||||
|
||||
val p2 = sql("INSERT INTO TABLE t SELECT * FROM VALUES(1,'a')").queryExecution.analyzed
|
||||
assert(p2.collect {
|
||||
case shuffle: Sort if !shuffle.global => shuffle
|
||||
}.size == 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class ZorderWithCodegenEnabledSuite extends ZorderSuite {
|
||||
|
||||
@ -75,4 +75,5 @@ spark.sql.optimizer.forceShuffleBeforeJoin.enabled | false | Ensure shuffle node
|
||||
spark.sql.optimizer.finalStageConfigIsolation.enabled | false | If true, the final stage support use different config with previous stage. The prefix of final stage config key should be `spark.sql.finalStage.`. For example, the raw spark config: `spark.sql.adaptive.advisoryPartitionSizeInBytes`, then the final stage config should be: `spark.sql.finalStage.adaptive.advisoryPartitionSizeInBytes`. | 1.2.0
|
||||
spark.sql.analyzer.classification.enabled | false | When true, allows Kyuubi engine to judge this SQL's classification and set `spark.sql.analyzer.classification` back into sessionConf. Through this configuration item, Spark can optimizing configuration dynamic. | 1.4.0
|
||||
spark.sql.optimizer.insertZorderBeforeWriting.enabled | true | When true, we will follow target table properties to insert zorder or not. The key properties are: 1) `kyuubi.zorder.enabled`: if this property is true, we will insert zorder before writing data. 2) `kyuubi.zorder.cols`: string split by comma, we will zorder by these cols. | 1.4.0
|
||||
spark.sql.optimizer.zorderGlobalSort.enabled | true | When true, we do a global sort using zorder. Note that, it can cause data skew issue if the zorder columns have less cardinality. When false, we only do local sort using zorder. | 1.4.0
|
||||
spark.sql.watchdog.maxHivePartitions | none | Add maxHivePartitions Strategy to avoid scan excessive hive partitions on partitioned table, it's optional that works with defined. | 1.4.0
|
||||
|
||||
Loading…
Reference in New Issue
Block a user