[KYUUBI #6582] [KYUUBI-6581] Zorder clause syntax does not support special characters

# 🔍 Description
## Issue References 🔗

This pull request fixes #6581

## Describe Your Solution 🔧

Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.

I modified `KyuubiSparkSQLAstBuilder#visitMultipartIdentifier` and implemented `KyuubiSparkSQLAstBuilder#visitQuotedIdentifier` to process the quoted identifiers.

## Types of changes 🔖

- [x] Bugfix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)

## Test Plan 🧪

#### Behavior Without This Pull Request ⚰️

#### Behavior With This Pull Request 🎉

#### Related Unit Tests

```
extensions/spark/kyuubi-extension-spark-3-3/src/test/scala/org/apache/spark/sql/ZorderSuiteBase.scala

test("optimize sort by backquoted column name")
```

---

# Checklist 📝

- [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html)

**Be nice. Be informative.**

Closes #6582 from XorSum/features/zorder-backquote.

Closes #6582

16ffa1238 [xorsum] zorder by support quote

Authored-by: xorsum <xorsum@outlook.com>
Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
xorsum 2024-08-06 13:39:25 +08:00 committed by Cheng Pan
parent ae467c2b4e
commit d414535cb6
No known key found for this signature in database
GPG Key ID: 8001952629BCC75D
6 changed files with 183 additions and 6 deletions

View File

@ -131,14 +131,30 @@ class KyuubiSparkSQLAstBuilder extends KyuubiSparkSQLBaseVisitor[AnyRef] with SQ
/**
 * Returns the name parts of a multipart identifier, in the order they were written.
 *
 * Every part is dispatched through `typedVisit[String]` instead of being read with
 * `getText`, so the result is what the per-part visitor produces rather than the raw
 * token text.
 */
override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] =
  withOrigin(ctx) {
    // NOTE(review): presumably each part is handled by visitIdentifier, which unquotes
    // backquoted names — confirm against the grammar.
    ctx.parts.asScala.map(typedVisit[String]).toSeq
  }
/**
 * Resolves a single identifier to its name.
 *
 * A backquoted identifier is delegated to the visitor for the quoted alternative;
 * any other identifier is returned as its raw text.
 */
override def visitIdentifier(ctx: IdentifierContext): String = withOrigin(ctx) {
  ctx.strictIdentifier() match {
    case quoted: QuotedIdentifierAlternativeContext => typedVisit[String](quoted)
    case _ => ctx.getText
  }
}
/**
 * Returns the name carried by a backquoted identifier token: the surrounding backquotes
 * are removed and every doubled backquote inside is collapsed to a single one.
 */
override def visitQuotedIdentifier(ctx: QuotedIdentifierContext): String = withOrigin(ctx) {
  val rawToken = ctx.BACKQUOTED_IDENTIFIER().getText
  // Drop the delimiters first so that only escaped backquotes inside the name remain.
  val withoutDelimiters = rawToken.stripPrefix("`").stripSuffix("`")
  withoutDelimiters.replace("``", "`")
}
/**
 * Builds one [[UnresolvedAttribute]] per multipart identifier listed in the ZORDER clause,
 * preserving the order in which the identifiers were written.
 */
override def visitZorderClause(ctx: ZorderClauseContext): Seq[UnresolvedAttribute] =
  withOrigin(ctx) {
    ctx.multipartIdentifier().asScala.map { identifier =>
      // Each column is emitted exactly once, with its name parts taken from the visitor
      // result for the part rather than from the raw token text.
      UnresolvedAttribute(identifier.parts.asScala.map(typedVisit[String]).toSeq)
    }.toSeq
  }

View File

@ -771,6 +771,49 @@ trait ZorderSuiteBase extends KyuubiSparkSQLExtensionTest with ExpressionEvalHel
}
}
// Runs OPTIMIZE ... ZORDER BY with a backquoted column name (`@c2`) against a table with a
// single shuffle partition and checks the order of the rows read back.
test("optimize sort by backquoted column name") {
  withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
    withTable("up") {
      sql("DROP TABLE IF EXISTS up")
      sql("CREATE TABLE up (c1 INT, `@c2` INT, c3 INT)")

      // Rows are written grouped by c1; the third column is not part of the ZORDER BY.
      val insertedRows = Seq(
        "(0,0,2),(0,1,2),(0,2,1),(0,3,3)",
        "(1,0,4),(1,1,2),(1,2,1),(1,3,3)",
        "(2,0,2),(2,1,1),(2,2,5),(2,3,5)",
        "(3,0,3),(3,1,4),(3,2,9),(3,3,0)")
      sql("INSERT INTO TABLE up VALUES" + insertedRows.mkString(","))

      sql("OPTIMIZE up ZORDER BY c1, `@c2`")

      // (c1, `@c2`) pairs in the order the rows are expected to come back after OPTIMIZE.
      val expectedPairs = Seq(
        (0, 0), (1, 0), (0, 1), (1, 1),
        (2, 0), (3, 0), (2, 1), (3, 1),
        (0, 2), (1, 2), (0, 3), (1, 3),
        (2, 2), (3, 2), (2, 3), (3, 3))

      val actualRows = sql("SELECT c1, `@c2` FROM up").collect()
      assert(actualRows.length == 16)
      expectedPairs.zip(actualRows).foreach { case ((expectedC1, expectedC2), row) =>
        assert(expectedC1 == row.getInt(0))
        assert(expectedC2 == row.getInt(1))
      }
    }
  }
}
// Abstract member returning a ParserInterface; no implementation is given here.
// NOTE(review): presumably concrete suites supply the parser under test — confirm.
def createParser: ParserInterface
}

View File

@ -131,14 +131,30 @@ class KyuubiSparkSQLAstBuilder extends KyuubiSparkSQLBaseVisitor[AnyRef] with SQ
/**
 * Returns the name parts of a multipart identifier, in the order they were written.
 *
 * Every part is dispatched through `typedVisit[String]` instead of being read with
 * `getText`, so the result is what the per-part visitor produces rather than the raw
 * token text.
 */
override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] =
  withOrigin(ctx) {
    // NOTE(review): presumably each part is handled by visitIdentifier, which unquotes
    // backquoted names — confirm against the grammar.
    ctx.parts.asScala.map(typedVisit[String]).toSeq
  }
/**
 * Resolves a single identifier to its name.
 *
 * A backquoted identifier is delegated to the visitor for the quoted alternative;
 * any other identifier is returned as its raw text.
 */
override def visitIdentifier(ctx: IdentifierContext): String = withOrigin(ctx) {
  ctx.strictIdentifier() match {
    case quoted: QuotedIdentifierAlternativeContext => typedVisit[String](quoted)
    case _ => ctx.getText
  }
}
/**
 * Returns the name carried by a backquoted identifier token: the surrounding backquotes
 * are removed and every doubled backquote inside is collapsed to a single one.
 */
override def visitQuotedIdentifier(ctx: QuotedIdentifierContext): String = withOrigin(ctx) {
  val rawToken = ctx.BACKQUOTED_IDENTIFIER().getText
  // Drop the delimiters first so that only escaped backquotes inside the name remain.
  val withoutDelimiters = rawToken.stripPrefix("`").stripSuffix("`")
  withoutDelimiters.replace("``", "`")
}
/**
 * Builds one [[UnresolvedAttribute]] per multipart identifier listed in the ZORDER clause,
 * preserving the order in which the identifiers were written.
 */
override def visitZorderClause(ctx: ZorderClauseContext): Seq[UnresolvedAttribute] =
  withOrigin(ctx) {
    ctx.multipartIdentifier().asScala.map { identifier =>
      // Each column is emitted exactly once, with its name parts taken from the visitor
      // result for the part rather than from the raw token text.
      UnresolvedAttribute(identifier.parts.asScala.map(typedVisit[String]).toSeq)
    }.toSeq
  }

View File

@ -772,6 +772,49 @@ trait ZorderSuiteBase extends KyuubiSparkSQLExtensionTest with ExpressionEvalHel
}
}
// Runs OPTIMIZE ... ZORDER BY with a backquoted column name (`@c2`) against a table with a
// single shuffle partition and checks the order of the rows read back.
test("optimize sort by backquoted column name") {
  withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
    withTable("up") {
      sql("DROP TABLE IF EXISTS up")
      sql("CREATE TABLE up (c1 INT, `@c2` INT, c3 INT)")

      // Rows are written grouped by c1; the third column is not part of the ZORDER BY.
      val insertedRows = Seq(
        "(0,0,2),(0,1,2),(0,2,1),(0,3,3)",
        "(1,0,4),(1,1,2),(1,2,1),(1,3,3)",
        "(2,0,2),(2,1,1),(2,2,5),(2,3,5)",
        "(3,0,3),(3,1,4),(3,2,9),(3,3,0)")
      sql("INSERT INTO TABLE up VALUES" + insertedRows.mkString(","))

      sql("OPTIMIZE up ZORDER BY c1, `@c2`")

      // (c1, `@c2`) pairs in the order the rows are expected to come back after OPTIMIZE.
      val expectedPairs = Seq(
        (0, 0), (1, 0), (0, 1), (1, 1),
        (2, 0), (3, 0), (2, 1), (3, 1),
        (0, 2), (1, 2), (0, 3), (1, 3),
        (2, 2), (3, 2), (2, 3), (3, 3))

      val actualRows = sql("SELECT c1, `@c2` FROM up").collect()
      assert(actualRows.length == 16)
      expectedPairs.zip(actualRows).foreach { case ((expectedC1, expectedC2), row) =>
        assert(expectedC1 == row.getInt(0))
        assert(expectedC2 == row.getInt(1))
      }
    }
  }
}
// Abstract member returning a ParserInterface; no implementation is given here.
// NOTE(review): presumably concrete suites supply the parser under test — confirm.
def createParser: ParserInterface
}

View File

@ -131,14 +131,30 @@ class KyuubiSparkSQLAstBuilder extends KyuubiSparkSQLBaseVisitor[AnyRef] with SQ
/**
 * Returns the name parts of a multipart identifier, in the order they were written.
 *
 * Every part is dispatched through `typedVisit[String]` instead of being read with
 * `getText`, so the result is what the per-part visitor produces rather than the raw
 * token text.
 */
override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] =
  withOrigin(ctx) {
    // NOTE(review): presumably each part is handled by visitIdentifier, which unquotes
    // backquoted names — confirm against the grammar.
    ctx.parts.asScala.map(typedVisit[String]).toSeq
  }
/**
 * Resolves a single identifier to its name.
 *
 * A backquoted identifier is delegated to the visitor for the quoted alternative;
 * any other identifier is returned as its raw text.
 */
override def visitIdentifier(ctx: IdentifierContext): String = withOrigin(ctx) {
  ctx.strictIdentifier() match {
    case quoted: QuotedIdentifierAlternativeContext => typedVisit[String](quoted)
    case _ => ctx.getText
  }
}
/**
 * Returns the name carried by a backquoted identifier token: the surrounding backquotes
 * are removed and every doubled backquote inside is collapsed to a single one.
 */
override def visitQuotedIdentifier(ctx: QuotedIdentifierContext): String = withOrigin(ctx) {
  val rawToken = ctx.BACKQUOTED_IDENTIFIER().getText
  // Drop the delimiters first so that only escaped backquotes inside the name remain.
  val withoutDelimiters = rawToken.stripPrefix("`").stripSuffix("`")
  withoutDelimiters.replace("``", "`")
}
/**
 * Builds one [[UnresolvedAttribute]] per multipart identifier listed in the ZORDER clause,
 * preserving the order in which the identifiers were written.
 */
override def visitZorderClause(ctx: ZorderClauseContext): Seq[UnresolvedAttribute] =
  withOrigin(ctx) {
    ctx.multipartIdentifier().asScala.map { identifier =>
      // Each column is emitted exactly once, with its name parts taken from the visitor
      // result for the part rather than from the raw token text.
      UnresolvedAttribute(identifier.parts.asScala.map(typedVisit[String]).toSeq)
    }.toSeq
  }

View File

@ -772,6 +772,49 @@ trait ZorderSuiteBase extends KyuubiSparkSQLExtensionTest with ExpressionEvalHel
}
}
// Runs OPTIMIZE ... ZORDER BY with a backquoted column name (`@c2`) against a table with a
// single shuffle partition and checks the order of the rows read back.
test("optimize sort by backquoted column name") {
  withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
    withTable("up") {
      sql("DROP TABLE IF EXISTS up")
      sql("CREATE TABLE up (c1 INT, `@c2` INT, c3 INT)")

      // Rows are written grouped by c1; the third column is not part of the ZORDER BY.
      val insertedRows = Seq(
        "(0,0,2),(0,1,2),(0,2,1),(0,3,3)",
        "(1,0,4),(1,1,2),(1,2,1),(1,3,3)",
        "(2,0,2),(2,1,1),(2,2,5),(2,3,5)",
        "(3,0,3),(3,1,4),(3,2,9),(3,3,0)")
      sql("INSERT INTO TABLE up VALUES" + insertedRows.mkString(","))

      sql("OPTIMIZE up ZORDER BY c1, `@c2`")

      // (c1, `@c2`) pairs in the order the rows are expected to come back after OPTIMIZE.
      val expectedPairs = Seq(
        (0, 0), (1, 0), (0, 1), (1, 1),
        (2, 0), (3, 0), (2, 1), (3, 1),
        (0, 2), (1, 2), (0, 3), (1, 3),
        (2, 2), (3, 2), (2, 3), (3, 3))

      val actualRows = sql("SELECT c1, `@c2` FROM up").collect()
      assert(actualRows.length == 16)
      expectedPairs.zip(actualRows).foreach { case ((expectedC1, expectedC2), row) =>
        assert(expectedC1 == row.getInt(0))
        assert(expectedC2 == row.getInt(1))
      }
    }
  }
}
// Abstract member returning a ParserInterface; no implementation is given here.
// NOTE(review): presumably concrete suites supply the parser under test — confirm.
def createParser: ParserInterface
}