[KYUUBI #4098] Separate Trino lexer file
### _Why are the changes needed?_
The Spark and Trino grammars define the `STRING` literal pattern differently.
Spark:
```
STRING
: '\'' ( ~'\'' | '\'\'' )* '\''
| 'R\'' (~'\'')* '\''
| 'R"'(~'"')* '"'
;
```
Trino:
```
STRING
: '\'' ( ~'\'' | '\'\'' )* '\''
;
```
Since this lexer is used only for the Trino frontend, it should follow Trino's behavior.
### _How was this patch tested?_
Pass CI
Closes #4098 from ulysses-you/string.
Closes #4098
00b77c54 [ulysses-you] address comments
f371f090 [ulysses-you] Separate Trino lexer file
Authored-by: ulysses-you <ulyssesyou18@gmail.com>
Signed-off-by: ulysses-you <ulyssesyou@apache.org>
This commit is contained in:
parent
15742ad3ac
commit
88e9498051
2
.gitignore
vendored
2
.gitignore
vendored
@ -81,5 +81,5 @@ conf/kyuubi-env.sh
|
||||
|
||||
# For Antlr
|
||||
kyuubi-server/gen
|
||||
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiSqlBaseLexer.tokens
|
||||
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/*.tokens
|
||||
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/gen/
|
||||
|
||||
@ -43,74 +43,13 @@ lexer grammar KyuubiSqlBaseLexer;
|
||||
|
||||
SEMICOLON: ';';
|
||||
|
||||
AND: 'AND';
|
||||
BQ: '`';
|
||||
BY: 'BY';
|
||||
COMMA: ',';
|
||||
DOT: '.';
|
||||
EQ : '=' | '==';
|
||||
NSEQ: '<=>';
|
||||
NEQ : '<>';
|
||||
NEQJ: '!=';
|
||||
LT : '<';
|
||||
LTE : '<=' | '!>';
|
||||
GT : '>';
|
||||
GTE : '>=' | '!<';
|
||||
LEFT_PAREN: '(';
|
||||
RIGHT_PAREN: ')';
|
||||
OR: 'OR';
|
||||
SEARCH_STRING_ESCAPE: '\'' '\\' '\'';
|
||||
|
||||
DESC: 'DESC';
|
||||
DESCRIBE: 'DESCRIBE';
|
||||
FROM: 'FROM';
|
||||
FALSE: 'FALSE';
|
||||
LIKE: 'LIKE';
|
||||
IN: 'IN';
|
||||
WHERE: 'WHERE';
|
||||
|
||||
KYUUBI: 'KYUUBI';
|
||||
KYUUBIADMIN: 'KYUUBIADMIN';
|
||||
|
||||
AUTO_INCREMENT: 'AUTO_INCREMENT';
|
||||
CASE_SENSITIVE: 'CASE_SENSITIVE';
|
||||
CREATE_PARAMS: 'CREATE_PARAMS';
|
||||
DATA_TYPE: 'DATA_TYPE';
|
||||
ESCAPE: 'ESCAPE';
|
||||
FIXED_PREC_SCALE: 'FIXED_PREC_SCALE';
|
||||
IS: 'IS';
|
||||
LITERAL_PREFIX: 'LITERAL_PREFIX';
|
||||
LITERAL_SUFFIX: 'LITERAL_SUFFIX';
|
||||
LOCAL_TYPE_NAME: 'LOCAL_TYPE_NAME';
|
||||
MAXIMUM_SCALE: 'MAXIMUM_SCALE';
|
||||
MINIMUM_SCALE: 'MINIMUM_SCALE';
|
||||
NULL: 'NULL';
|
||||
NULLABLE: 'NULLABLE';
|
||||
NUM_PREC_RADIX: 'NUM_PREC_RADIX';
|
||||
ORDER: 'ORDER';
|
||||
PRECISION: 'PRECISION';
|
||||
REMARKS: 'REMARKS';
|
||||
REF_GENERATION: 'REF_GENERATION';
|
||||
SEARCHABLE: 'SEARCHABLE';
|
||||
SELECT: 'SELECT';
|
||||
SESSION: 'SESSION';
|
||||
SQL_DATA_TYPE: 'SQL_DATA_TYPE';
|
||||
SQL_DATETIME_SUB: 'SQL_DATETIME_SUB';
|
||||
SYSTEM_JDBC_CATALOGS: 'SYSTEM.JDBC.CATALOGS';
|
||||
SYSTEM_JDBC_SCHEMAS: 'SYSTEM.JDBC.SCHEMAS';
|
||||
SYSTEM_JDBC_TABLES: 'SYSTEM.JDBC.TABLES';
|
||||
SYSTEM_JDBC_TABLE_TYPES: 'SYSTEM.JDBC.TABLE_TYPES';
|
||||
SYSTEM_JDBC_TYPES: 'SYSTEM.JDBC.TYPES';
|
||||
SELF_REFERENCING_COL_NAME: 'SELF_REFERENCING_COL_NAME';
|
||||
UNSIGNED_ATTRIBUTE: 'UNSIGNED_ATTRIBUTE';
|
||||
TABLE_CAT: 'TABLE_CAT';
|
||||
TABLE_CATALOG: 'TABLE_CATALOG';
|
||||
TABLE_NAME: 'TABLE_NAME';
|
||||
TABLE_SCHEM: 'TABLE_SCHEM';
|
||||
TABLE_TYPE: 'TABLE_TYPE';
|
||||
TYPE_CAT: 'TYPE_CAT';
|
||||
TYPE_NAME: 'TYPE_NAME';
|
||||
TYPE_SCHEM: 'TYPE_SCHEM';
|
||||
|
||||
BACKQUOTED_IDENTIFIER
|
||||
: '`' ( ~'`' | '``' )* '`'
|
||||
@ -129,7 +68,7 @@ IDENTIFIER
|
||||
;
|
||||
|
||||
STRING
|
||||
: '\'' ( ~'\'' | '\'\'' )* '\''
|
||||
: '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
|
||||
| 'R\'' (~'\'')* '\''
|
||||
| 'R"'(~'"')* '"'
|
||||
;
|
||||
|
||||
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// This lexer should follow Trino `https://github.com/trinodb/trino/blob/master/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4`
|
||||
|
||||
lexer grammar KyuubiTrinoFeBaseLexer;
|
||||
|
||||
SEMICOLON: ';';
|
||||
LEFT_PAREN: '(';
|
||||
RIGHT_PAREN: ')';
|
||||
|
||||
AND: 'AND';
|
||||
BQ: '`';
|
||||
BY: 'BY';
|
||||
COMMA: ',';
|
||||
DOT: '.';
|
||||
EQ : '=' | '==';
|
||||
NSEQ: '<=>';
|
||||
NEQ : '<>';
|
||||
NEQJ: '!=';
|
||||
LT : '<';
|
||||
LTE : '<=' | '!>';
|
||||
GT : '>';
|
||||
GTE : '>=' | '!<';
|
||||
OR: 'OR';
|
||||
|
||||
FROM: 'FROM';
|
||||
FALSE: 'FALSE';
|
||||
LIKE: 'LIKE';
|
||||
IN: 'IN';
|
||||
WHERE: 'WHERE';
|
||||
|
||||
ESCAPE: 'ESCAPE';
|
||||
AUTO_INCREMENT: 'AUTO_INCREMENT';
|
||||
CASE_SENSITIVE: 'CASE_SENSITIVE';
|
||||
CREATE_PARAMS: 'CREATE_PARAMS';
|
||||
DATA_TYPE: 'DATA_TYPE';
|
||||
FIXED_PREC_SCALE: 'FIXED_PREC_SCALE';
|
||||
IS: 'IS';
|
||||
LITERAL_PREFIX: 'LITERAL_PREFIX';
|
||||
LITERAL_SUFFIX: 'LITERAL_SUFFIX';
|
||||
LOCAL_TYPE_NAME: 'LOCAL_TYPE_NAME';
|
||||
MAXIMUM_SCALE: 'MAXIMUM_SCALE';
|
||||
MINIMUM_SCALE: 'MINIMUM_SCALE';
|
||||
NULL: 'NULL';
|
||||
NULLABLE: 'NULLABLE';
|
||||
NUM_PREC_RADIX: 'NUM_PREC_RADIX';
|
||||
ORDER: 'ORDER';
|
||||
PRECISION: 'PRECISION';
|
||||
REMARKS: 'REMARKS';
|
||||
REF_GENERATION: 'REF_GENERATION';
|
||||
SEARCHABLE: 'SEARCHABLE';
|
||||
SELECT: 'SELECT';
|
||||
SQL_DATA_TYPE: 'SQL_DATA_TYPE';
|
||||
SQL_DATETIME_SUB: 'SQL_DATETIME_SUB';
|
||||
SYSTEM_JDBC_CATALOGS: 'SYSTEM.JDBC.CATALOGS';
|
||||
SYSTEM_JDBC_SCHEMAS: 'SYSTEM.JDBC.SCHEMAS';
|
||||
SYSTEM_JDBC_TABLES: 'SYSTEM.JDBC.TABLES';
|
||||
SYSTEM_JDBC_TABLE_TYPES: 'SYSTEM.JDBC.TABLE_TYPES';
|
||||
SYSTEM_JDBC_TYPES: 'SYSTEM.JDBC.TYPES';
|
||||
SELF_REFERENCING_COL_NAME: 'SELF_REFERENCING_COL_NAME';
|
||||
UNSIGNED_ATTRIBUTE: 'UNSIGNED_ATTRIBUTE';
|
||||
TABLE_CAT: 'TABLE_CAT';
|
||||
TABLE_CATALOG: 'TABLE_CATALOG';
|
||||
TABLE_NAME: 'TABLE_NAME';
|
||||
TABLE_SCHEM: 'TABLE_SCHEM';
|
||||
TABLE_TYPE: 'TABLE_TYPE';
|
||||
TYPE_CAT: 'TYPE_CAT';
|
||||
TYPE_NAME: 'TYPE_NAME';
|
||||
TYPE_SCHEM: 'TYPE_SCHEM';
|
||||
|
||||
fragment SEARCH_STRING_ESCAPE: '\'' '\\' '\'';
|
||||
|
||||
STRING_ESCAPE
|
||||
: SEARCH_STRING_ESCAPE
|
||||
;
|
||||
|
||||
STRING
|
||||
: '\'' ( ~'\'' | '\'\'' )* '\''
|
||||
;
|
||||
|
||||
SIMPLE_COMMENT
|
||||
: '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
|
||||
;
|
||||
|
||||
BRACKETED_COMMENT
|
||||
: '/*' .*? '*/' -> channel(HIDDEN)
|
||||
;
|
||||
|
||||
WS : [ \r\n\t]+ -> channel(HIDDEN)
|
||||
;
|
||||
|
||||
// Catch-all for anything we can't recognize.
|
||||
// We use this to be able to ignore and recover all the text
|
||||
// when splitting statements with DelimiterLexer
|
||||
UNRECOGNIZED
|
||||
: .
|
||||
;
|
||||
@ -17,7 +17,7 @@
|
||||
|
||||
parser grammar KyuubiTrinoFeBaseParser;
|
||||
|
||||
options { tokenVocab = KyuubiSqlBaseLexer; }
|
||||
options { tokenVocab = KyuubiTrinoFeBaseLexer; }
|
||||
|
||||
singleStatement
|
||||
: statement SEMICOLON* EOF
|
||||
@ -25,42 +25,42 @@ singleStatement
|
||||
|
||||
statement
|
||||
: SELECT TABLE_SCHEM COMMA TABLE_CATALOG FROM SYSTEM_JDBC_SCHEMAS
|
||||
(WHERE (TABLE_CATALOG EQ catalog=STRING+)? AND? (TABLE_SCHEM LIKE schema=STRING+)?)?
|
||||
ORDER BY TABLE_CATALOG COMMA TABLE_SCHEM #getSchemas
|
||||
| SELECT TABLE_CAT FROM SYSTEM_JDBC_CATALOGS ORDER BY TABLE_CAT #getCatalogs
|
||||
| SELECT TABLE_TYPE FROM SYSTEM_JDBC_TABLE_TYPES ORDER BY TABLE_TYPE #getTableTypes
|
||||
(WHERE (TABLE_CATALOG EQ catalog=stringLit)? AND? (TABLE_SCHEM LIKE schema=stringLit)?)?
|
||||
ORDER BY TABLE_CATALOG COMMA TABLE_SCHEM #getSchemas
|
||||
| SELECT TABLE_CAT FROM SYSTEM_JDBC_CATALOGS ORDER BY TABLE_CAT #getCatalogs
|
||||
| SELECT TABLE_TYPE FROM SYSTEM_JDBC_TABLE_TYPES ORDER BY TABLE_TYPE #getTableTypes
|
||||
| SELECT TYPE_NAME COMMA DATA_TYPE COMMA PRECISION COMMA LITERAL_PREFIX COMMA
|
||||
LITERAL_SUFFIX COMMA CREATE_PARAMS COMMA NULLABLE COMMA CASE_SENSITIVE COMMA
|
||||
SEARCHABLE COMMA UNSIGNED_ATTRIBUTE COMMA FIXED_PREC_SCALE COMMA AUTO_INCREMENT
|
||||
COMMA LOCAL_TYPE_NAME COMMA MINIMUM_SCALE COMMA MAXIMUM_SCALE COMMA SQL_DATA_TYPE
|
||||
COMMA SQL_DATETIME_SUB COMMA NUM_PREC_RADIX FROM SYSTEM_JDBC_TYPES ORDER BY DATA_TYPE #getTypeInfo
|
||||
COMMA SQL_DATETIME_SUB COMMA NUM_PREC_RADIX FROM SYSTEM_JDBC_TYPES ORDER BY DATA_TYPE #getTypeInfo
|
||||
| SELECT TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME COMMA TABLE_TYPE COMMA REMARKS COMMA
|
||||
TYPE_CAT COMMA TYPE_SCHEM COMMA TYPE_NAME COMMA SELF_REFERENCING_COL_NAME COMMA REF_GENERATION
|
||||
FROM SYSTEM_JDBC_TABLES
|
||||
(WHERE tableCatalogFilter? AND? tableSchemaFilter? AND? tableNameFilter? AND? tableTypeFilter?)?
|
||||
ORDER BY TABLE_TYPE COMMA TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME #getTables
|
||||
| .*? #passThrough
|
||||
ORDER BY TABLE_TYPE COMMA TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME #getTables
|
||||
| .*? #passThrough
|
||||
;
|
||||
|
||||
tableCatalogFilter
|
||||
: TABLE_CAT IS NULL #nullCatalog
|
||||
| TABLE_CAT EQ catalog=STRING+ #catalogFilter
|
||||
: TABLE_CAT IS NULL #nullCatalog
|
||||
| TABLE_CAT EQ catalog=stringLit #catalogFilter
|
||||
;
|
||||
|
||||
tableSchemaFilter
|
||||
: TABLE_SCHEM IS NULL #nulTableSchema
|
||||
| TABLE_SCHEM LIKE schemaPattern=STRING+ ESCAPE SEARCH_STRING_ESCAPE #schemaFilter
|
||||
: TABLE_SCHEM IS NULL #nulTableSchema
|
||||
| TABLE_SCHEM LIKE schemaPattern=stringLit ESCAPE STRING_ESCAPE #schemaFilter
|
||||
;
|
||||
|
||||
tableNameFilter
|
||||
: TABLE_NAME LIKE tableNamePattern=STRING+ ESCAPE SEARCH_STRING_ESCAPE
|
||||
: TABLE_NAME LIKE tableNamePattern=stringLit ESCAPE STRING_ESCAPE
|
||||
;
|
||||
|
||||
tableTypeFilter
|
||||
: FALSE #tableTypesAlwaysFalse
|
||||
| TABLE_TYPE IN '(' stirngInValue (',' stirngInValue)* ')' #typesFilter
|
||||
: FALSE #tableTypesAlwaysFalse
|
||||
| TABLE_TYPE IN LEFT_PAREN stringLit (COMMA stringLit)* RIGHT_PAREN #typesFilter
|
||||
;
|
||||
|
||||
stirngInValue
|
||||
: STRING+
|
||||
stringLit
|
||||
: STRING
|
||||
;
|
||||
|
||||
@ -112,6 +112,6 @@ class KyuubiTrinoFeAstBuilder extends KyuubiTrinoFeBaseParserBaseVisitor[AnyRef]
|
||||
}
|
||||
|
||||
override def visitTypesFilter(ctx: TypesFilterContext): List[String] = {
|
||||
ctx.stirngInValue().asScala.map(v => unescapeSQLString(v.getText)).toList
|
||||
ctx.stringLit().asScala.map(v => unescapeSQLString(v.getText)).toList
|
||||
}
|
||||
}
|
||||
|
||||
@ -22,7 +22,7 @@ import org.antlr.v4.runtime.atn.PredictionMode
|
||||
import org.antlr.v4.runtime.misc.ParseCancellationException
|
||||
import org.antlr.v4.runtime.tree.ParseTree
|
||||
|
||||
import org.apache.kyuubi.sql.{KyuubiSqlBaseLexer, KyuubiTrinoFeBaseParser}
|
||||
import org.apache.kyuubi.sql.{KyuubiTrinoFeBaseLexer, KyuubiTrinoFeBaseParser}
|
||||
import org.apache.kyuubi.sql.parser.{KyuubiParserBase, PostProcessor, UpperCaseCharStream}
|
||||
|
||||
class KyuubiTrinoFeParser extends KyuubiParserBase[KyuubiTrinoFeBaseParser] {
|
||||
@ -30,7 +30,7 @@ class KyuubiTrinoFeParser extends KyuubiParserBase[KyuubiTrinoFeBaseParser] {
|
||||
override lazy val astBuilder = new KyuubiTrinoFeAstBuilder
|
||||
|
||||
protected def parse[T](command: String)(toResult: KyuubiTrinoFeBaseParser => T): T = {
|
||||
val lexer = new KyuubiSqlBaseLexer(
|
||||
val lexer = new KyuubiTrinoFeBaseLexer(
|
||||
new UpperCaseCharStream(CharStreams.fromString(command)))
|
||||
lexer.removeErrorListeners()
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user