[KYUUBI #4098] Separate Trino lexer file

### _Why are the changes needed?_

Spark and Trino define the `STRING` lexer pattern differently.
Spark:
```
STRING
    : '\'' ( ~'\'' | '\'\'' )* '\''
    | 'R\'' (~'\'')* '\''
    | 'R"'(~'"')* '"'
    ;
```

Trino:
```
STRING
    : '\'' ( ~'\'' | '\'\'' )* '\''
    ;
```

Since this lexer is used for Trino only, it should follow Trino's behavior.

### _How was this patch tested?_
Pass CI

Closes #4098 from ulysses-you/string.

Closes #4098

00b77c54 [ulysses-you] address comments
f371f090 [ulysses-you] Separate Trino lexer file

Authored-by: ulysses-you <ulyssesyou18@gmail.com>
Signed-off-by: ulysses-you <ulyssesyou@apache.org>
This commit is contained in:
ulysses-you 2023-01-06 16:10:50 +08:00 committed by ulysses-you
parent 15742ad3ac
commit 88e9498051
No known key found for this signature in database
GPG Key ID: 4C500BC62D576766
6 changed files with 134 additions and 83 deletions

2
.gitignore vendored
View File

@ -81,5 +81,5 @@ conf/kyuubi-env.sh
# For Antlr
kyuubi-server/gen
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/KyuubiSqlBaseLexer.tokens
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/*.tokens
kyuubi-server/src/main/antlr4/org/apache/kyuubi/sql/gen/

View File

@ -43,74 +43,13 @@ lexer grammar KyuubiSqlBaseLexer;
SEMICOLON: ';';
AND: 'AND';
BQ: '`';
BY: 'BY';
COMMA: ',';
DOT: '.';
EQ : '=' | '==';
NSEQ: '<=>';
NEQ : '<>';
NEQJ: '!=';
LT : '<';
LTE : '<=' | '!>';
GT : '>';
GTE : '>=' | '!<';
LEFT_PAREN: '(';
RIGHT_PAREN: ')';
OR: 'OR';
SEARCH_STRING_ESCAPE: '\'' '\\' '\'';
DESC: 'DESC';
DESCRIBE: 'DESCRIBE';
FROM: 'FROM';
FALSE: 'FALSE';
LIKE: 'LIKE';
IN: 'IN';
WHERE: 'WHERE';
KYUUBI: 'KYUUBI';
KYUUBIADMIN: 'KYUUBIADMIN';
AUTO_INCREMENT: 'AUTO_INCREMENT';
CASE_SENSITIVE: 'CASE_SENSITIVE';
CREATE_PARAMS: 'CREATE_PARAMS';
DATA_TYPE: 'DATA_TYPE';
ESCAPE: 'ESCAPE';
FIXED_PREC_SCALE: 'FIXED_PREC_SCALE';
IS: 'IS';
LITERAL_PREFIX: 'LITERAL_PREFIX';
LITERAL_SUFFIX: 'LITERAL_SUFFIX';
LOCAL_TYPE_NAME: 'LOCAL_TYPE_NAME';
MAXIMUM_SCALE: 'MAXIMUM_SCALE';
MINIMUM_SCALE: 'MINIMUM_SCALE';
NULL: 'NULL';
NULLABLE: 'NULLABLE';
NUM_PREC_RADIX: 'NUM_PREC_RADIX';
ORDER: 'ORDER';
PRECISION: 'PRECISION';
REMARKS: 'REMARKS';
REF_GENERATION: 'REF_GENERATION';
SEARCHABLE: 'SEARCHABLE';
SELECT: 'SELECT';
SESSION: 'SESSION';
SQL_DATA_TYPE: 'SQL_DATA_TYPE';
SQL_DATETIME_SUB: 'SQL_DATETIME_SUB';
SYSTEM_JDBC_CATALOGS: 'SYSTEM.JDBC.CATALOGS';
SYSTEM_JDBC_SCHEMAS: 'SYSTEM.JDBC.SCHEMAS';
SYSTEM_JDBC_TABLES: 'SYSTEM.JDBC.TABLES';
SYSTEM_JDBC_TABLE_TYPES: 'SYSTEM.JDBC.TABLE_TYPES';
SYSTEM_JDBC_TYPES: 'SYSTEM.JDBC.TYPES';
SELF_REFERENCING_COL_NAME: 'SELF_REFERENCING_COL_NAME';
UNSIGNED_ATTRIBUTE: 'UNSIGNED_ATTRIBUTE';
TABLE_CAT: 'TABLE_CAT';
TABLE_CATALOG: 'TABLE_CATALOG';
TABLE_NAME: 'TABLE_NAME';
TABLE_SCHEM: 'TABLE_SCHEM';
TABLE_TYPE: 'TABLE_TYPE';
TYPE_CAT: 'TYPE_CAT';
TYPE_NAME: 'TYPE_NAME';
TYPE_SCHEM: 'TYPE_SCHEM';
BACKQUOTED_IDENTIFIER
: '`' ( ~'`' | '``' )* '`'
@ -129,7 +68,7 @@ IDENTIFIER
;
STRING
: '\'' ( ~'\'' | '\'\'' )* '\''
: '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
| 'R\'' (~'\'')* '\''
| 'R"'(~'"')* '"'
;

View File

@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// This lexer should follow Trino `https://github.com/trinodb/trino/blob/master/core/trino-parser/src/main/antlr4/io/trino/sql/parser/SqlBase.g4`
lexer grammar KyuubiTrinoFeBaseLexer;
// Literal tokens: punctuation, comparison operators, SQL keywords, and the
// column / table names that appear in the fixed-form metadata statements.
// All keyword literals are upper case only.
// NOTE(review): the Scala-side parser wraps its input in UpperCaseCharStream,
// presumably so that matching is case-insensitive -- confirm against that class.
SEMICOLON: ';';
LEFT_PAREN: '(';
RIGHT_PAREN: ')';
AND: 'AND';
BQ: '`';
BY: 'BY';
COMMA: ',';
DOT: '.';
// Comparison operators; EQ, LTE and GTE each accept two spellings.
EQ : '=' | '==';
NSEQ: '<=>';
NEQ : '<>';
NEQJ: '!=';
LT : '<';
LTE : '<=' | '!>';
GT : '>';
GTE : '>=' | '!<';
OR: 'OR';
FROM: 'FROM';
FALSE: 'FALSE';
LIKE: 'LIKE';
IN: 'IN';
WHERE: 'WHERE';
ESCAPE: 'ESCAPE';
AUTO_INCREMENT: 'AUTO_INCREMENT';
CASE_SENSITIVE: 'CASE_SENSITIVE';
CREATE_PARAMS: 'CREATE_PARAMS';
DATA_TYPE: 'DATA_TYPE';
FIXED_PREC_SCALE: 'FIXED_PREC_SCALE';
IS: 'IS';
LITERAL_PREFIX: 'LITERAL_PREFIX';
LITERAL_SUFFIX: 'LITERAL_SUFFIX';
LOCAL_TYPE_NAME: 'LOCAL_TYPE_NAME';
MAXIMUM_SCALE: 'MAXIMUM_SCALE';
MINIMUM_SCALE: 'MINIMUM_SCALE';
NULL: 'NULL';
NULLABLE: 'NULLABLE';
NUM_PREC_RADIX: 'NUM_PREC_RADIX';
ORDER: 'ORDER';
PRECISION: 'PRECISION';
REMARKS: 'REMARKS';
REF_GENERATION: 'REF_GENERATION';
SEARCHABLE: 'SEARCHABLE';
SELECT: 'SELECT';
SQL_DATA_TYPE: 'SQL_DATA_TYPE';
SQL_DATETIME_SUB: 'SQL_DATETIME_SUB';
// Each SYSTEM.JDBC.* name is one literal, dots included, so a full table
// name is emitted as a single token rather than DOT-separated parts.
SYSTEM_JDBC_CATALOGS: 'SYSTEM.JDBC.CATALOGS';
SYSTEM_JDBC_SCHEMAS: 'SYSTEM.JDBC.SCHEMAS';
SYSTEM_JDBC_TABLES: 'SYSTEM.JDBC.TABLES';
SYSTEM_JDBC_TABLE_TYPES: 'SYSTEM.JDBC.TABLE_TYPES';
SYSTEM_JDBC_TYPES: 'SYSTEM.JDBC.TYPES';
SELF_REFERENCING_COL_NAME: 'SELF_REFERENCING_COL_NAME';
UNSIGNED_ATTRIBUTE: 'UNSIGNED_ATTRIBUTE';
TABLE_CAT: 'TABLE_CAT';
TABLE_CATALOG: 'TABLE_CATALOG';
TABLE_NAME: 'TABLE_NAME';
TABLE_SCHEM: 'TABLE_SCHEM';
TABLE_TYPE: 'TABLE_TYPE';
TYPE_CAT: 'TYPE_CAT';
TYPE_NAME: 'TYPE_NAME';
TYPE_SCHEM: 'TYPE_SCHEM';
// The three-character text '\' (quote, backslash, quote). Declared as a
// fragment, so it yields a token only through STRING_ESCAPE below.
fragment SEARCH_STRING_ESCAPE: '\'' '\\' '\'';
// Token for the escape-character literal '\'.
// The same text is also matched, at the same length, by STRING. ANTLR resolves
// equal-length matches in favour of the rule declared first, so this rule must
// stay above STRING.
STRING_ESCAPE
    : SEARCH_STRING_ESCAPE
    ;
// Single-quoted string literal: any run of non-quote characters, with a
// doubled quote ('') standing for an embedded quote. Backslash is an ordinary
// character here, and there are no R'...' / R"..." alternatives.
STRING
    : '\'' ( ~'\'' | '\'\'' )* '\''
    ;
// Line comment: "--" up to the end of the line, consuming an optional
// trailing CR and/or LF. Sent to the HIDDEN channel.
SIMPLE_COMMENT
    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
    ;
// Block comment. The match is non-greedy, so it ends at the first "*/";
// nested block comments are not supported. Sent to the HIDDEN channel.
BRACKETED_COMMENT
    : '/*' .*? '*/' -> channel(HIDDEN)
    ;
// Runs of spaces, CR, LF and tabs. Sent to the HIDDEN channel.
WS : [ \r\n\t]+ -> channel(HIDDEN)
    ;
// Catch-all for anything we can't recognize.
// We use this to be able to ignore and recover all the text
// when splitting statements with DelimiterLexer
// Matches exactly one character of any kind. It must remain the last rule:
// on an equal-length match ANTLR prefers the rule declared first, so every
// single-character token above takes priority over this one.
UNRECOGNIZED
    : .
    ;

View File

@ -17,7 +17,7 @@
parser grammar KyuubiTrinoFeBaseParser;
options { tokenVocab = KyuubiSqlBaseLexer; }
options { tokenVocab = KyuubiTrinoFeBaseLexer; }
singleStatement
: statement SEMICOLON* EOF
@ -25,42 +25,42 @@ singleStatement
statement
: SELECT TABLE_SCHEM COMMA TABLE_CATALOG FROM SYSTEM_JDBC_SCHEMAS
(WHERE (TABLE_CATALOG EQ catalog=STRING+)? AND? (TABLE_SCHEM LIKE schema=STRING+)?)?
ORDER BY TABLE_CATALOG COMMA TABLE_SCHEM #getSchemas
| SELECT TABLE_CAT FROM SYSTEM_JDBC_CATALOGS ORDER BY TABLE_CAT #getCatalogs
| SELECT TABLE_TYPE FROM SYSTEM_JDBC_TABLE_TYPES ORDER BY TABLE_TYPE #getTableTypes
(WHERE (TABLE_CATALOG EQ catalog=stringLit)? AND? (TABLE_SCHEM LIKE schema=stringLit)?)?
ORDER BY TABLE_CATALOG COMMA TABLE_SCHEM #getSchemas
| SELECT TABLE_CAT FROM SYSTEM_JDBC_CATALOGS ORDER BY TABLE_CAT #getCatalogs
| SELECT TABLE_TYPE FROM SYSTEM_JDBC_TABLE_TYPES ORDER BY TABLE_TYPE #getTableTypes
| SELECT TYPE_NAME COMMA DATA_TYPE COMMA PRECISION COMMA LITERAL_PREFIX COMMA
LITERAL_SUFFIX COMMA CREATE_PARAMS COMMA NULLABLE COMMA CASE_SENSITIVE COMMA
SEARCHABLE COMMA UNSIGNED_ATTRIBUTE COMMA FIXED_PREC_SCALE COMMA AUTO_INCREMENT
COMMA LOCAL_TYPE_NAME COMMA MINIMUM_SCALE COMMA MAXIMUM_SCALE COMMA SQL_DATA_TYPE
COMMA SQL_DATETIME_SUB COMMA NUM_PREC_RADIX FROM SYSTEM_JDBC_TYPES ORDER BY DATA_TYPE #getTypeInfo
COMMA SQL_DATETIME_SUB COMMA NUM_PREC_RADIX FROM SYSTEM_JDBC_TYPES ORDER BY DATA_TYPE #getTypeInfo
| SELECT TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME COMMA TABLE_TYPE COMMA REMARKS COMMA
TYPE_CAT COMMA TYPE_SCHEM COMMA TYPE_NAME COMMA SELF_REFERENCING_COL_NAME COMMA REF_GENERATION
FROM SYSTEM_JDBC_TABLES
(WHERE tableCatalogFilter? AND? tableSchemaFilter? AND? tableNameFilter? AND? tableTypeFilter?)?
ORDER BY TABLE_TYPE COMMA TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME #getTables
| .*? #passThrough
ORDER BY TABLE_TYPE COMMA TABLE_CAT COMMA TABLE_SCHEM COMMA TABLE_NAME #getTables
| .*? #passThrough
;
tableCatalogFilter
: TABLE_CAT IS NULL #nullCatalog
| TABLE_CAT EQ catalog=STRING+ #catalogFilter
: TABLE_CAT IS NULL #nullCatalog
| TABLE_CAT EQ catalog=stringLit #catalogFilter
;
tableSchemaFilter
: TABLE_SCHEM IS NULL #nulTableSchema
| TABLE_SCHEM LIKE schemaPattern=STRING+ ESCAPE SEARCH_STRING_ESCAPE #schemaFilter
: TABLE_SCHEM IS NULL #nulTableSchema
| TABLE_SCHEM LIKE schemaPattern=stringLit ESCAPE STRING_ESCAPE #schemaFilter
;
tableNameFilter
: TABLE_NAME LIKE tableNamePattern=STRING+ ESCAPE SEARCH_STRING_ESCAPE
: TABLE_NAME LIKE tableNamePattern=stringLit ESCAPE STRING_ESCAPE
;
tableTypeFilter
: FALSE #tableTypesAlwaysFalse
| TABLE_TYPE IN '(' stirngInValue (',' stirngInValue)* ')' #typesFilter
: FALSE #tableTypesAlwaysFalse
| TABLE_TYPE IN LEFT_PAREN stringLit (COMMA stringLit)* RIGHT_PAREN #typesFilter
;
stirngInValue
: STRING+
stringLit
: STRING
;

View File

@ -112,6 +112,6 @@ class KyuubiTrinoFeAstBuilder extends KyuubiTrinoFeBaseParserBaseVisitor[AnyRef]
}
override def visitTypesFilter(ctx: TypesFilterContext): List[String] = {
ctx.stirngInValue().asScala.map(v => unescapeSQLString(v.getText)).toList
ctx.stringLit().asScala.map(v => unescapeSQLString(v.getText)).toList
}
}

View File

@ -22,7 +22,7 @@ import org.antlr.v4.runtime.atn.PredictionMode
import org.antlr.v4.runtime.misc.ParseCancellationException
import org.antlr.v4.runtime.tree.ParseTree
import org.apache.kyuubi.sql.{KyuubiSqlBaseLexer, KyuubiTrinoFeBaseParser}
import org.apache.kyuubi.sql.{KyuubiTrinoFeBaseLexer, KyuubiTrinoFeBaseParser}
import org.apache.kyuubi.sql.parser.{KyuubiParserBase, PostProcessor, UpperCaseCharStream}
class KyuubiTrinoFeParser extends KyuubiParserBase[KyuubiTrinoFeBaseParser] {
@ -30,7 +30,7 @@ class KyuubiTrinoFeParser extends KyuubiParserBase[KyuubiTrinoFeBaseParser] {
override lazy val astBuilder = new KyuubiTrinoFeAstBuilder
protected def parse[T](command: String)(toResult: KyuubiTrinoFeBaseParser => T): T = {
val lexer = new KyuubiSqlBaseLexer(
val lexer = new KyuubiTrinoFeBaseLexer(
new UpperCaseCharStream(CharStreams.fromString(command)))
lexer.removeErrorListeners()