[jOOQ/jOOQ#10585] ParserImpl.KEYWORDS_IN_SELECT and other such lists should be tries for improved performance

This commit is contained in:
Lukas Eder 2024-05-22 16:19:28 +02:00
parent 545c218503
commit 8d7af79d42
3 changed files with 233 additions and 7 deletions

View File

@ -0,0 +1,191 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Other licenses:
* -----------------------------------------------------------------------------
* Commercial licenses for this work are available. These replace the above
* ASL 2.0 and offer limited warranties, support, maintenance, and commercial
* database integrations.
*
* For more information, please visit: https://www.jooq.org/legal/licensing
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*/
package org.jooq.impl;
import java.util.LinkedHashSet;
import java.util.Set;
/**
 * A lookup utility for keywords stored in a Trie.
 * <p>
 * Keywords may consist of ASCII letters (matched case insensitively), ASCII
 * digits, underscores, and whitespace. Every whitespace character shares a
 * single trie slot, so a keyword such as <code>ORDER BY</code> is found
 * regardless of which whitespace character separates its words. Any other
 * character encountered in looked up text never matches.
 *
 * @author Lukas Eder
 */
class KeywordLookup {

    /**
     * The trie slot shared by all whitespace characters. Slots 1 to 26 are
     * the letters <code>A</code> to <code>Z</code>.
     */
    private static final int SLOT_WHITESPACE = 0;

    /**
     * The trie slot of the underscore character.
     */
    private static final int SLOT_UNDERSCORE = 27;

    /**
     * The trie slot of the digit <code>0</code>; the slots of the digits
     * <code>1</code> to <code>9</code> follow.
     */
    private static final int SLOT_DIGIT_ZERO = 28;

    /**
     * The number of slots in each trie node.
     */
    private static final int SLOT_COUNT = 38;

    /**
     * The value returned by {@link #encode(char)} for characters that cannot
     * be part of a keyword.
     */
    private static final int SLOT_NONE = -1;

    private final KeywordTrie trie;

    KeywordLookup() {
        this(new KeywordTrie());
    }

    private KeywordLookup(KeywordTrie trie) {
        this.trie = trie;
    }

    /**
     * Create a lookup containing all of the argument keywords.
     *
     * @param keywords The keywords to insert
     * @return The new lookup
     * @throws IllegalArgumentException If a keyword contains a character
     *             that is rejected by {@link #insert(String)}
     */
    static final KeywordLookup from(String... keywords) {
        KeywordLookup result = new KeywordLookup();

        for (String keyword : keywords)
            result.insert(keyword);

        return result;
    }

    /**
     * Skip a run of whitespace.
     *
     * @param text The text to scan
     * @param i The index at which to start scanning
     * @return The index of the first non-whitespace character at or after
     *         <code>i</code>, or <code>text.length()</code> if only
     *         whitespace follows. An index at or beyond the end of the text
     *         is returned unchanged.
     */
    final int skipWhitespace(String text, int i) {
        int l = text.length();

        // Check the bounds first, so that the end of the text is never read
        while (i < l && Character.isWhitespace(text.charAt(i)))
            i++;

        return i;
    }

    /**
     * Check whether the whole argument text is a keyword of this lookup.
     * <p>
     * A run of whitespace in the text matches a single whitespace character
     * of a keyword.
     *
     * @param text The text to check
     * @return Whether the text is one of the keywords
     */
    final boolean lookup(String text) {

        // The empty string is a keyword only if it was inserted explicitly.
        // It must not be mistaken for "no match", which is also reported as
        // the start position 0.
        if (text.isEmpty())
            return trie.terminal;

        // The longest match is needed here, because a shorter keyword may be
        // a prefix of the text, e.g. FOR vs FOR UPDATE
        return match(text.toCharArray(), 0, i -> skipWhitespace(text, i), true) == text.length();
    }

    /**
     * Look for a keyword starting at a position in some text.
     * <p>
     * A keyword only matches if it is not directly followed by an identifier
     * part or by a period.
     *
     * @param text The text to scan
     * @param position The index at which the keyword must start
     * @param afterWhitespace A function that is called with the index of a
     *            matched whitespace character and returns the index at which
     *            scanning should continue
     * @return The index just after the first keyword found, or
     *         <code>position</code> if no keyword was found
     */
    final int lookup(char[] text, int position, IntToIntFunction afterWhitespace) {
        return match(text, position, afterWhitespace, false);
    }

    /**
     * Walk the trie along the text, starting at a position.
     *
     * @param longest Whether to keep scanning for a longer keyword after a
     *            first keyword has been found
     * @return The index just after the found keyword, or
     *         <code>position</code> if no keyword was found
     */
    private final int match(char[] text, int position, IntToIntFunction afterWhitespace, boolean longest) {
        int end = position;
        KeywordTrie t = trie;

        for (int i = position; i < text.length; i++) {
            int slot = encode(character(text, i));

            // Characters that cannot be part of a keyword never match. In
            // particular, they are not mistaken for whitespace.
            if (slot == SLOT_NONE)
                break;

            if ((t = t.next[slot]) == null)
                break;

            char following = character(text, i + 1);

            if (t.terminal && !isIdentifierPart(following) && following != '.') {
                end = i + 1;

                if (!longest)
                    break;
            }

            // The loop increment moves on to the index returned by the function
            if (slot == SLOT_WHITESPACE)
                i = afterWhitespace.applyAsInt(i) - 1;
        }

        return end;
    }

    private final boolean isIdentifierPart(char character) {
        return Character.isJavaIdentifierPart(character)
            || character == '@'
            || character == '#';
    }

    /**
     * Get the character at a position, or a space character if the position
     * is outside of the text.
     */
    static final char character(char[] text, int pos) {
        return pos >= 0 && pos < text.length ? text[pos] : ' ';
    }

    /**
     * Insert a new keyword into the trie.
     *
     * @param keyword The keyword
     * @return Whether the trie changed as a result of this operation.
     * @throws IllegalArgumentException If the keyword contains a character
     *             other than an ASCII letter, an ASCII digit, an underscore,
     *             or whitespace. The trie is not modified in that case.
     */
    final boolean insert(String keyword) {
        int length = keyword.length();
        int[] slots = new int[length];

        // Validate up front, so that a rejected keyword leaves no partial path
        for (int i = 0; i < length; i++) {
            char c = keyword.charAt(i);

            if ((slots[i] = encode(c)) == SLOT_NONE)
                throw new IllegalArgumentException("Unsupported character '" + c + "' at index " + i + " in keyword: " + keyword);
        }

        KeywordTrie t = trie;

        for (int slot : slots) {
            if (t.next[slot] == null)
                t.next[slot] = new KeywordTrie();

            t = t.next[slot];
        }

        // The trie changes exactly if the keyword was not present before
        boolean changed = !t.terminal;
        t.terminal = true;
        return changed;
    }

    /**
     * Get a {@link Set} representation of this lookup's trie.
     */
    final Set<String> set() {
        return set(new LinkedHashSet<>(), new StringBuilder(), trie);
    }

    /**
     * Get the trie slot of a character, or {@link #SLOT_NONE} if the
     * character cannot be part of a keyword.
     * <p>
     * Only ASCII letters are case folded, so that no other character is ever
     * treated as one of them.
     */
    private static final int encode(char c) {
        if (c >= 'A' && c <= 'Z')
            return c - '@';
        else if (c >= 'a' && c <= 'z')
            return c - '`';
        else if (c == '_')
            return SLOT_UNDERSCORE;
        else if (c >= '0' && c <= '9')
            return SLOT_DIGIT_ZERO + (c - '0');
        else if (Character.isWhitespace(c))
            return SLOT_WHITESPACE;
        else
            return SLOT_NONE;
    }

    /**
     * Get the character represented by a trie slot. The whitespace slot is
     * represented by a space character.
     */
    private static final char decode(int i) {
        if (i == SLOT_WHITESPACE)
            return ' ';
        else if (i < SLOT_UNDERSCORE)
            return (char) (i + '@');
        else if (i == SLOT_UNDERSCORE)
            return '_';
        else
            return (char) ('0' + (i - SLOT_DIGIT_ZERO));
    }

    private static final Set<String> set(Set<String> s, StringBuilder sb, KeywordTrie t) {
        if (t.terminal)
            s.add(sb.toString());

        for (int i = 0; i < t.next.length; i++) {
            if (t.next[i] != null) {
                set(s, sb.append(decode(i)), t.next[i]);

                // Backtrack before visiting the next sibling
                sb.deleteCharAt(sb.length() - 1);
            }
        }

        return s;
    }

    private static class KeywordTrie {
        final KeywordTrie[] next = new KeywordTrie[SLOT_COUNT];
        boolean terminal;

        @Override
        public String toString() {
            return "Terminal: " + terminal + ", Trie: " + new KeywordLookup(this).toString();
        }
    }

    @Override
    public String toString() {
        return set().toString();
    }
}

View File

@ -2310,9 +2310,9 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
}
parseKeywordIf("FROM");
Table<?> table = scope.scope(parseJoinedTable(() -> peekKeyword(KEYWORDS_IN_DELETE_FROM)));
Table<?> table = scope.scope(parseJoinedTable(() -> peekKeyword(KEYWORD_LOOKUP_IN_DELETE_FROM)));
DeleteUsingStep<?> s1 = with == null ? dsl.delete(table) : with.delete(table);
DeleteWhereStep<?> s2 = parseKeywordIf("USING", "FROM") ? s1.using(parseList(',', t -> scope.scope(parseJoinedTable(() -> peekKeyword(KEYWORDS_IN_DELETE_FROM))))) : s1;
DeleteWhereStep<?> s2 = parseKeywordIf("USING", "FROM") ? s1.using(parseList(',', t -> scope.scope(parseJoinedTable(() -> peekKeyword(KEYWORD_LOOKUP_IN_DELETE_FROM))))) : s1;
DeleteOrderByStep<?> s3 = parseKeywordIf("ALL")
? s2
: parseKeywordIf("WHERE")
@ -2503,9 +2503,9 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
// percent = parseKeywordIf("PERCENT") && requireProEdition();
}
Table<?> table = scope.scope(parseJoinedTable(() -> peekKeyword(KEYWORDS_IN_UPDATE_FROM)));
Table<?> table = scope.scope(parseJoinedTable(() -> peekKeyword(KEYWORD_LOOKUP_IN_UPDATE_FROM)));
UpdateSetFirstStep<?> s1 = (with == null ? dsl.update(table) : with.update(table));
List<Table<?>> from = parseKeywordIf("FROM") ? parseList(',', t -> scope.scope(parseJoinedTable(() -> peekKeyword(KEYWORDS_IN_UPDATE_FROM)))) : null;
List<Table<?>> from = parseKeywordIf("FROM") ? parseList(',', t -> scope.scope(parseJoinedTable(() -> peekKeyword(KEYWORD_LOOKUP_IN_UPDATE_FROM)))) : null;
parseKeyword("SET");
UpdateFromStep<?> s2;
@ -2530,7 +2530,7 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
UpdateWhereStep<?> s3 = from != null
? s2.from(from)
: parseKeywordIf("FROM")
? s2.from(parseList(',', t -> parseJoinedTable(() -> peekKeyword(KEYWORDS_IN_UPDATE_FROM))))
? s2.from(parseList(',', t -> parseJoinedTable(() -> peekKeyword(KEYWORD_LOOKUP_IN_UPDATE_FROM))))
: s2;
UpdateOrderByStep<?> s4 = parseKeywordIf("ALL")
? s3
@ -7147,7 +7147,7 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
@Override
public final Table<?> parseTable() {
return parseJoinedTable(() -> peekKeyword(KEYWORDS_IN_SELECT_FROM));
return parseJoinedTable(() -> peekKeyword(KEYWORD_LOOKUP_IN_SELECT_FROM));
}
private final Table<?> parseLateral(BooleanSupplier forbiddenKeywords) {
@ -7972,7 +7972,7 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
if (parseKeywordIf("AS"))
alias = parseIdentifier(true, false);
else if (!peekKeyword(KEYWORDS_IN_SELECT) && !peekKeyword(KEYWORDS_IN_STATEMENTS))
else if (!peekKeyword(KEYWORD_LOOKUP_IN_SELECT) && !peekKeyword(KEYWORD_LOOKUP_IN_STATEMENTS))
alias = parseIdentifierIf(true, false);
}
@ -14966,6 +14966,20 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
return true;
}
/**
 * Peek at the input after any whitespace at the current position and check
 * whether one of the lookup's keywords starts there, without consuming it.
 */
private final boolean peekKeyword(KeywordLookup lookup) {
    final int start = afterWhitespace(position(), false);
    final int end = lookup.lookup(sql, start, i -> afterWhitespace(i, false));

    // The lookup reports "no keyword" by returning the start position.
    // [#8806] A keyword that is followed by a period is very likely an identifier
    return end != start
        && !isIdentifierPart(end)
        && character(end) != '.';
}
private final boolean parseWhitespaceIf() {
positionBeforeWhitespace = position();
position(afterWhitespace(positionBeforeWhitespace));
@ -15179,6 +15193,8 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
"WITH",
};
private static final KeywordLookup KEYWORD_LOOKUP_IN_STATEMENTS = KeywordLookup.from(KEYWORDS_IN_STATEMENTS);
private static final String[] KEYWORDS_IN_SELECT = {
"CONNECT BY",
"EXCEPT",
@ -15210,6 +15226,8 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
"WINDOW",
};
private static final KeywordLookup KEYWORD_LOOKUP_IN_SELECT = KeywordLookup.from(KEYWORDS_IN_SELECT);
private static final String[] KEYWORDS_IN_FROM = {
"ANTI JOIN",
"CROSS APPLY",
@ -15270,6 +15288,8 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
"USING"
};
private static final KeywordLookup KEYWORD_LOOKUP_IN_FROM = KeywordLookup.from(KEYWORDS_IN_FROM);
private static final String[] KEYWORDS_IN_SELECT_FROM;
static {
@ -15314,6 +15334,8 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
KEYWORDS_IN_SELECT_FROM = set.toArray(EMPTY_STRING);
}
private static final KeywordLookup KEYWORD_LOOKUP_IN_SELECT_FROM = KeywordLookup.from(KEYWORDS_IN_SELECT_FROM);
private static final String[] KEYWORDS_IN_UPDATE_FROM;
static {
@ -15322,6 +15344,8 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
KEYWORDS_IN_UPDATE_FROM = set.toArray(EMPTY_STRING);
}
private static final KeywordLookup KEYWORD_LOOKUP_IN_UPDATE_FROM = KeywordLookup.from(KEYWORDS_IN_UPDATE_FROM);
private static final String[] KEYWORDS_IN_DELETE_FROM;
static {
@ -15331,6 +15355,8 @@ final class DefaultParseContext extends AbstractScope implements ParseContext {
KEYWORDS_IN_DELETE_FROM = set.toArray(EMPTY_STRING);
}
private static final KeywordLookup KEYWORD_LOOKUP_IN_DELETE_FROM = KeywordLookup.from(KEYWORDS_IN_DELETE_FROM);
private static final String[] PIVOT_KEYWORDS = {
"FOR"
};

View File

@ -139,3 +139,12 @@ interface ObjIntFunction<T, R> {
interface ObjIntPredicate<T> {
boolean test(T t, int i);
}
/**
 * A missing primitive type {@link Function} for ints returning ints.
 * <p>
 * It takes a primitive <code>int</code> argument and produces a primitive
 * <code>int</code> result.
 */
@FunctionalInterface
interface IntToIntFunction {
/**
 * Apply this function to the argument.
 *
 * @param i The function argument
 * @return The function result
 */
int applyAsInt(int i);
}