[Refactor] Introduce a new fe-grammar module to remove the ANTLR plugin and dependencies (#62239)
This commit is contained in:
parent
db8e484ed0
commit
108e5bd47d
|
|
@ -19,6 +19,7 @@ The Frontend (FE) is the Java layer of StarRocks that provides the SQL interface
|
|||
- `qe/` — Query execution coordination and session management
|
||||
- `privilege/` — Authentication and authorization
|
||||
- `scheduler/`, `load/`, `backup/` — Scheduling, data loading, backup/restore
|
||||
- `fe/fe-grammar/` — ANTLR grammars (.g4) for StarRocks SQL
|
||||
- `fe/fe-parser/` — StarRocks SQL parser and full AST definitions
|
||||
- `fe/fe-spi/` — FE Service Provider Interfaces (contracts for connectors/plugins)
|
||||
- `fe/connector/` — Data source connector implementations and FE-side integration
|
||||
|
|
@ -31,8 +32,9 @@ Related:
|
|||
|
||||
## Extensibility overview
|
||||
- Syntax/AST:
|
||||
- Extend grammar in `fe/fe-parser/` and add new AST nodes under `com.starrocks.sql.ast`.
|
||||
- Follow AST conventions: include source positions; keep nodes immutable.
|
||||
- Extend SQL grammar in `fe/fe-grammar/` (ANTLR .g4 files).
|
||||
- Parser and AST live in `fe/fe-parser/` (packages `com.starrocks.sql.parser` and `com.starrocks.sql.ast`).
|
||||
- Follow AST conventions: include source positions; keep nodes immutable; push semantics to analyzer.
|
||||
- Update analyzer/optimizer and add tests.
|
||||
- Connectors (data sources):
|
||||
- Implement FE SPI in a dedicated module; provide discovery metadata (e.g., ServiceLoader or plugin manifest).
|
||||
|
|
@ -63,6 +65,7 @@ Related:
|
|||
- Prefer targeted changes and module-level verification.
|
||||
|
||||
## References
|
||||
- Grammar: `fe/fe-grammar/`
|
||||
- Parser and AST: `fe/fe-parser/`
|
||||
- SPI contracts: `fe/fe-spi/`
|
||||
- Connectors: `fe/connector/`
|
||||
|
|
@ -70,4 +73,3 @@ Related:
|
|||
- Utilities: `fe/fe-utils/`
|
||||
- Testing: `fe/fe-testing/`
|
||||
- Docs: https://docs.starrocks.io/
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ import org.gradle.api.tasks.testing.logging.TestLogEvent
|
|||
|
||||
plugins {
|
||||
java
|
||||
antlr
|
||||
id("com.baidu.jprotobuf") version "1.2.1"
|
||||
}
|
||||
|
||||
|
|
@ -29,7 +28,6 @@ java {
|
|||
main {
|
||||
java {
|
||||
srcDir("src/main/java")
|
||||
srcDir("build/generated-sources/antlr4")
|
||||
srcDir("build/generated-sources/proto")
|
||||
srcDir("build/generated-sources/thrift")
|
||||
srcDir("build/generated-sources/genscript")
|
||||
|
|
@ -46,15 +44,9 @@ java {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
configurations.configureEach {
|
||||
resolutionStrategy.force("org.antlr:antlr4-runtime:${project.ext["antlr.version"]}")
|
||||
}
|
||||
|
||||
dependencies {
|
||||
antlr("org.antlr:antlr4:${project.ext["antlr.version"]}")
|
||||
|
||||
// Internal project dependencies
|
||||
implementation(project(":fe-grammar"))
|
||||
implementation(project(":fe-testing"))
|
||||
implementation(project(":fe-utils"))
|
||||
implementation(project(":plugin:hive-udf"))
|
||||
|
|
@ -154,7 +146,6 @@ dependencies {
|
|||
implementation("javax.annotation:javax.annotation-api")
|
||||
implementation("javax.validation:validation-api")
|
||||
implementation("net.openhft:zero-allocation-hashing:0.16")
|
||||
implementation("org.antlr:antlr4-runtime")
|
||||
implementation("org.apache.arrow:arrow-jdbc")
|
||||
implementation("org.apache.arrow:arrow-memory-netty")
|
||||
implementation("org.apache.arrow:arrow-vector")
|
||||
|
|
@ -290,17 +281,6 @@ dependencies {
|
|||
implementation("net.openhft:zero-allocation-hashing:0.16")
|
||||
}
|
||||
|
||||
// Configure ANTLR plugin
|
||||
tasks.generateGrammarSource {
|
||||
maxHeapSize = "512m"
|
||||
// Generate visitor classes and place the generated code in the com.starrocks.sql.parser package
|
||||
arguments = arguments + listOf(
|
||||
"-visitor",
|
||||
"-package", "com.starrocks.sql.parser",
|
||||
)
|
||||
outputDirectory = layout.buildDirectory.get().dir("generated-sources/antlr4/com/starrocks/sql/parser").asFile
|
||||
}
|
||||
|
||||
// Custom task for Protocol Buffer generation
|
||||
tasks.register<Task>("generateProtoSources") {
|
||||
description = "Generates Java source files from Protocol Buffer definitions"
|
||||
|
|
@ -426,7 +406,7 @@ tasks.register<Task>("generateByScripts") {
|
|||
|
||||
// Add source generation tasks to the build process
|
||||
tasks.compileJava {
|
||||
dependsOn("generateGrammarSource", "generateThriftSources", "generateProtoSources", "generateByScripts")
|
||||
dependsOn("generateThriftSources", "generateProtoSources", "generateByScripts")
|
||||
}
|
||||
|
||||
tasks.named<PrecompileTask>("jprotobuf_precompile") {
|
||||
|
|
|
|||
|
|
@ -97,6 +97,12 @@ under the License.
|
|||
</profiles>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.starrocks</groupId>
|
||||
<artifactId>fe-grammar</artifactId>
|
||||
<version>1.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.starrocks</groupId>
|
||||
<artifactId>spark-dpp</artifactId>
|
||||
|
|
@ -563,12 +569,6 @@ under the License.
|
|||
<classifier>shaded</classifier>
|
||||
</dependency>
|
||||
|
||||
<!-- https://mvnrepository.com/artifact/org.antlr/antlr4-runtime -->
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>antlr4-runtime</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.openjdk.jmh</groupId>
|
||||
<artifactId>jmh-core</artifactId>
|
||||
|
|
@ -973,32 +973,6 @@ under the License.
|
|||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!--antlr-->
|
||||
<plugin>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>antlr4-maven-plugin</artifactId>
|
||||
<version>${antlr.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>antlr</id>
|
||||
<phase>generate-sources</phase>
|
||||
<goals>
|
||||
<goal>antlr4</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<sourceDirectory>src/main/java/com/starrocks/sql/parser</sourceDirectory>
|
||||
<visitor>true</visitor>
|
||||
<arguments>
|
||||
<argument>-package</argument>
|
||||
<argument>com.starrocks.sql.parser</argument>
|
||||
<argument>-o</argument>
|
||||
<argument>${project.build.directory}/generated-sources/antlr4/com/starrocks/sql/parser</argument>
|
||||
</arguments>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!-- jmockit -->
|
||||
<plugin>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
../java/com/starrocks/sql/parser/StarRocks.g4
|
||||
|
|
@ -1 +0,0 @@
|
|||
../java/com/starrocks/sql/parser/StarRocksLex.g4
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# StarRocks FE Grammar (ANTLR)
|
||||
|
||||
This directory stores the ANTLR grammar files (.g4) for StarRocks SQL. These grammars are the source of truth for SQL syntax and are used to generate the lexer and parser consumed by the FE SQL parser and AST layers.
|
||||
|
||||
## Purpose
|
||||
- Define the StarRocks SQL syntax using ANTLR 4 grammars.
|
||||
- Generate lexer/parser sources consumed by the FE parsing module and the AST definitions under `com.starrocks.sql.ast`.
|
||||
- Keep the parsing stage focused on syntactic structure; delegate semantic checks to the analyzer.
|
||||
|
||||
## Relationship to other modules
|
||||
- fe-grammar (.g4) -> generated lexer/parser -> FE SQL parser and AST (see `fe/fe-parser/` and `com.starrocks.sql.ast`).
|
||||
- Analyzer/optimizer build on top of the parsed AST in `fe/fe-core`.
|
||||
|
||||
## Typical layout
|
||||
- `*.g4` files:
|
||||
- Lexer grammar (tokens, keywords, literals, whitespace/comments).
|
||||
- Parser grammar (SQL statements and expressions).
|
||||
- Optionally, shared fragments reused across rules.
|
||||
|
||||
## Guidelines
|
||||
- Keep grammar unambiguous and deterministic; prefer explicit rules over backtracking.
|
||||
- Do not embed semantic logic or state in the grammar; keep semantics in the analyzer.
|
||||
- Separate lexer and parser concerns; avoid mixing token definitions with complex parser actions.
|
||||
- Use predicates sparingly and document the rationale when needed.
|
||||
- Maintain backward compatibility for widely used constructs; document behavior changes.
|
||||
- Add comprehensive tests for new or changed syntax (positive, negative, and edge cases).
|
||||
|
||||
## Regenerating parser/lexer
|
||||
- Use ANTLR 4.x to regenerate the lexer/parser sources from `.g4` files.
|
||||
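- Direct outputs to the FE parsing module as configured by the project: the Maven and Gradle builds generate visitor-enabled sources in the `com.starrocks.sql.parser` package.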
|
||||
- Avoid heavy project-wide builds; prefer targeted generation as per development docs.
|
||||
|
||||
## Testing
|
||||
- Add/adjust SQL test cases under the project's SQL test framework.
|
||||
- Include syntax-only tests and analyzer/optimizer coverage where applicable.
|
||||
|
||||
## Notes
|
||||
- The AST is defined elsewhere; keep the grammar free of temporary semantic storage.
|
||||
- Coordinate grammar changes with the FE parser/analyzer owners to ensure compatibility.
|
||||
|
||||
References:
|
||||
- Parser and AST: `fe/fe-parser/`, `com.starrocks.sql.ast`
|
||||
- Development docs: https://docs.starrocks.io/
|
||||
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
plugins {
|
||||
antlr
|
||||
}
|
||||
|
||||
java {
|
||||
sourceCompatibility = JavaVersion.VERSION_17
|
||||
targetCompatibility = JavaVersion.VERSION_17
|
||||
}
|
||||
|
||||
configurations.configureEach {
|
||||
resolutionStrategy.force("org.antlr:antlr4-runtime:${project.ext["antlr.version"]}")
|
||||
}
|
||||
dependencies {
|
||||
antlr("org.antlr:antlr4:${project.ext["antlr.version"]}")
|
||||
|
||||
implementation("org.antlr:antlr4-runtime")
|
||||
}
|
||||
|
||||
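// Configure ANTLR code generation: emit visitor classes in package com.starrocks.sql.parser and resolve imported grammars from the grammar directory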
|
||||
tasks.generateGrammarSource {
|
||||
maxHeapSize = "512m"
|
||||
|
||||
val grammarDir = file("src/main/antlr/com/starrocks/grammar")
|
||||
|
||||
arguments = listOf(
|
||||
"-visitor",
|
||||
"-package", "com.starrocks.sql.parser",
|
||||
"-lib", grammarDir.absolutePath
|
||||
)
|
||||
}
|
||||
|
||||
// Generate the ANTLR lexer/parser sources before compiling Java
|
||||
tasks.compileJava {
|
||||
dependsOn("generateGrammarSource")
|
||||
}
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>com.starrocks</groupId>
|
||||
<artifactId>starrocks-fe</artifactId>
|
||||
<version>3.4.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>fe-grammar</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>17</maven.compiler.source>
|
||||
<maven.compiler.target>17</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- https://mvnrepository.com/artifact/org.antlr/antlr4-runtime -->
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>antlr4-runtime</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!--antlr-->
|
||||
<plugin>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>antlr4-maven-plugin</artifactId>
|
||||
<version>${antlr.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>antlr</id>
|
||||
<phase>generate-sources</phase>
|
||||
<goals>
|
||||
<goal>antlr4</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<sourceDirectory>src/main/antlr/com/starrocks/grammar</sourceDirectory>
|
||||
<visitor>true</visitor>
|
||||
<arguments>
|
||||
<argument>-package</argument>
|
||||
<argument>com.starrocks.sql.parser</argument>
|
||||
</arguments>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
lexer grammar StarRocksLex;
|
||||
@members {
|
||||
private long sqlMode = com.starrocks.qe.SqlModeHelper.MODE_DEFAULT;
|
||||
private long sqlMode = 32L; // Inlined value of com.starrocks.qe.SqlModeHelper.MODE_DEFAULT (32L); keep in sync if that constant changes.
|
||||
public void setSqlMode(long newSqlMode) {
|
||||
sqlMode = newSqlMode;
|
||||
}
|
||||
|
|
@ -528,7 +528,7 @@ ASTERISK_SYMBOL: '*';
|
|||
SLASH_SYMBOL: '/';
|
||||
PERCENT_SYMBOL: '%';
|
||||
|
||||
LOGICAL_OR: '||' {setType((sqlMode & com.starrocks.qe.SqlModeHelper.MODE_PIPES_AS_CONCAT) == 0 ? LOGICAL_OR : StarRocksParser.CONCAT);};
|
||||
LOGICAL_OR: '||' {setType((sqlMode & (1L << 1) /* MODE_PIPES_AS_CONCAT = 1L << 1 */) == 0 ? LOGICAL_OR : StarRocksParser.CONCAT);};
|
||||
LOGICAL_AND: '&&';
|
||||
LOGICAL_NOT: '!';
|
||||
|
||||
|
|
@ -29,6 +29,7 @@ under the License.
|
|||
<packaging>pom</packaging>
|
||||
|
||||
<modules>
|
||||
<module>fe-grammar</module>
|
||||
<module>fe-spi</module>
|
||||
<module>fe-utils</module>
|
||||
<module>fe-testing</module>
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
rootProject.name = "starrocks-fe"
|
||||
|
||||
include(
|
||||
"fe-grammar",
|
||||
"fe-utils",
|
||||
"fe-testing",
|
||||
"plugin:spark-dpp",
|
||||
|
|
|
|||
Loading…
Reference in New Issue