[Enhancement] Implement SQL standard JOIN USING with MySQL compatibility (#63312)

Signed-off-by: stephen <stephen5217@163.com>
This commit is contained in:
stephen 2025-09-23 10:05:50 +08:00 committed by GitHub
parent 67ceccc525
commit 30df6bb044
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 785 additions and 11 deletions

View File

@ -0,0 +1,83 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.starrocks.sql.analyzer;
import com.google.common.collect.ImmutableList;
import com.starrocks.sql.ast.expression.JoinOperator;
import com.starrocks.sql.ast.expression.SlotRef;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Handles column resolution for JOIN USING clauses per SQL standard.
*
* In JOIN USING, each USING column appears only once in the result (coalesced),
* unlike JOIN ON where both L.col and R.col are visible. For unqualified
* references to USING columns, this class resolves to the appropriate table's
* field based on JOIN type (RIGHT JOIN prefers right table values).
*
* Examples:
* - SELECT id FROM t1 JOIN t2 USING(id) -> returns coalesced id column
* - SELECT t1.id FROM t1 JOIN t2 USING(id) -> returns original t1.id column
*/
public class CoalescedJoinFields extends RelationFields {
private final Set<String> usingColumns;
private final JoinOperator joinOperator;
public CoalescedJoinFields(List<Field> fields, List<String> usingColNames, JoinOperator joinOperator) {
super(fields);
this.usingColumns = usingColNames.stream()
.map(String::toLowerCase)
.collect(Collectors.toSet());
this.joinOperator = joinOperator;
}
@Override
public List<Field> resolveFields(SlotRef name) {
String columnName = name.getColumnName().toLowerCase();
// For unqualified USING columns, return single coalesced field per SQL standard
// This implements the "appears only once" semantics of JOIN USING
if (name.getTblNameWithoutAnalyzed() == null && usingColumns.contains(columnName)) {
List<Field> allMatches = super.resolveFields(name);
if (!allMatches.isEmpty()) {
Field selectedField = selectCoalescedField(allMatches);
return ImmutableList.of(selectedField); // Single field, not both L.col and R.col
}
}
// For qualified references (table.column) or non-USING columns, return all matches
// This allows access to original table columns when explicitly qualified
return super.resolveFields(name);
}
/**
* Selects which table's field to use for the coalesced USING column.
* This implements COALESCE semantics:
* - RIGHT JOIN: COALESCE(L.col, R.col) -> prefer non-null R.col
* - Other JOINs: COALESCE(L.col, R.col) -> prefer non-null L.col
*/
private Field selectCoalescedField(List<Field> allMatches) {
if (joinOperator != null && joinOperator.isRightJoin()) {
// RIGHT JOIN: right table is primary, prefer its field for coalesced column
return allMatches.get(allMatches.size() - 1);
} else {
// Other JOINs: left table is primary, prefer its field for coalesced column
return allMatches.get(0);
}
}
}

View File

@ -91,6 +91,7 @@ import com.starrocks.sql.ast.expression.TableName;
import com.starrocks.sql.common.MetaUtils;
import com.starrocks.sql.common.TypeManager;
import com.starrocks.sql.optimizer.dump.HiveMetaStoreTableDumpInfo;
import org.apache.commons.collections.CollectionUtils;
import java.util.ArrayList;
import java.util.Arrays;
@ -891,20 +892,20 @@ public class QueryAnalyzer {
scope = new Scope(RelationId.of(join), rightScope.getRelationFields());
} else if (join.getJoinOp().isAnyLeftOuterJoin()) {
List<Field> rightFields = getFieldsWithNullable(rightScope);
scope = new Scope(RelationId.of(join),
leftScope.getRelationFields().joinWith(new RelationFields(rightFields)));
RelationFields joinedFields = leftScope.getRelationFields().joinWith(new RelationFields(rightFields));
scope = new Scope(RelationId.of(join), createJoinRelationFields(joinedFields, join));
} else if (join.getJoinOp().isRightOuterJoin()) {
List<Field> leftFields = getFieldsWithNullable(leftScope);
scope = new Scope(RelationId.of(join),
new RelationFields(leftFields).joinWith(rightScope.getRelationFields()));
RelationFields joinedFields = new RelationFields(leftFields).joinWith(rightScope.getRelationFields());
scope = new Scope(RelationId.of(join), createJoinRelationFields(joinedFields, join));
} else if (join.getJoinOp().isFullOuterJoin()) {
List<Field> rightFields = getFieldsWithNullable(rightScope);
List<Field> leftFields = getFieldsWithNullable(leftScope);
scope = new Scope(RelationId.of(join),
new RelationFields(leftFields).joinWith(new RelationFields(rightFields)));
RelationFields joinedFields = new RelationFields(leftFields).joinWith(new RelationFields(rightFields));
scope = new Scope(RelationId.of(join), createJoinRelationFields(joinedFields, join));
} else {
scope = new Scope(RelationId.of(join),
leftScope.getRelationFields().joinWith(rightScope.getRelationFields()));
RelationFields joinedFields = leftScope.getRelationFields().joinWith(rightScope.getRelationFields());
scope = new Scope(RelationId.of(join), createJoinRelationFields(joinedFields, join));
}
join.setScope(scope);
@ -954,6 +955,15 @@ public class QueryAnalyzer {
return joinEqual;
}
private RelationFields createJoinRelationFields(RelationFields joinedFields, JoinRelation join) {
if (CollectionUtils.isNotEmpty(join.getUsingColNames()) && !join.getJoinOp().isFullOuterJoin()) {
return new CoalescedJoinFields(joinedFields.getAllFields(), join.getUsingColNames(), join.getJoinOp());
} else {
// TODO: Support FULL OUTER JOIN USING with proper COALESCE semantics
return joinedFields;
}
}
private void analyzeJoinHints(JoinRelation join) {
if (HintNode.HINT_JOIN_BROADCAST.equals(join.getJoinHint())) {
if (join.getJoinOp() == JoinOperator.RIGHT_OUTER_JOIN

View File

@ -26,6 +26,7 @@ import com.starrocks.common.TreeNode;
import com.starrocks.qe.ConnectContext;
import com.starrocks.sql.ast.AstVisitorExtendInterface;
import com.starrocks.sql.ast.GroupByClause;
import com.starrocks.sql.ast.JoinRelation;
import com.starrocks.sql.ast.OrderByElement;
import com.starrocks.sql.ast.ParseNode;
import com.starrocks.sql.ast.Relation;
@ -40,6 +41,7 @@ import com.starrocks.sql.ast.expression.GroupingFunctionCallExpr;
import com.starrocks.sql.ast.expression.IntLiteral;
import com.starrocks.sql.ast.expression.LimitElement;
import com.starrocks.sql.ast.expression.SlotRef;
import com.starrocks.sql.ast.expression.TableName;
import com.starrocks.sql.ast.expression.UserVariableExpr;
import com.starrocks.sql.common.StarRocksPlannerException;
import org.apache.commons.collections4.CollectionUtils;
@ -49,6 +51,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@ -211,9 +214,16 @@ public class SelectAnalyzer {
for (SelectListItem item : selectList.getItems()) {
if (item.isStar()) {
List<Field> fields = (item.getTblName() == null ? scope.getRelationFields().getAllFields()
: scope.getRelationFields().resolveFieldsWithPrefix(item.getTblName()))
.stream().filter(Field::isVisible)
List<Field> fields;
if (getJoinRelationWithUsing(fromRelation) != null) {
fields = getFieldsForJoinUsingStar(fromRelation, scope, item.getTblName());
} else {
fields = (item.getTblName() == null ? scope.getRelationFields().getAllFields()
: scope.getRelationFields().resolveFieldsWithPrefix(item.getTblName()));
}
fields = fields.stream().filter(Field::isVisible)
.filter(field -> !field.getName().startsWith(FeConstants.GENERATED_PARTITION_COLUMN_PREFIX))
.collect(Collectors.toList());
List<String> unknownTypeFields = fields.stream()
@ -872,4 +882,120 @@ public class SelectAnalyzer {
}
return allFields;
}
private JoinRelation getJoinRelationWithUsing(Relation relation) {
if (relation instanceof JoinRelation joinRelation) {
if (CollectionUtils.isNotEmpty(joinRelation.getUsingColNames())) {
return joinRelation;
}
}
return null;
}
/**
* Constructs field list for SELECT * in JOIN USING context per SQL standard.
*
* SQL standard specifies that JOIN USING columns should appear only once in SELECT *,
* unlike JOIN ON where both L.col and R.col would appear. The column ordering follows
* specific rules based on JOIN type:
*
* MYSQL Standard column order:
* - INNER/LEFT/FULL JOIN: [USING columns, left non-USING, right non-USING]
* - RIGHT JOIN: [USING columns, right non-USING, left non-USING]
*
* USING column selection (COALESCE semantics):
* - RIGHT JOIN: prefer right table's field (for non-null values)
* - Other JOINs: prefer left table's field
*
* Examples:
* - SELECT * FROM t1(a,b,c) JOIN t2(a,d) USING(a) -> [a, b, c, d]
* - SELECT * FROM t1(a,b,c) RIGHT JOIN t2(a,d) USING(a) -> [a, d, b, c]
*
* @param fromRelation The JOIN relation containing USING clause
* @param scope Current scope with all available fields
* @param tblName Optional table qualifier (e.g., t1.* vs *)
* @return Properly ordered field list for SELECT * with USING columns appearing once
*/
private List<Field> getFieldsForJoinUsingStar(Relation fromRelation, Scope scope, TableName tblName) {
if (tblName != null) {
// Qualified SELECT (e.g., SELECT t1.* FROM t1 JOIN t2 USING(id))
// Return only fields from specified table, no special USING handling needed
return scope.getRelationFields().resolveFieldsWithPrefix(tblName);
}
JoinRelation joinRelation = getJoinRelationWithUsing(fromRelation);
// TODO(stephen): Support FULL OUTER JOIN USING with proper COALESCE semantics
if (joinRelation.getJoinOp().isFullOuterJoin()) {
return scope.getRelationFields().getAllFields();
}
Set<String> usingColSet = joinRelation.getUsingColNames().stream()
.map(String::toLowerCase)
.collect(Collectors.toSet());
List<Field> allFields = scope.getRelationFields().getAllFields();
// Step 1: Add USING columns once with appropriate table preference
Map<String, Field> usingFields = new LinkedHashMap<>();
// Determine which table to prefer for USING columns (COALESCE semantics)
boolean preferRightTable = joinRelation.getJoinOp() != null && joinRelation.getJoinOp().isRightJoin();
// Iterate through all fields to find USING columns, selecting appropriate table's field
for (Field field : allFields) {
if (field.getName() != null && usingColSet.contains(field.getName().toLowerCase())) {
String key = field.getName().toLowerCase();
if (!usingFields.containsKey(key)) {
// First occurrence: always use it
usingFields.put(key, field);
} else if (preferRightTable) {
// Second occurrence in RIGHT JOIN: replace left table field with right table field
usingFields.put(key, field);
}
}
}
List<Field> result = new ArrayList<>(usingFields.values());
// Step 2: Add non-USING columns in MYSQL SQL standard compliant order
// Get original field counts to accurately separate left and right table fields
int leftFieldCount = joinRelation.getLeft().getScope().getRelationFields().getAllFields().size();
int rightFieldCount = joinRelation.getRight().getScope().getRelationFields().getAllFields().size();
if (preferRightTable) {
// RIGHT JOIN: [USING cols, right non-USING, left non-USING] per SQL standard
addNonUsingFieldsByCount(allFields, usingColSet, result, leftFieldCount, rightFieldCount, false);
addNonUsingFieldsByCount(allFields, usingColSet, result, leftFieldCount, rightFieldCount, true);
} else {
// Other JOINs: [USING cols, left non-USING, right non-USING] per SQL standard
addNonUsingFieldsByCount(allFields, usingColSet, result, leftFieldCount, rightFieldCount, true);
addNonUsingFieldsByCount(allFields, usingColSet, result, leftFieldCount, rightFieldCount, false);
}
return result;
}
private void addNonUsingFieldsByCount(List<Field> allFields, Set<String> usingColSet,
List<Field> result, int leftFieldCount, int rightFieldCount,
boolean addLeftTable) {
int startIdx;
int endIdx;
if (addLeftTable) {
// Add left table fields: indices [0, leftFieldCount)
startIdx = 0;
endIdx = leftFieldCount;
} else {
// Add right table fields: indices [leftFieldCount, leftFieldCount + rightFieldCount)
startIdx = leftFieldCount;
endIdx = leftFieldCount + rightFieldCount;
}
// Add non-USING fields from the specified table range
for (int i = startIdx; i < endIdx && i < allFields.size(); i++) {
Field field = allFields.get(i);
if (field.getName() == null || !usingColSet.contains(field.getName().toLowerCase())) {
result.add(field);
}
}
}
}

View File

@ -93,6 +93,7 @@ public class AnalyzeJoinTest {
"the most similar input is {<EOF>, ';'}.");
analyzeFail("select v1 from (t0 join tnotnull using(v1)), t1", "Column 'v1' is ambiguous");
analyzeSuccess("select a.v1 from (t0 a join tnotnull b using(v1)), t1");
analyzeSuccess("select v1 from t0 join tnotnull using(v1)");
}
@Test

View File

@ -1463,6 +1463,7 @@ public class DistributedEnvPlanWithCostTest extends DistributedEnvPlanTestBase {
" | equal join conjunct: [1: d_datekey, INT, true] = [18: d_datekey, INT, true]\n" +
" | build runtime filters:\n" +
" | - filter_id = 0, build_expr = (18: d_datekey), remote = false\n" +
" | output columns: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 35\n" +
" | cardinality: 2300\n" +
" | \n" +
" |----2:AGGREGATE (update finalize)");

View File

@ -0,0 +1,356 @@
-- name: test_join_with_using
DROP DATABASE IF EXISTS test_join_with_using;
-- result:
-- !result
CREATE DATABASE test_join_with_using;
-- result:
-- !result
use test_join_with_using;
-- result:
-- !result
CREATE TABLE left_table (
name VARCHAR(20),
id1 INT,
age INT,
city VARCHAR(20),
id2 INT,
salary DECIMAL(10,2),
status VARCHAR(10)
) DUPLICATE KEY(name) DISTRIBUTED BY HASH(name) BUCKETS 1 PROPERTIES ("replication_num" = "1");
-- result:
-- !result
CREATE TABLE right_table (
dept VARCHAR(20),
id1 INT,
bonus DECIMAL(8,2),
id2 INT
) DUPLICATE KEY(dept) DISTRIBUTED BY HASH(dept) BUCKETS 1 PROPERTIES ("replication_num" = "1");
-- result:
-- !result
INSERT INTO left_table VALUES
('Alice', 1, 25, 'New York', 100, 5000.00, 'Active'),
('Bob', 2, 30, 'Boston', 200, 6000.00, 'Active'),
('Charlie', 3, 35, 'Chicago', 300, 7000.00, 'Inactive'),
('David', 4, 28, 'Denver', NULL, 5500.00, 'Active'),
('Eve', 5, 32, 'Seattle', 500, 6500.00, 'Active'),
('Frank', 6, 40, NULL, 600, 8000.00, 'Inactive');
-- result:
-- !result
INSERT INTO right_table VALUES
('Engineering', 1, 1000.00, 100),
('Marketing', 2, 800.00, 200),
('Sales', 7, 1200.00, 700),
('HR', 8, 900.00, NULL),
('Finance', 9, NULL, 900);
-- result:
-- !result
SELECT * FROM left_table JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
1 100 Alice 25 New York 5000.00 Active Engineering 1000.00
2 200 Bob 30 Boston 6000.00 Active Marketing 800.00
-- !result
SELECT id1, id2 FROM left_table JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
1 100
2 200
-- !result
SELECT * FROM left_table LEFT JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
1 100 Alice 25 New York 5000.00 Active Engineering 1000.00
2 200 Bob 30 Boston 6000.00 Active Marketing 800.00
3 300 Charlie 35 Chicago 7000.00 Inactive None None
4 None David 28 Denver 5500.00 Active None None
5 500 Eve 32 Seattle 6500.00 Active None None
6 600 Frank 40 None 8000.00 Inactive None None
-- !result
SELECT id1, id2 FROM left_table LEFT JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
1 100
2 200
3 300
4 None
5 500
6 600
-- !result
SELECT * FROM left_table RIGHT JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
1 100 Engineering 1000.00 Alice 25 New York 5000.00 Active
2 200 Marketing 800.00 Bob 30 Boston 6000.00 Active
7 700 Sales 1200.00 None None None None None
8 None HR 900.00 None None None None None
9 900 Finance None None None None None None
-- !result
SELECT id1, id2 FROM left_table RIGHT JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
1 100
2 200
7 700
8 None
9 900
-- !result
SELECT * FROM left_table FULL OUTER JOIN right_table USING(id1, id2) ORDER BY right_table.dept, right_table.bonus;
-- result:
Eve 5 32 Seattle 500 6500.00 Active None None None None
Charlie 3 35 Chicago 300 7000.00 Inactive None None None None
David 4 28 Denver None 5500.00 Active None None None None
Frank 6 40 None 600 8000.00 Inactive None None None None
Alice 1 25 New York 100 5000.00 Active Engineering 1 1000.00 100
None None None None None None None Finance 9 None 900
None None None None None None None HR 8 900.00 None
Bob 2 30 Boston 200 6000.00 Active Marketing 2 800.00 200
None None None None None None None Sales 7 1200.00 700
-- !result
SELECT L.id1, R.id1, id1 FROM left_table L JOIN right_table R USING(id1, id2) ORDER BY L.id1;
-- result:
1 1 1
2 2 2
-- !result
SELECT * FROM left_table JOIN right_table USING(id1) ORDER BY id1;
-- result:
1 Alice 25 New York 100 5000.00 Active Engineering 1000.00 100
2 Bob 30 Boston 200 6000.00 Active Marketing 800.00 200
-- !result
SELECT * FROM left_table LEFT JOIN right_table USING(id1, id2) WHERE id1 IS NULL OR id2 IS NULL;
-- result:
4 None David 28 Denver 5500.00 Active None None
-- !result
SELECT * FROM left_table RIGHT SEMI JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
1 100 Engineering 1000.00
2 200 Marketing 800.00
-- !result
SELECT * FROM left_table RIGHT ANTI JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
7 700 Sales 1200.00
8 None HR 900.00
9 900 Finance None
-- !result
SELECT * FROM left_table CROSS JOIN right_table ORDER BY left_table.id1, right_table.id1 LIMIT 5;
-- result:
Alice 1 25 New York 100 5000.00 Active Engineering 1 1000.00 100
Alice 1 25 New York 100 5000.00 Active Marketing 2 800.00 200
Alice 1 25 New York 100 5000.00 Active Sales 7 1200.00 700
Alice 1 25 New York 100 5000.00 Active HR 8 900.00 None
Alice 1 25 New York 100 5000.00 Active Finance 9 None 900
-- !result
SELECT * FROM left_table LEFT SEMI JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
1 100 Alice 25 New York 5000.00 Active
2 200 Bob 30 Boston 6000.00 Active
-- !result
SELECT * FROM left_table LEFT ANTI JOIN right_table USING(id1, id2) ORDER BY id1;
-- result:
3 300 Charlie 35 Chicago 7000.00 Inactive
4 None David 28 Denver 5500.00 Active
5 500 Eve 32 Seattle 6500.00 Active
6 600 Frank 40 None 8000.00 Inactive
-- !result
SELECT * FROM left_table JOIN right_table USING(id1) ORDER BY id1;
-- result:
1 Alice 25 New York 100 5000.00 Active Engineering 1000.00 100
2 Bob 30 Boston 200 6000.00 Active Marketing 800.00 200
-- !result
SELECT * FROM left_table LEFT JOIN right_table USING(id1) ORDER BY id1;
-- result:
1 Alice 25 New York 100 5000.00 Active Engineering 1000.00 100
2 Bob 30 Boston 200 6000.00 Active Marketing 800.00 200
3 Charlie 35 Chicago 300 7000.00 Inactive None None None
4 David 28 Denver None 5500.00 Active None None None
5 Eve 32 Seattle 500 6500.00 Active None None None
6 Frank 40 None 600 8000.00 Inactive None None None
-- !result
SELECT * FROM left_table RIGHT JOIN right_table USING(id1) ORDER BY id1;
-- result:
1 Engineering 1000.00 100 Alice 25 New York 100 5000.00 Active
2 Marketing 800.00 200 Bob 30 Boston 200 6000.00 Active
7 Sales 1200.00 700 None None None None None None
8 HR 900.00 None None None None None None None
9 Finance None 900 None None None None None None
-- !result
SELECT * FROM left_table FULL OUTER JOIN right_table USING(id1) ORDER BY right_table.dept, right_table.bonus;
-- result:
Eve 5 32 Seattle 500 6500.00 Active None None None None
David 4 28 Denver None 5500.00 Active None None None None
Charlie 3 35 Chicago 300 7000.00 Inactive None None None None
Frank 6 40 None 600 8000.00 Inactive None None None None
Alice 1 25 New York 100 5000.00 Active Engineering 1 1000.00 100
None None None None None None None Finance 9 None 900
None None None None None None None HR 8 900.00 None
Bob 2 30 Boston 200 6000.00 Active Marketing 2 800.00 200
None None None None None None None Sales 7 1200.00 700
-- !result
SELECT * FROM left_table LEFT SEMI JOIN right_table USING(id1) ORDER BY id1;
-- result:
1 Alice 25 New York 100 5000.00 Active
2 Bob 30 Boston 200 6000.00 Active
-- !result
SELECT * FROM left_table LEFT ANTI JOIN right_table USING(id1) ORDER BY id1;
-- result:
3 Charlie 35 Chicago 300 7000.00 Inactive
4 David 28 Denver None 5500.00 Active
5 Eve 32 Seattle 500 6500.00 Active
6 Frank 40 None 600 8000.00 Inactive
-- !result
SELECT * FROM left_table RIGHT SEMI JOIN right_table USING(id1) ORDER BY id1;
-- result:
1 Engineering 1000.00 100
2 Marketing 800.00 200
-- !result
SELECT * FROM left_table RIGHT ANTI JOIN right_table USING(id1) ORDER BY id1;
-- result:
7 Sales 1200.00 700
8 HR 900.00 None
9 Finance None 900
-- !result
SELECT id1 AS join_key FROM left_table JOIN right_table USING(id1) ORDER BY join_key;
-- result:
1
2
-- !result
SELECT l.id1 l_id1 FROM left_table l JOIN right_table r USING(id1) ORDER BY l_id1;
-- result:
1
2
-- !result
SELECT l.id2 l_id1 FROM left_table l right JOIN right_table r USING(id1) ORDER BY l_id1;
-- result:
None
None
None
100
200
-- !result
SELECT r.id2 l_id1 FROM left_table l right JOIN right_table r USING(id1) ORDER BY l_id1;
-- result:
None
100
200
700
900
-- !result
SELECT id1 AS key1, id2 AS key2 FROM left_table JOIN right_table USING(id1, id2) ORDER BY key1;
-- result:
1 100
2 200
-- !result
SELECT L.id1 AS left_key, R.id1 AS right_key, id1 AS coalesced_key
FROM left_table L JOIN right_table R USING(id1) ORDER BY left_key;
-- result:
1 1 1
2 2 2
-- !result
SELECT id1 AS right_preferred_key FROM left_table RIGHT JOIN right_table USING(id1) ORDER BY right_preferred_key;
-- result:
1
2
7
8
9
-- !result
SELECT
id1 AS main_id,
name AS emp_name,
dept AS department,
salary AS emp_salary,
bonus AS emp_bonus
FROM left_table LEFT JOIN right_table USING(id1, id2)
ORDER BY main_id;
-- result:
1 Alice Engineering 5000.00 1000.00
2 Bob Marketing 6000.00 800.00
3 Charlie None 7000.00 None
4 David None 5500.00 None
5 Eve None 6500.00 None
6 Frank None 8000.00 None
-- !result
SELECT id1 AS pk, name, dept
FROM left_table LEFT JOIN right_table USING(id1)
WHERE id1 > 2
ORDER BY pk;
-- result:
3 Charlie None
4 David None
5 Eve None
6 Frank None
-- !result
SELECT * FROM left_table WHERE id1 NOT IN (SELECT id1 FROM right_table WHERE id1 IS NOT NULL) ORDER BY id1;
-- result:
Charlie 3 35 Chicago 300 7000.00 Inactive
David 4 28 Denver None 5500.00 Active
Eve 5 32 Seattle 500 6500.00 Active
Frank 6 40 None 600 8000.00 Inactive
-- !result
SELECT dept, id1, name FROM left_table LEFT JOIN right_table USING(id1) ORDER BY id1;
-- result:
Engineering 1 Alice
Marketing 2 Bob
None 3 Charlie
None 4 David
None 5 Eve
None 6 Frank
-- !result
SELECT * FROM (
SELECT id1, name, age FROM left_table WHERE id1 <= 5
) L JOIN (
SELECT id1, dept FROM right_table WHERE id1 <= 5
) R USING(id1) ORDER BY id1;
-- result:
1 Alice 25 Engineering
2 Bob 30 Marketing
-- !result
SELECT outer_query.id1, outer_query.total_count
FROM (
SELECT id1, COUNT(*) as total_count
FROM left_table L JOIN right_table R USING(id1)
GROUP BY id1
) outer_query
WHERE outer_query.total_count > 0
ORDER BY outer_query.id1;
-- result:
1 1
2 1
-- !result
SELECT id1, name, R.dept
FROM left_table L JOIN right_table R USING(id1)
WHERE L.id1 IN (
SELECT id1 FROM left_table WHERE salary > 6000
)
ORDER BY L.id1;
-- result:
-- !result
SELECT sub.id1, sub.id2, sub.emp_info, sub.dept_info
FROM (
SELECT
id1,
id2,
CONCAT(name, '-', CAST(age AS VARCHAR)) as emp_info,
COALESCE(dept, 'Unknown') as dept_info
FROM left_table L LEFT JOIN right_table R USING(id1, id2)
WHERE id1 IS NOT NULL
) sub
WHERE sub.emp_info LIKE '%Alice%' OR sub.dept_info = 'Engineering'
ORDER BY sub.id1;
-- result:
1 100 Alice-25 Engineering
-- !result
SELECT
L_AGG.id1,
L_AGG.avg_salary,
R_AGG.total_bonus
FROM (
SELECT id1, AVG(salary) as avg_salary
FROM left_table
WHERE salary IS NOT NULL
GROUP BY id1
) L_AGG
JOIN (
SELECT id1, SUM(bonus) as total_bonus
FROM right_table
WHERE bonus IS NOT NULL
GROUP BY id1
) R_AGG USING(id1)
ORDER BY L_AGG.id1;
-- result:
1 5000.00000000 1000.00
2 6000.00000000 800.00
-- !result

View File

@ -0,0 +1,197 @@
-- name: test_join_with_using
DROP DATABASE IF EXISTS test_join_with_using;
CREATE DATABASE test_join_with_using;
use test_join_with_using;
CREATE TABLE left_table (
name VARCHAR(20),
id1 INT,
age INT,
city VARCHAR(20),
id2 INT,
salary DECIMAL(10,2),
status VARCHAR(10)
) DUPLICATE KEY(name) DISTRIBUTED BY HASH(name) BUCKETS 1 PROPERTIES ("replication_num" = "1");
CREATE TABLE right_table (
dept VARCHAR(20),
id1 INT,
bonus DECIMAL(8,2),
id2 INT
) DUPLICATE KEY(dept) DISTRIBUTED BY HASH(dept) BUCKETS 1 PROPERTIES ("replication_num" = "1");
INSERT INTO left_table VALUES
('Alice', 1, 25, 'New York', 100, 5000.00, 'Active'),
('Bob', 2, 30, 'Boston', 200, 6000.00, 'Active'),
('Charlie', 3, 35, 'Chicago', 300, 7000.00, 'Inactive'),
('David', 4, 28, 'Denver', NULL, 5500.00, 'Active'),
('Eve', 5, 32, 'Seattle', 500, 6500.00, 'Active'),
('Frank', 6, 40, NULL, 600, 8000.00, 'Inactive');
INSERT INTO right_table VALUES
('Engineering', 1, 1000.00, 100),
('Marketing', 2, 800.00, 200),
('Sales', 7, 1200.00, 700),
('HR', 8, 900.00, NULL),
('Finance', 9, NULL, 900);
-- Test INNER JOIN with two USING columns
SELECT * FROM left_table JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test INNER JOIN selecting only USING columns
SELECT id1, id2 FROM left_table JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test LEFT OUTER JOIN
SELECT * FROM left_table LEFT JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test LEFT OUTER JOIN selecting only USING columns
SELECT id1, id2 FROM left_table LEFT JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test RIGHT OUTER JOIN (critical test for column ordering)
SELECT * FROM left_table RIGHT JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test RIGHT OUTER JOIN selecting only USING columns (should prefer right table values)
SELECT id1, id2 FROM left_table RIGHT JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test FULL OUTER JOIN
SELECT * FROM left_table FULL OUTER JOIN right_table USING(id1, id2) ORDER BY right_table.dept, right_table.bonus;
-- Test with table aliases and qualified references
SELECT L.id1, R.id1, id1 FROM left_table L JOIN right_table R USING(id1, id2) ORDER BY L.id1;
-- Test single USING column
SELECT * FROM left_table JOIN right_table USING(id1) ORDER BY id1;
-- Test NULL handling in USING columns
SELECT * FROM left_table LEFT JOIN right_table USING(id1, id2) WHERE id1 IS NULL OR id2 IS NULL;
-- Test RIGHT SEMI JOIN
SELECT * FROM left_table RIGHT SEMI JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test RIGHT ANTI JOIN
SELECT * FROM left_table RIGHT ANTI JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test CROSS JOIN
SELECT * FROM left_table CROSS JOIN right_table ORDER BY left_table.id1, right_table.id1 LIMIT 5;
-- Test LEFT SEMI JOIN
SELECT * FROM left_table LEFT SEMI JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test LEFT ANTI JOIN
SELECT * FROM left_table LEFT ANTI JOIN right_table USING(id1, id2) ORDER BY id1;
-- Test single column USING cases
SELECT * FROM left_table JOIN right_table USING(id1) ORDER BY id1;
SELECT * FROM left_table LEFT JOIN right_table USING(id1) ORDER BY id1;
SELECT * FROM left_table RIGHT JOIN right_table USING(id1) ORDER BY id1;
SELECT * FROM left_table FULL OUTER JOIN right_table USING(id1) ORDER BY right_table.dept, right_table.bonus;
SELECT * FROM left_table LEFT SEMI JOIN right_table USING(id1) ORDER BY id1;
SELECT * FROM left_table LEFT ANTI JOIN right_table USING(id1) ORDER BY id1;
SELECT * FROM left_table RIGHT SEMI JOIN right_table USING(id1) ORDER BY id1;
SELECT * FROM left_table RIGHT ANTI JOIN right_table USING(id1) ORDER BY id1;
-- Test single column USING with aliases
SELECT id1 AS join_key FROM left_table JOIN right_table USING(id1) ORDER BY join_key;
SELECT l.id1 l_id1 FROM left_table l JOIN right_table r USING(id1) ORDER BY l_id1;
SELECT l.id2 l_id1 FROM left_table l right JOIN right_table r USING(id1) ORDER BY l_id1;
SELECT r.id2 l_id1 FROM left_table l right JOIN right_table r USING(id1) ORDER BY l_id1;
SELECT id1 AS key1, id2 AS key2 FROM left_table JOIN right_table USING(id1, id2) ORDER BY key1;
-- Test USING column with table alias and column alias
SELECT L.id1 AS left_key, R.id1 AS right_key, id1 AS coalesced_key
FROM left_table L JOIN right_table R USING(id1) ORDER BY left_key;
-- Test RIGHT JOIN with single column and alias (should prefer right table value)
SELECT id1 AS right_preferred_key FROM left_table RIGHT JOIN right_table USING(id1) ORDER BY right_preferred_key;
-- Test complex alias scenarios
SELECT
id1 AS main_id,
name AS emp_name,
dept AS department,
salary AS emp_salary,
bonus AS emp_bonus
FROM left_table LEFT JOIN right_table USING(id1, id2)
ORDER BY main_id;
-- Test USING with WHERE clause on aliased columns
SELECT id1 AS pk, name, dept
FROM left_table LEFT JOIN right_table USING(id1)
WHERE id1 > 2
ORDER BY pk;
-- Test NULL AWARE LEFT ANTI JOIN (if supported)
SELECT * FROM left_table WHERE id1 NOT IN (SELECT id1 FROM right_table WHERE id1 IS NOT NULL) ORDER BY id1;
-- Test USING columns in different SELECT positions
SELECT dept, id1, name FROM left_table LEFT JOIN right_table USING(id1) ORDER BY id1;
-- Test multiple single-column USING joins in sequence (subquery style)
SELECT * FROM (
SELECT id1, name, age FROM left_table WHERE id1 <= 5
) L JOIN (
SELECT id1, dept FROM right_table WHERE id1 <= 5
) R USING(id1) ORDER BY id1;
-- Test subquery with JOIN USING as inner query
SELECT outer_query.id1, outer_query.total_count
FROM (
SELECT id1, COUNT(*) as total_count
FROM left_table L JOIN right_table R USING(id1)
GROUP BY id1
) outer_query
WHERE outer_query.total_count > 0
ORDER BY outer_query.id1;
-- Test JOIN USING with subquery in WHERE clause
SELECT id1, name, R.dept
FROM left_table L JOIN right_table R USING(id1)
WHERE L.id1 IN (
SELECT id1 FROM left_table WHERE salary > 6000
)
ORDER BY L.id1;
-- Test complex subquery with multiple USING columns
SELECT sub.id1, sub.id2, sub.emp_info, sub.dept_info
FROM (
SELECT
id1,
id2,
CONCAT(name, '-', CAST(age AS VARCHAR)) as emp_info,
COALESCE(dept, 'Unknown') as dept_info
FROM left_table L LEFT JOIN right_table R USING(id1, id2)
WHERE id1 IS NOT NULL
) sub
WHERE sub.emp_info LIKE '%Alice%' OR sub.dept_info = 'Engineering'
ORDER BY sub.id1;
-- Test JOIN USING between subqueries with aggregation
SELECT
L_AGG.id1,
L_AGG.avg_salary,
R_AGG.total_bonus
FROM (
SELECT id1, AVG(salary) as avg_salary
FROM left_table
WHERE salary IS NOT NULL
GROUP BY id1
) L_AGG
JOIN (
SELECT id1, SUM(bonus) as total_bonus
FROM right_table
WHERE bonus IS NOT NULL
GROUP BY id1
) R_AGG USING(id1)
ORDER BY L_AGG.id1;