[Enhancement] Implement SQL standard JOIN USING with MySQL compatibility (#63312)
Signed-off-by: stephen <stephen5217@163.com>
This commit is contained in:
parent
67ceccc525
commit
30df6bb044
|
|
@ -0,0 +1,83 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.starrocks.sql.analyzer;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.starrocks.sql.ast.expression.JoinOperator;
|
||||
import com.starrocks.sql.ast.expression.SlotRef;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Handles column resolution for JOIN USING clauses per SQL standard.
|
||||
*
|
||||
* In JOIN USING, each USING column appears only once in the result (coalesced),
|
||||
* unlike JOIN ON where both L.col and R.col are visible. For unqualified
|
||||
* references to USING columns, this class resolves to the appropriate table's
|
||||
* field based on JOIN type (RIGHT JOIN prefers right table values).
|
||||
*
|
||||
* Examples:
|
||||
* - SELECT id FROM t1 JOIN t2 USING(id) -> returns coalesced id column
|
||||
* - SELECT t1.id FROM t1 JOIN t2 USING(id) -> returns original t1.id column
|
||||
*/
|
||||
public class CoalescedJoinFields extends RelationFields {
|
||||
private final Set<String> usingColumns;
|
||||
private final JoinOperator joinOperator;
|
||||
|
||||
public CoalescedJoinFields(List<Field> fields, List<String> usingColNames, JoinOperator joinOperator) {
|
||||
super(fields);
|
||||
this.usingColumns = usingColNames.stream()
|
||||
.map(String::toLowerCase)
|
||||
.collect(Collectors.toSet());
|
||||
this.joinOperator = joinOperator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Field> resolveFields(SlotRef name) {
|
||||
String columnName = name.getColumnName().toLowerCase();
|
||||
|
||||
// For unqualified USING columns, return single coalesced field per SQL standard
|
||||
// This implements the "appears only once" semantics of JOIN USING
|
||||
if (name.getTblNameWithoutAnalyzed() == null && usingColumns.contains(columnName)) {
|
||||
List<Field> allMatches = super.resolveFields(name);
|
||||
if (!allMatches.isEmpty()) {
|
||||
Field selectedField = selectCoalescedField(allMatches);
|
||||
return ImmutableList.of(selectedField); // Single field, not both L.col and R.col
|
||||
}
|
||||
}
|
||||
|
||||
// For qualified references (table.column) or non-USING columns, return all matches
|
||||
// This allows access to original table columns when explicitly qualified
|
||||
return super.resolveFields(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects which table's field to use for the coalesced USING column.
|
||||
* This implements COALESCE semantics:
|
||||
* - RIGHT JOIN: COALESCE(L.col, R.col) -> prefer non-null R.col
|
||||
* - Other JOINs: COALESCE(L.col, R.col) -> prefer non-null L.col
|
||||
*/
|
||||
private Field selectCoalescedField(List<Field> allMatches) {
|
||||
if (joinOperator != null && joinOperator.isRightJoin()) {
|
||||
// RIGHT JOIN: right table is primary, prefer its field for coalesced column
|
||||
return allMatches.get(allMatches.size() - 1);
|
||||
} else {
|
||||
// Other JOINs: left table is primary, prefer its field for coalesced column
|
||||
return allMatches.get(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -91,6 +91,7 @@ import com.starrocks.sql.ast.expression.TableName;
|
|||
import com.starrocks.sql.common.MetaUtils;
|
||||
import com.starrocks.sql.common.TypeManager;
|
||||
import com.starrocks.sql.optimizer.dump.HiveMetaStoreTableDumpInfo;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -891,20 +892,20 @@ public class QueryAnalyzer {
|
|||
scope = new Scope(RelationId.of(join), rightScope.getRelationFields());
|
||||
} else if (join.getJoinOp().isAnyLeftOuterJoin()) {
|
||||
List<Field> rightFields = getFieldsWithNullable(rightScope);
|
||||
scope = new Scope(RelationId.of(join),
|
||||
leftScope.getRelationFields().joinWith(new RelationFields(rightFields)));
|
||||
RelationFields joinedFields = leftScope.getRelationFields().joinWith(new RelationFields(rightFields));
|
||||
scope = new Scope(RelationId.of(join), createJoinRelationFields(joinedFields, join));
|
||||
} else if (join.getJoinOp().isRightOuterJoin()) {
|
||||
List<Field> leftFields = getFieldsWithNullable(leftScope);
|
||||
scope = new Scope(RelationId.of(join),
|
||||
new RelationFields(leftFields).joinWith(rightScope.getRelationFields()));
|
||||
RelationFields joinedFields = new RelationFields(leftFields).joinWith(rightScope.getRelationFields());
|
||||
scope = new Scope(RelationId.of(join), createJoinRelationFields(joinedFields, join));
|
||||
} else if (join.getJoinOp().isFullOuterJoin()) {
|
||||
List<Field> rightFields = getFieldsWithNullable(rightScope);
|
||||
List<Field> leftFields = getFieldsWithNullable(leftScope);
|
||||
scope = new Scope(RelationId.of(join),
|
||||
new RelationFields(leftFields).joinWith(new RelationFields(rightFields)));
|
||||
RelationFields joinedFields = new RelationFields(leftFields).joinWith(new RelationFields(rightFields));
|
||||
scope = new Scope(RelationId.of(join), createJoinRelationFields(joinedFields, join));
|
||||
} else {
|
||||
scope = new Scope(RelationId.of(join),
|
||||
leftScope.getRelationFields().joinWith(rightScope.getRelationFields()));
|
||||
RelationFields joinedFields = leftScope.getRelationFields().joinWith(rightScope.getRelationFields());
|
||||
scope = new Scope(RelationId.of(join), createJoinRelationFields(joinedFields, join));
|
||||
}
|
||||
join.setScope(scope);
|
||||
|
||||
|
|
@ -954,6 +955,15 @@ public class QueryAnalyzer {
|
|||
return joinEqual;
|
||||
}
|
||||
|
||||
private RelationFields createJoinRelationFields(RelationFields joinedFields, JoinRelation join) {
|
||||
if (CollectionUtils.isNotEmpty(join.getUsingColNames()) && !join.getJoinOp().isFullOuterJoin()) {
|
||||
return new CoalescedJoinFields(joinedFields.getAllFields(), join.getUsingColNames(), join.getJoinOp());
|
||||
} else {
|
||||
// TODO: Support FULL OUTER JOIN USING with proper COALESCE semantics
|
||||
return joinedFields;
|
||||
}
|
||||
}
|
||||
|
||||
private void analyzeJoinHints(JoinRelation join) {
|
||||
if (HintNode.HINT_JOIN_BROADCAST.equals(join.getJoinHint())) {
|
||||
if (join.getJoinOp() == JoinOperator.RIGHT_OUTER_JOIN
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ import com.starrocks.common.TreeNode;
|
|||
import com.starrocks.qe.ConnectContext;
|
||||
import com.starrocks.sql.ast.AstVisitorExtendInterface;
|
||||
import com.starrocks.sql.ast.GroupByClause;
|
||||
import com.starrocks.sql.ast.JoinRelation;
|
||||
import com.starrocks.sql.ast.OrderByElement;
|
||||
import com.starrocks.sql.ast.ParseNode;
|
||||
import com.starrocks.sql.ast.Relation;
|
||||
|
|
@ -40,6 +41,7 @@ import com.starrocks.sql.ast.expression.GroupingFunctionCallExpr;
|
|||
import com.starrocks.sql.ast.expression.IntLiteral;
|
||||
import com.starrocks.sql.ast.expression.LimitElement;
|
||||
import com.starrocks.sql.ast.expression.SlotRef;
|
||||
import com.starrocks.sql.ast.expression.TableName;
|
||||
import com.starrocks.sql.ast.expression.UserVariableExpr;
|
||||
import com.starrocks.sql.common.StarRocksPlannerException;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
|
|
@ -49,6 +51,7 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
|
@ -211,9 +214,16 @@ public class SelectAnalyzer {
|
|||
|
||||
for (SelectListItem item : selectList.getItems()) {
|
||||
if (item.isStar()) {
|
||||
List<Field> fields = (item.getTblName() == null ? scope.getRelationFields().getAllFields()
|
||||
: scope.getRelationFields().resolveFieldsWithPrefix(item.getTblName()))
|
||||
.stream().filter(Field::isVisible)
|
||||
List<Field> fields;
|
||||
|
||||
if (getJoinRelationWithUsing(fromRelation) != null) {
|
||||
fields = getFieldsForJoinUsingStar(fromRelation, scope, item.getTblName());
|
||||
} else {
|
||||
fields = (item.getTblName() == null ? scope.getRelationFields().getAllFields()
|
||||
: scope.getRelationFields().resolveFieldsWithPrefix(item.getTblName()));
|
||||
}
|
||||
|
||||
fields = fields.stream().filter(Field::isVisible)
|
||||
.filter(field -> !field.getName().startsWith(FeConstants.GENERATED_PARTITION_COLUMN_PREFIX))
|
||||
.collect(Collectors.toList());
|
||||
List<String> unknownTypeFields = fields.stream()
|
||||
|
|
@ -872,4 +882,120 @@ public class SelectAnalyzer {
|
|||
}
|
||||
return allFields;
|
||||
}
|
||||
|
||||
private JoinRelation getJoinRelationWithUsing(Relation relation) {
|
||||
if (relation instanceof JoinRelation joinRelation) {
|
||||
if (CollectionUtils.isNotEmpty(joinRelation.getUsingColNames())) {
|
||||
return joinRelation;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs field list for SELECT * in JOIN USING context per SQL standard.
|
||||
*
|
||||
* SQL standard specifies that JOIN USING columns should appear only once in SELECT *,
|
||||
* unlike JOIN ON where both L.col and R.col would appear. The column ordering follows
|
||||
* specific rules based on JOIN type:
|
||||
*
|
||||
* MYSQL Standard column order:
|
||||
* - INNER/LEFT/FULL JOIN: [USING columns, left non-USING, right non-USING]
|
||||
* - RIGHT JOIN: [USING columns, right non-USING, left non-USING]
|
||||
*
|
||||
* USING column selection (COALESCE semantics):
|
||||
* - RIGHT JOIN: prefer right table's field (for non-null values)
|
||||
* - Other JOINs: prefer left table's field
|
||||
*
|
||||
* Examples:
|
||||
* - SELECT * FROM t1(a,b,c) JOIN t2(a,d) USING(a) -> [a, b, c, d]
|
||||
* - SELECT * FROM t1(a,b,c) RIGHT JOIN t2(a,d) USING(a) -> [a, d, b, c]
|
||||
*
|
||||
* @param fromRelation The JOIN relation containing USING clause
|
||||
* @param scope Current scope with all available fields
|
||||
* @param tblName Optional table qualifier (e.g., t1.* vs *)
|
||||
* @return Properly ordered field list for SELECT * with USING columns appearing once
|
||||
*/
|
||||
private List<Field> getFieldsForJoinUsingStar(Relation fromRelation, Scope scope, TableName tblName) {
|
||||
if (tblName != null) {
|
||||
// Qualified SELECT (e.g., SELECT t1.* FROM t1 JOIN t2 USING(id))
|
||||
// Return only fields from specified table, no special USING handling needed
|
||||
return scope.getRelationFields().resolveFieldsWithPrefix(tblName);
|
||||
}
|
||||
|
||||
JoinRelation joinRelation = getJoinRelationWithUsing(fromRelation);
|
||||
|
||||
// TODO(stephen): Support FULL OUTER JOIN USING with proper COALESCE semantics
|
||||
if (joinRelation.getJoinOp().isFullOuterJoin()) {
|
||||
return scope.getRelationFields().getAllFields();
|
||||
}
|
||||
|
||||
Set<String> usingColSet = joinRelation.getUsingColNames().stream()
|
||||
.map(String::toLowerCase)
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
List<Field> allFields = scope.getRelationFields().getAllFields();
|
||||
|
||||
// Step 1: Add USING columns once with appropriate table preference
|
||||
Map<String, Field> usingFields = new LinkedHashMap<>();
|
||||
|
||||
// Determine which table to prefer for USING columns (COALESCE semantics)
|
||||
boolean preferRightTable = joinRelation.getJoinOp() != null && joinRelation.getJoinOp().isRightJoin();
|
||||
|
||||
// Iterate through all fields to find USING columns, selecting appropriate table's field
|
||||
for (Field field : allFields) {
|
||||
if (field.getName() != null && usingColSet.contains(field.getName().toLowerCase())) {
|
||||
String key = field.getName().toLowerCase();
|
||||
if (!usingFields.containsKey(key)) {
|
||||
// First occurrence: always use it
|
||||
usingFields.put(key, field);
|
||||
} else if (preferRightTable) {
|
||||
// Second occurrence in RIGHT JOIN: replace left table field with right table field
|
||||
usingFields.put(key, field);
|
||||
}
|
||||
}
|
||||
}
|
||||
List<Field> result = new ArrayList<>(usingFields.values());
|
||||
|
||||
// Step 2: Add non-USING columns in MYSQL SQL standard compliant order
|
||||
// Get original field counts to accurately separate left and right table fields
|
||||
int leftFieldCount = joinRelation.getLeft().getScope().getRelationFields().getAllFields().size();
|
||||
int rightFieldCount = joinRelation.getRight().getScope().getRelationFields().getAllFields().size();
|
||||
|
||||
if (preferRightTable) {
|
||||
// RIGHT JOIN: [USING cols, right non-USING, left non-USING] per SQL standard
|
||||
addNonUsingFieldsByCount(allFields, usingColSet, result, leftFieldCount, rightFieldCount, false);
|
||||
addNonUsingFieldsByCount(allFields, usingColSet, result, leftFieldCount, rightFieldCount, true);
|
||||
} else {
|
||||
// Other JOINs: [USING cols, left non-USING, right non-USING] per SQL standard
|
||||
addNonUsingFieldsByCount(allFields, usingColSet, result, leftFieldCount, rightFieldCount, true);
|
||||
addNonUsingFieldsByCount(allFields, usingColSet, result, leftFieldCount, rightFieldCount, false);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private void addNonUsingFieldsByCount(List<Field> allFields, Set<String> usingColSet,
|
||||
List<Field> result, int leftFieldCount, int rightFieldCount,
|
||||
boolean addLeftTable) {
|
||||
int startIdx;
|
||||
int endIdx;
|
||||
if (addLeftTable) {
|
||||
// Add left table fields: indices [0, leftFieldCount)
|
||||
startIdx = 0;
|
||||
endIdx = leftFieldCount;
|
||||
} else {
|
||||
// Add right table fields: indices [leftFieldCount, leftFieldCount + rightFieldCount)
|
||||
startIdx = leftFieldCount;
|
||||
endIdx = leftFieldCount + rightFieldCount;
|
||||
}
|
||||
|
||||
// Add non-USING fields from the specified table range
|
||||
for (int i = startIdx; i < endIdx && i < allFields.size(); i++) {
|
||||
Field field = allFields.get(i);
|
||||
if (field.getName() == null || !usingColSet.contains(field.getName().toLowerCase())) {
|
||||
result.add(field);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,6 +93,7 @@ public class AnalyzeJoinTest {
|
|||
"the most similar input is {<EOF>, ';'}.");
|
||||
analyzeFail("select v1 from (t0 join tnotnull using(v1)), t1", "Column 'v1' is ambiguous");
|
||||
analyzeSuccess("select a.v1 from (t0 a join tnotnull b using(v1)), t1");
|
||||
analyzeSuccess("select v1 from t0 join tnotnull using(v1)");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -1463,6 +1463,7 @@ public class DistributedEnvPlanWithCostTest extends DistributedEnvPlanTestBase {
|
|||
" | equal join conjunct: [1: d_datekey, INT, true] = [18: d_datekey, INT, true]\n" +
|
||||
" | build runtime filters:\n" +
|
||||
" | - filter_id = 0, build_expr = (18: d_datekey), remote = false\n" +
|
||||
" | output columns: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 35\n" +
|
||||
" | cardinality: 2300\n" +
|
||||
" | \n" +
|
||||
" |----2:AGGREGATE (update finalize)");
|
||||
|
|
|
|||
|
|
@ -0,0 +1,356 @@
|
|||
-- name: test_join_with_using
|
||||
DROP DATABASE IF EXISTS test_join_with_using;
|
||||
-- result:
|
||||
-- !result
|
||||
CREATE DATABASE test_join_with_using;
|
||||
-- result:
|
||||
-- !result
|
||||
use test_join_with_using;
|
||||
-- result:
|
||||
-- !result
|
||||
CREATE TABLE left_table (
|
||||
name VARCHAR(20),
|
||||
id1 INT,
|
||||
age INT,
|
||||
city VARCHAR(20),
|
||||
id2 INT,
|
||||
salary DECIMAL(10,2),
|
||||
status VARCHAR(10)
|
||||
) DUPLICATE KEY(name) DISTRIBUTED BY HASH(name) BUCKETS 1 PROPERTIES ("replication_num" = "1");
|
||||
-- result:
|
||||
-- !result
|
||||
CREATE TABLE right_table (
|
||||
dept VARCHAR(20),
|
||||
id1 INT,
|
||||
bonus DECIMAL(8,2),
|
||||
id2 INT
|
||||
) DUPLICATE KEY(dept) DISTRIBUTED BY HASH(dept) BUCKETS 1 PROPERTIES ("replication_num" = "1");
|
||||
-- result:
|
||||
-- !result
|
||||
INSERT INTO left_table VALUES
|
||||
('Alice', 1, 25, 'New York', 100, 5000.00, 'Active'),
|
||||
('Bob', 2, 30, 'Boston', 200, 6000.00, 'Active'),
|
||||
('Charlie', 3, 35, 'Chicago', 300, 7000.00, 'Inactive'),
|
||||
('David', 4, 28, 'Denver', NULL, 5500.00, 'Active'),
|
||||
('Eve', 5, 32, 'Seattle', 500, 6500.00, 'Active'),
|
||||
('Frank', 6, 40, NULL, 600, 8000.00, 'Inactive');
|
||||
-- result:
|
||||
-- !result
|
||||
INSERT INTO right_table VALUES
|
||||
('Engineering', 1, 1000.00, 100),
|
||||
('Marketing', 2, 800.00, 200),
|
||||
('Sales', 7, 1200.00, 700),
|
||||
('HR', 8, 900.00, NULL),
|
||||
('Finance', 9, NULL, 900);
|
||||
-- result:
|
||||
-- !result
|
||||
SELECT * FROM left_table JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
1 100 Alice 25 New York 5000.00 Active Engineering 1000.00
|
||||
2 200 Bob 30 Boston 6000.00 Active Marketing 800.00
|
||||
-- !result
|
||||
SELECT id1, id2 FROM left_table JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
1 100
|
||||
2 200
|
||||
-- !result
|
||||
SELECT * FROM left_table LEFT JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
1 100 Alice 25 New York 5000.00 Active Engineering 1000.00
|
||||
2 200 Bob 30 Boston 6000.00 Active Marketing 800.00
|
||||
3 300 Charlie 35 Chicago 7000.00 Inactive None None
|
||||
4 None David 28 Denver 5500.00 Active None None
|
||||
5 500 Eve 32 Seattle 6500.00 Active None None
|
||||
6 600 Frank 40 None 8000.00 Inactive None None
|
||||
-- !result
|
||||
SELECT id1, id2 FROM left_table LEFT JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
1 100
|
||||
2 200
|
||||
3 300
|
||||
4 None
|
||||
5 500
|
||||
6 600
|
||||
-- !result
|
||||
SELECT * FROM left_table RIGHT JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
1 100 Engineering 1000.00 Alice 25 New York 5000.00 Active
|
||||
2 200 Marketing 800.00 Bob 30 Boston 6000.00 Active
|
||||
7 700 Sales 1200.00 None None None None None
|
||||
8 None HR 900.00 None None None None None
|
||||
9 900 Finance None None None None None None
|
||||
-- !result
|
||||
SELECT id1, id2 FROM left_table RIGHT JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
1 100
|
||||
2 200
|
||||
7 700
|
||||
8 None
|
||||
9 900
|
||||
-- !result
|
||||
SELECT * FROM left_table FULL OUTER JOIN right_table USING(id1, id2) ORDER BY right_table.dept, right_table.bonus;
|
||||
-- result:
|
||||
Eve 5 32 Seattle 500 6500.00 Active None None None None
|
||||
Charlie 3 35 Chicago 300 7000.00 Inactive None None None None
|
||||
David 4 28 Denver None 5500.00 Active None None None None
|
||||
Frank 6 40 None 600 8000.00 Inactive None None None None
|
||||
Alice 1 25 New York 100 5000.00 Active Engineering 1 1000.00 100
|
||||
None None None None None None None Finance 9 None 900
|
||||
None None None None None None None HR 8 900.00 None
|
||||
Bob 2 30 Boston 200 6000.00 Active Marketing 2 800.00 200
|
||||
None None None None None None None Sales 7 1200.00 700
|
||||
-- !result
|
||||
SELECT L.id1, R.id1, id1 FROM left_table L JOIN right_table R USING(id1, id2) ORDER BY L.id1;
|
||||
-- result:
|
||||
1 1 1
|
||||
2 2 2
|
||||
-- !result
|
||||
SELECT * FROM left_table JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
1 Alice 25 New York 100 5000.00 Active Engineering 1000.00 100
|
||||
2 Bob 30 Boston 200 6000.00 Active Marketing 800.00 200
|
||||
-- !result
|
||||
SELECT * FROM left_table LEFT JOIN right_table USING(id1, id2) WHERE id1 IS NULL OR id2 IS NULL;
|
||||
-- result:
|
||||
4 None David 28 Denver 5500.00 Active None None
|
||||
-- !result
|
||||
SELECT * FROM left_table RIGHT SEMI JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
1 100 Engineering 1000.00
|
||||
2 200 Marketing 800.00
|
||||
-- !result
|
||||
SELECT * FROM left_table RIGHT ANTI JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
7 700 Sales 1200.00
|
||||
8 None HR 900.00
|
||||
9 900 Finance None
|
||||
-- !result
|
||||
SELECT * FROM left_table CROSS JOIN right_table ORDER BY left_table.id1, right_table.id1 LIMIT 5;
|
||||
-- result:
|
||||
Alice 1 25 New York 100 5000.00 Active Engineering 1 1000.00 100
|
||||
Alice 1 25 New York 100 5000.00 Active Marketing 2 800.00 200
|
||||
Alice 1 25 New York 100 5000.00 Active Sales 7 1200.00 700
|
||||
Alice 1 25 New York 100 5000.00 Active HR 8 900.00 None
|
||||
Alice 1 25 New York 100 5000.00 Active Finance 9 None 900
|
||||
-- !result
|
||||
SELECT * FROM left_table LEFT SEMI JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
1 100 Alice 25 New York 5000.00 Active
|
||||
2 200 Bob 30 Boston 6000.00 Active
|
||||
-- !result
|
||||
SELECT * FROM left_table LEFT ANTI JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
-- result:
|
||||
3 300 Charlie 35 Chicago 7000.00 Inactive
|
||||
4 None David 28 Denver 5500.00 Active
|
||||
5 500 Eve 32 Seattle 6500.00 Active
|
||||
6 600 Frank 40 None 8000.00 Inactive
|
||||
-- !result
|
||||
SELECT * FROM left_table JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
1 Alice 25 New York 100 5000.00 Active Engineering 1000.00 100
|
||||
2 Bob 30 Boston 200 6000.00 Active Marketing 800.00 200
|
||||
-- !result
|
||||
SELECT * FROM left_table LEFT JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
1 Alice 25 New York 100 5000.00 Active Engineering 1000.00 100
|
||||
2 Bob 30 Boston 200 6000.00 Active Marketing 800.00 200
|
||||
3 Charlie 35 Chicago 300 7000.00 Inactive None None None
|
||||
4 David 28 Denver None 5500.00 Active None None None
|
||||
5 Eve 32 Seattle 500 6500.00 Active None None None
|
||||
6 Frank 40 None 600 8000.00 Inactive None None None
|
||||
-- !result
|
||||
SELECT * FROM left_table RIGHT JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
1 Engineering 1000.00 100 Alice 25 New York 100 5000.00 Active
|
||||
2 Marketing 800.00 200 Bob 30 Boston 200 6000.00 Active
|
||||
7 Sales 1200.00 700 None None None None None None
|
||||
8 HR 900.00 None None None None None None None
|
||||
9 Finance None 900 None None None None None None
|
||||
-- !result
|
||||
SELECT * FROM left_table FULL OUTER JOIN right_table USING(id1) ORDER BY right_table.dept, right_table.bonus;
|
||||
-- result:
|
||||
Eve 5 32 Seattle 500 6500.00 Active None None None None
|
||||
David 4 28 Denver None 5500.00 Active None None None None
|
||||
Charlie 3 35 Chicago 300 7000.00 Inactive None None None None
|
||||
Frank 6 40 None 600 8000.00 Inactive None None None None
|
||||
Alice 1 25 New York 100 5000.00 Active Engineering 1 1000.00 100
|
||||
None None None None None None None Finance 9 None 900
|
||||
None None None None None None None HR 8 900.00 None
|
||||
Bob 2 30 Boston 200 6000.00 Active Marketing 2 800.00 200
|
||||
None None None None None None None Sales 7 1200.00 700
|
||||
-- !result
|
||||
SELECT * FROM left_table LEFT SEMI JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
1 Alice 25 New York 100 5000.00 Active
|
||||
2 Bob 30 Boston 200 6000.00 Active
|
||||
-- !result
|
||||
SELECT * FROM left_table LEFT ANTI JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
3 Charlie 35 Chicago 300 7000.00 Inactive
|
||||
4 David 28 Denver None 5500.00 Active
|
||||
5 Eve 32 Seattle 500 6500.00 Active
|
||||
6 Frank 40 None 600 8000.00 Inactive
|
||||
-- !result
|
||||
SELECT * FROM left_table RIGHT SEMI JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
1 Engineering 1000.00 100
|
||||
2 Marketing 800.00 200
|
||||
-- !result
|
||||
SELECT * FROM left_table RIGHT ANTI JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
7 Sales 1200.00 700
|
||||
8 HR 900.00 None
|
||||
9 Finance None 900
|
||||
-- !result
|
||||
SELECT id1 AS join_key FROM left_table JOIN right_table USING(id1) ORDER BY join_key;
|
||||
-- result:
|
||||
1
|
||||
2
|
||||
-- !result
|
||||
SELECT l.id1 l_id1 FROM left_table l JOIN right_table r USING(id1) ORDER BY l_id1;
|
||||
-- result:
|
||||
1
|
||||
2
|
||||
-- !result
|
||||
SELECT l.id2 l_id1 FROM left_table l right JOIN right_table r USING(id1) ORDER BY l_id1;
|
||||
-- result:
|
||||
None
|
||||
None
|
||||
None
|
||||
100
|
||||
200
|
||||
-- !result
|
||||
SELECT r.id2 l_id1 FROM left_table l right JOIN right_table r USING(id1) ORDER BY l_id1;
|
||||
-- result:
|
||||
None
|
||||
100
|
||||
200
|
||||
700
|
||||
900
|
||||
-- !result
|
||||
SELECT id1 AS key1, id2 AS key2 FROM left_table JOIN right_table USING(id1, id2) ORDER BY key1;
|
||||
-- result:
|
||||
1 100
|
||||
2 200
|
||||
-- !result
|
||||
SELECT L.id1 AS left_key, R.id1 AS right_key, id1 AS coalesced_key
|
||||
FROM left_table L JOIN right_table R USING(id1) ORDER BY left_key;
|
||||
-- result:
|
||||
1 1 1
|
||||
2 2 2
|
||||
-- !result
|
||||
SELECT id1 AS right_preferred_key FROM left_table RIGHT JOIN right_table USING(id1) ORDER BY right_preferred_key;
|
||||
-- result:
|
||||
1
|
||||
2
|
||||
7
|
||||
8
|
||||
9
|
||||
-- !result
|
||||
SELECT
|
||||
id1 AS main_id,
|
||||
name AS emp_name,
|
||||
dept AS department,
|
||||
salary AS emp_salary,
|
||||
bonus AS emp_bonus
|
||||
FROM left_table LEFT JOIN right_table USING(id1, id2)
|
||||
ORDER BY main_id;
|
||||
-- result:
|
||||
1 Alice Engineering 5000.00 1000.00
|
||||
2 Bob Marketing 6000.00 800.00
|
||||
3 Charlie None 7000.00 None
|
||||
4 David None 5500.00 None
|
||||
5 Eve None 6500.00 None
|
||||
6 Frank None 8000.00 None
|
||||
-- !result
|
||||
SELECT id1 AS pk, name, dept
|
||||
FROM left_table LEFT JOIN right_table USING(id1)
|
||||
WHERE id1 > 2
|
||||
ORDER BY pk;
|
||||
-- result:
|
||||
3 Charlie None
|
||||
4 David None
|
||||
5 Eve None
|
||||
6 Frank None
|
||||
-- !result
|
||||
SELECT * FROM left_table WHERE id1 NOT IN (SELECT id1 FROM right_table WHERE id1 IS NOT NULL) ORDER BY id1;
|
||||
-- result:
|
||||
Charlie 3 35 Chicago 300 7000.00 Inactive
|
||||
David 4 28 Denver None 5500.00 Active
|
||||
Eve 5 32 Seattle 500 6500.00 Active
|
||||
Frank 6 40 None 600 8000.00 Inactive
|
||||
-- !result
|
||||
SELECT dept, id1, name FROM left_table LEFT JOIN right_table USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
Engineering 1 Alice
|
||||
Marketing 2 Bob
|
||||
None 3 Charlie
|
||||
None 4 David
|
||||
None 5 Eve
|
||||
None 6 Frank
|
||||
-- !result
|
||||
SELECT * FROM (
|
||||
SELECT id1, name, age FROM left_table WHERE id1 <= 5
|
||||
) L JOIN (
|
||||
SELECT id1, dept FROM right_table WHERE id1 <= 5
|
||||
) R USING(id1) ORDER BY id1;
|
||||
-- result:
|
||||
1 Alice 25 Engineering
|
||||
2 Bob 30 Marketing
|
||||
-- !result
|
||||
SELECT outer_query.id1, outer_query.total_count
|
||||
FROM (
|
||||
SELECT id1, COUNT(*) as total_count
|
||||
FROM left_table L JOIN right_table R USING(id1)
|
||||
GROUP BY id1
|
||||
) outer_query
|
||||
WHERE outer_query.total_count > 0
|
||||
ORDER BY outer_query.id1;
|
||||
-- result:
|
||||
1 1
|
||||
2 1
|
||||
-- !result
|
||||
SELECT id1, name, R.dept
|
||||
FROM left_table L JOIN right_table R USING(id1)
|
||||
WHERE L.id1 IN (
|
||||
SELECT id1 FROM left_table WHERE salary > 6000
|
||||
)
|
||||
ORDER BY L.id1;
|
||||
-- result:
|
||||
-- !result
|
||||
SELECT sub.id1, sub.id2, sub.emp_info, sub.dept_info
|
||||
FROM (
|
||||
SELECT
|
||||
id1,
|
||||
id2,
|
||||
CONCAT(name, '-', CAST(age AS VARCHAR)) as emp_info,
|
||||
COALESCE(dept, 'Unknown') as dept_info
|
||||
FROM left_table L LEFT JOIN right_table R USING(id1, id2)
|
||||
WHERE id1 IS NOT NULL
|
||||
) sub
|
||||
WHERE sub.emp_info LIKE '%Alice%' OR sub.dept_info = 'Engineering'
|
||||
ORDER BY sub.id1;
|
||||
-- result:
|
||||
1 100 Alice-25 Engineering
|
||||
-- !result
|
||||
SELECT
|
||||
L_AGG.id1,
|
||||
L_AGG.avg_salary,
|
||||
R_AGG.total_bonus
|
||||
FROM (
|
||||
SELECT id1, AVG(salary) as avg_salary
|
||||
FROM left_table
|
||||
WHERE salary IS NOT NULL
|
||||
GROUP BY id1
|
||||
) L_AGG
|
||||
JOIN (
|
||||
SELECT id1, SUM(bonus) as total_bonus
|
||||
FROM right_table
|
||||
WHERE bonus IS NOT NULL
|
||||
GROUP BY id1
|
||||
) R_AGG USING(id1)
|
||||
ORDER BY L_AGG.id1;
|
||||
-- result:
|
||||
1 5000.00000000 1000.00
|
||||
2 6000.00000000 800.00
|
||||
-- !result
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
-- name: test_join_with_using
|
||||
DROP DATABASE IF EXISTS test_join_with_using;
|
||||
CREATE DATABASE test_join_with_using;
|
||||
use test_join_with_using;
|
||||
|
||||
CREATE TABLE left_table (
|
||||
name VARCHAR(20),
|
||||
id1 INT,
|
||||
age INT,
|
||||
city VARCHAR(20),
|
||||
id2 INT,
|
||||
salary DECIMAL(10,2),
|
||||
status VARCHAR(10)
|
||||
) DUPLICATE KEY(name) DISTRIBUTED BY HASH(name) BUCKETS 1 PROPERTIES ("replication_num" = "1");
|
||||
|
||||
CREATE TABLE right_table (
|
||||
dept VARCHAR(20),
|
||||
id1 INT,
|
||||
bonus DECIMAL(8,2),
|
||||
id2 INT
|
||||
) DUPLICATE KEY(dept) DISTRIBUTED BY HASH(dept) BUCKETS 1 PROPERTIES ("replication_num" = "1");
|
||||
|
||||
INSERT INTO left_table VALUES
|
||||
('Alice', 1, 25, 'New York', 100, 5000.00, 'Active'),
|
||||
('Bob', 2, 30, 'Boston', 200, 6000.00, 'Active'),
|
||||
('Charlie', 3, 35, 'Chicago', 300, 7000.00, 'Inactive'),
|
||||
('David', 4, 28, 'Denver', NULL, 5500.00, 'Active'),
|
||||
('Eve', 5, 32, 'Seattle', 500, 6500.00, 'Active'),
|
||||
('Frank', 6, 40, NULL, 600, 8000.00, 'Inactive');
|
||||
|
||||
INSERT INTO right_table VALUES
|
||||
('Engineering', 1, 1000.00, 100),
|
||||
('Marketing', 2, 800.00, 200),
|
||||
('Sales', 7, 1200.00, 700),
|
||||
('HR', 8, 900.00, NULL),
|
||||
('Finance', 9, NULL, 900);
|
||||
|
||||
-- Test INNER JOIN with two USING columns
|
||||
SELECT * FROM left_table JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test INNER JOIN selecting only USING columns
|
||||
SELECT id1, id2 FROM left_table JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test LEFT OUTER JOIN
|
||||
SELECT * FROM left_table LEFT JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test LEFT OUTER JOIN selecting only USING columns
|
||||
SELECT id1, id2 FROM left_table LEFT JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test RIGHT OUTER JOIN (critical test for column ordering)
|
||||
SELECT * FROM left_table RIGHT JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test RIGHT OUTER JOIN selecting only USING columns (should prefer right table values)
|
||||
SELECT id1, id2 FROM left_table RIGHT JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test FULL OUTER JOIN
|
||||
SELECT * FROM left_table FULL OUTER JOIN right_table USING(id1, id2) ORDER BY right_table.dept, right_table.bonus;
|
||||
|
||||
-- Test with table aliases and qualified references
|
||||
SELECT L.id1, R.id1, id1 FROM left_table L JOIN right_table R USING(id1, id2) ORDER BY L.id1;
|
||||
|
||||
-- Test single USING column
|
||||
SELECT * FROM left_table JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
-- Test NULL handling in USING columns
|
||||
SELECT * FROM left_table LEFT JOIN right_table USING(id1, id2) WHERE id1 IS NULL OR id2 IS NULL;
|
||||
|
||||
-- Test RIGHT SEMI JOIN
|
||||
SELECT * FROM left_table RIGHT SEMI JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test RIGHT ANTI JOIN
|
||||
SELECT * FROM left_table RIGHT ANTI JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test CROSS JOIN
|
||||
SELECT * FROM left_table CROSS JOIN right_table ORDER BY left_table.id1, right_table.id1 LIMIT 5;
|
||||
|
||||
-- Test LEFT SEMI JOIN
|
||||
SELECT * FROM left_table LEFT SEMI JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test LEFT ANTI JOIN
|
||||
SELECT * FROM left_table LEFT ANTI JOIN right_table USING(id1, id2) ORDER BY id1;
|
||||
|
||||
-- Test single column USING cases
|
||||
SELECT * FROM left_table JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
SELECT * FROM left_table LEFT JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
SELECT * FROM left_table RIGHT JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
SELECT * FROM left_table FULL OUTER JOIN right_table USING(id1) ORDER BY right_table.dept, right_table.bonus;
|
||||
|
||||
SELECT * FROM left_table LEFT SEMI JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
SELECT * FROM left_table LEFT ANTI JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
SELECT * FROM left_table RIGHT SEMI JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
SELECT * FROM left_table RIGHT ANTI JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
-- Test single column USING with aliases
|
||||
SELECT id1 AS join_key FROM left_table JOIN right_table USING(id1) ORDER BY join_key;
|
||||
|
||||
SELECT l.id1 l_id1 FROM left_table l JOIN right_table r USING(id1) ORDER BY l_id1;
|
||||
|
||||
SELECT l.id2 l_id1 FROM left_table l right JOIN right_table r USING(id1) ORDER BY l_id1;
|
||||
|
||||
SELECT r.id2 l_id1 FROM left_table l right JOIN right_table r USING(id1) ORDER BY l_id1;
|
||||
|
||||
SELECT id1 AS key1, id2 AS key2 FROM left_table JOIN right_table USING(id1, id2) ORDER BY key1;
|
||||
|
||||
-- Test USING column with table alias and column alias
|
||||
SELECT L.id1 AS left_key, R.id1 AS right_key, id1 AS coalesced_key
|
||||
FROM left_table L JOIN right_table R USING(id1) ORDER BY left_key;
|
||||
|
||||
-- Test RIGHT JOIN with single column and alias (should prefer right table value)
|
||||
SELECT id1 AS right_preferred_key FROM left_table RIGHT JOIN right_table USING(id1) ORDER BY right_preferred_key;
|
||||
|
||||
-- Test complex alias scenarios
|
||||
SELECT
|
||||
id1 AS main_id,
|
||||
name AS emp_name,
|
||||
dept AS department,
|
||||
salary AS emp_salary,
|
||||
bonus AS emp_bonus
|
||||
FROM left_table LEFT JOIN right_table USING(id1, id2)
|
||||
ORDER BY main_id;
|
||||
|
||||
-- Test USING with WHERE clause on aliased columns
|
||||
SELECT id1 AS pk, name, dept
|
||||
FROM left_table LEFT JOIN right_table USING(id1)
|
||||
WHERE id1 > 2
|
||||
ORDER BY pk;
|
||||
|
||||
-- Test NULL AWARE LEFT ANTI JOIN (if supported)
|
||||
SELECT * FROM left_table WHERE id1 NOT IN (SELECT id1 FROM right_table WHERE id1 IS NOT NULL) ORDER BY id1;
|
||||
|
||||
-- Test USING columns in different SELECT positions
|
||||
SELECT dept, id1, name FROM left_table LEFT JOIN right_table USING(id1) ORDER BY id1;
|
||||
|
||||
-- Test multiple single-column USING joins in sequence (subquery style)
|
||||
SELECT * FROM (
|
||||
SELECT id1, name, age FROM left_table WHERE id1 <= 5
|
||||
) L JOIN (
|
||||
SELECT id1, dept FROM right_table WHERE id1 <= 5
|
||||
) R USING(id1) ORDER BY id1;
|
||||
|
||||
-- Test subquery with JOIN USING as inner query
|
||||
SELECT outer_query.id1, outer_query.total_count
|
||||
FROM (
|
||||
SELECT id1, COUNT(*) as total_count
|
||||
FROM left_table L JOIN right_table R USING(id1)
|
||||
GROUP BY id1
|
||||
) outer_query
|
||||
WHERE outer_query.total_count > 0
|
||||
ORDER BY outer_query.id1;
|
||||
|
||||
-- Test JOIN USING with subquery in WHERE clause
|
||||
SELECT id1, name, R.dept
|
||||
FROM left_table L JOIN right_table R USING(id1)
|
||||
WHERE L.id1 IN (
|
||||
SELECT id1 FROM left_table WHERE salary > 6000
|
||||
)
|
||||
ORDER BY L.id1;
|
||||
|
||||
-- Test complex subquery with multiple USING columns
|
||||
SELECT sub.id1, sub.id2, sub.emp_info, sub.dept_info
|
||||
FROM (
|
||||
SELECT
|
||||
id1,
|
||||
id2,
|
||||
CONCAT(name, '-', CAST(age AS VARCHAR)) as emp_info,
|
||||
COALESCE(dept, 'Unknown') as dept_info
|
||||
FROM left_table L LEFT JOIN right_table R USING(id1, id2)
|
||||
WHERE id1 IS NOT NULL
|
||||
) sub
|
||||
WHERE sub.emp_info LIKE '%Alice%' OR sub.dept_info = 'Engineering'
|
||||
ORDER BY sub.id1;
|
||||
|
||||
-- Test JOIN USING between subqueries with aggregation
|
||||
SELECT
|
||||
L_AGG.id1,
|
||||
L_AGG.avg_salary,
|
||||
R_AGG.total_bonus
|
||||
FROM (
|
||||
SELECT id1, AVG(salary) as avg_salary
|
||||
FROM left_table
|
||||
WHERE salary IS NOT NULL
|
||||
GROUP BY id1
|
||||
) L_AGG
|
||||
JOIN (
|
||||
SELECT id1, SUM(bonus) as total_bonus
|
||||
FROM right_table
|
||||
WHERE bonus IS NOT NULL
|
||||
GROUP BY id1
|
||||
) R_AGG USING(id1)
|
||||
ORDER BY L_AGG.id1;
|
||||
|
||||
Loading…
Reference in New Issue