[Enhancement] Optimize removeDuplicateField performance (#62938)
Signed-off-by: shuming.li <ming.moriarty@gmail.com>
This commit is contained in:
parent
54317498bc
commit
daa33f095e
|
|
@ -48,8 +48,10 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
|
@ -808,27 +810,65 @@ public class SelectAnalyzer {
|
|||
ExpressionAnalyzer.analyzeExpression(expr, analyzeState, scope, session);
|
||||
}
|
||||
|
||||
// Use a HashSet to store unique pairs of (name, originExpression)
|
||||
// Use a custom key class or a string representation for the pair
|
||||
private static class NameExprKey {
|
||||
private final String name;
|
||||
private final Expr originExpr;
|
||||
|
||||
NameExprKey(String name, Expr originExpr) {
|
||||
this.name = name;
|
||||
this.originExpr = originExpr;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
NameExprKey that = (NameExprKey) o;
|
||||
return Objects.equals(name, that.name) &&
|
||||
Objects.equals(originExpr, that.originExpr);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(name, originExpr);
|
||||
}
|
||||
}
|
||||
|
||||
// The Scope used by order by allows parsing of the same column,
|
||||
// such as 'select v1 as v, v1 as v from t0 order by v'
|
||||
// but normal parsing does not allow it. So add a de-duplication operation here.
|
||||
private List<Field> removeDuplicateField(List<Field> originalFields) {
|
||||
public List<Field> removeDuplicateField(List<Field> originalFields) {
|
||||
List<Field> allFields = Lists.newArrayList();
|
||||
for (Field field : originalFields) {
|
||||
if (session.getSessionVariable().isEnableStrictOrderBy()) {
|
||||
if (field.getName() != null && field.getOriginExpression() != null &&
|
||||
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
|
||||
&& f.getName() != null && field.getName().equals(f.getName())
|
||||
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (field.getName() != null &&
|
||||
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) {
|
||||
continue;
|
||||
if (session.getSessionVariable().isEnableStrictOrderBy()) {
|
||||
Set<NameExprKey> visited = new HashSet<>();
|
||||
for (Field field : originalFields) {
|
||||
if (field.getName() != null && field.getOriginExpression() != null) {
|
||||
NameExprKey key = new NameExprKey(field.getName(), field.getOriginExpression());
|
||||
if (visited.contains(key)) {
|
||||
continue;
|
||||
}
|
||||
visited.add(key);
|
||||
}
|
||||
allFields.add(field);
|
||||
}
|
||||
} else {
|
||||
// Use a HashSet to store unique field names
|
||||
Set<String> visited = new HashSet<>();
|
||||
for (Field field : originalFields) {
|
||||
if (field.getName() != null) {
|
||||
if (visited.contains(field.getName())) {
|
||||
continue;
|
||||
}
|
||||
visited.add(field.getName());
|
||||
}
|
||||
allFields.add(field);
|
||||
}
|
||||
allFields.add(field);
|
||||
}
|
||||
return allFields;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,133 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package com.starrocks.sql.plan;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.starrocks.qe.ConnectContext;
|
||||
import com.starrocks.sql.analyzer.Field;
|
||||
import com.starrocks.sql.analyzer.SelectAnalyzer;
|
||||
import com.starrocks.sql.ast.expression.Expr;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Old Deduped size: 1000, time: 501 ms
|
||||
* New Deduped size: 1000, time: 18 ms
|
||||
*/
|
||||
public class SelectAnalyzerBench {
|
||||
private static final int N = 100_000;
|
||||
|
||||
// Benchmark test for removeDuplicateField optimization
|
||||
// This is a simple micro-benchmark for development/verification purposes.
|
||||
// In production, consider using JMH or a proper benchmarking framework.
|
||||
public static void main(String[] args) {
|
||||
// Mock Field and Expr classes for the benchmark
|
||||
class MockExpr extends Expr {
|
||||
private final int id;
|
||||
MockExpr(int id) {
|
||||
super();
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Expr clone() {
|
||||
return new MockExpr(id);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
MockExpr mockExpr = (MockExpr) o;
|
||||
return id == mockExpr.id;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Integer.hashCode(id);
|
||||
}
|
||||
}
|
||||
class MockField extends Field {
|
||||
private final String name;
|
||||
private final MockExpr expr;
|
||||
MockField(String name, MockExpr expr) {
|
||||
super(name, null, null, null); // Only name and originExpression are used
|
||||
this.name = name;
|
||||
this.expr = expr;
|
||||
}
|
||||
@Override
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
@Override
|
||||
public Expr getOriginExpression() {
|
||||
return (Expr) expr;
|
||||
}
|
||||
}
|
||||
|
||||
// Generate a large list of fields with duplicates
|
||||
List<Field> fields = new ArrayList<>();
|
||||
for (int i = 0; i < N; ++i) {
|
||||
String name = "col" + (i % 1000); // 1000 unique names
|
||||
MockExpr expr = new MockExpr(i % 500); // 500 unique exprs
|
||||
fields.add(new MockField(name, expr));
|
||||
}
|
||||
|
||||
ConnectContext session = new ConnectContext();
|
||||
|
||||
{
|
||||
long start = System.currentTimeMillis();
|
||||
List<Field> deduped = removeDuplicateFieldV1(session, fields);
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("Old Deduped size: " + deduped.size() + ", time: " + (end - start) + " ms");
|
||||
}
|
||||
{
|
||||
SelectAnalyzer analyzer = new SelectAnalyzer(session);
|
||||
long start = System.currentTimeMillis();
|
||||
List<Field> deduped = analyzer.removeDuplicateField(fields);
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("New Deduped size: " + deduped.size() + ", time: " + (end - start) + " ms");
|
||||
}
|
||||
}
|
||||
|
||||
// The Scope used by order by allows parsing of the same column,
|
||||
// such as 'select v1 as v, v1 as v from t0 order by v'
|
||||
// but normal parsing does not allow it. So add a de-duplication operation here.
|
||||
private static List<Field> removeDuplicateFieldV1(ConnectContext session,
|
||||
List<Field> originalFields) {
|
||||
List<Field> allFields = Lists.newArrayList();
|
||||
for (Field field : originalFields) {
|
||||
if (session.getSessionVariable().isEnableStrictOrderBy()) {
|
||||
if (field.getName() != null && field.getOriginExpression() != null &&
|
||||
allFields.stream().anyMatch(f -> f.getOriginExpression() != null
|
||||
&& f.getName() != null && field.getName().equals(f.getName())
|
||||
&& field.getOriginExpression().equals(f.getOriginExpression()))) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
if (field.getName() != null &&
|
||||
allFields.stream().anyMatch(f -> f.getName() != null && field.getName().equals(f.getName()))) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
allFields.add(field);
|
||||
}
|
||||
return allFields;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue