[Enhancement] Low cardinality optimization on analytic operator above table functions (#63378)

Signed-off-by: satanson <ranpanf@gmail.com>
This commit is contained in:
satanson 2025-09-24 14:43:35 +08:00 committed by GitHub
parent 6904592d66
commit 0c45329a51
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 764 additions and 109 deletions

View File

@ -724,6 +724,10 @@ public abstract class Type implements Cloneable {
return PrimitiveType.STRING_TYPE_LIST.contains(this.getPrimitiveType());
}
public boolean isStringArrayType() {
return isArrayType() && ((ArrayType) this).getItemType().isStringType();
}
// only metric types have the following constraints:
// 1. don't support being used as a key column
// 2. don't support being used in filters
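
A minimal usage sketch of the new helper (illustrative only, not part of the patch; it assumes the com.starrocks.catalog Type/ArrayType API shown in the hunk above):

import com.starrocks.catalog.ArrayType;
import com.starrocks.catalog.Type;

// Sketch: isStringArrayType() lets the low-cardinality pass treat
// array<string> columns the same way it treats plain string columns.
public class IsStringArrayTypeSketch {
    public static void main(String[] args) {
        Type strings = new ArrayType(Type.VARCHAR);  // array<string>
        Type numbers = new ArrayType(Type.BIGINT);   // array<bigint>
        System.out.println(strings.isStringArrayType()); // true
        System.out.println(numbers.isStringArrayType()); // false
        System.out.println(Type.VARCHAR.isStringArrayType()); // false: not an array
    }
}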

View File

@ -41,6 +41,7 @@ import com.starrocks.sql.optimizer.base.DistributionCol;
import com.starrocks.sql.optimizer.base.DistributionSpec;
import com.starrocks.sql.optimizer.base.EquivalentDescriptor;
import com.starrocks.sql.optimizer.base.HashDistributionSpec;
import com.starrocks.sql.optimizer.base.Ordering;
import com.starrocks.sql.optimizer.operator.Operator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalHashAggregateOperator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalHiveScanOperator;
@ -51,6 +52,7 @@ import com.starrocks.sql.optimizer.operator.physical.PhysicalScanOperator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalSetOperation;
import com.starrocks.sql.optimizer.operator.physical.PhysicalTableFunctionOperator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalTopNOperator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalWindowOperator;
import com.starrocks.sql.optimizer.operator.scalar.BinaryPredicateOperator;
import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
import com.starrocks.sql.optimizer.operator.scalar.CaseWhenOperator;
@ -85,6 +87,7 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import static com.starrocks.sql.ast.expression.BinaryType.EQ_FOR_NULL;
@ -103,6 +106,16 @@ public class DecodeCollector extends OptExpressionVisitor<DecodeInfo, DecodeInfo
public static final Set<String> LOW_CARD_AGGREGATE_FUNCTIONS = Sets.newHashSet(FunctionSet.COUNT,
FunctionSet.MULTI_DISTINCT_COUNT, FunctionSet.MAX, FunctionSet.MIN, FunctionSet.APPROX_COUNT_DISTINCT);
//TODO(by satanson): it seems that we can support more window functions in the future; at present, we only support
// LAG/LEAD/FIRST_VALUE/LAST_VALUE and the aggregate functions that can adopt the low-cardinality optimization
// and be used as window functions.
public static final Set<String> LOW_CARD_WINDOW_FUNCTIONS = Sets.newHashSet(FunctionSet.LAG, FunctionSet.LEAD,
FunctionSet.FIRST_VALUE, FunctionSet.LAST_VALUE);
static {
LOW_CARD_WINDOW_FUNCTIONS.addAll(LOW_CARD_AGGREGATE_FUNCTIONS);
}
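
For reference, a sketch of the allowlist's effective contents after the static initializer above runs (illustrative only; it assumes the FunctionSet constants resolve to the usual lowercase names):

import java.util.Set;

// Sketch: LOW_CARD_WINDOW_FUNCTIONS after the static block is the union of
// the pure window functions and the low-cardinality aggregate functions.
class LowCardWindowFunctionsSketch {
    static final Set<String> EFFECTIVE = Set.of(
            "lag", "lead", "first_value", "last_value",     // window-only functions
            "count", "multi_distinct_count", "max", "min",  // low-card aggregates
            "approx_count_distinct");
}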
public static final Set<String> LOW_CARD_LOCAL_AGG_FUNCTIONS = Sets.newHashSet(FunctionSet.COUNT,
FunctionSet.MAX, FunctionSet.MIN);
@ -562,6 +575,96 @@ public class DecodeCollector extends OptExpressionVisitor<DecodeInfo, DecodeInfo
return result;
}
@Override
public DecodeInfo visitPhysicalAnalytic(OptExpression optExpression, DecodeInfo context) {
if (context.outputStringColumns.isEmpty()) {
return DecodeInfo.EMPTY;
}
PhysicalWindowOperator windowOp = optExpression.getOp().cast();
DecodeInfo info = context.createOutputInfo();
ColumnRefSet disableColumns = new ColumnRefSet();
for (ColumnRefOperator key : windowOp.getAnalyticCall().keySet()) {
CallOperator windowCallOp = windowOp.getAnalyticCall().get(key);
if (!LOW_CARD_WINDOW_FUNCTIONS.contains(windowCallOp.getFnName())) {
disableColumns.union(windowCallOp.getUsedColumns());
disableColumns.union(key);
continue;
}
Map<Boolean, List<ScalarOperator>> argGroups = windowCallOp.getChildren().stream()
.filter(Predicate.not(ScalarOperator::isConstant))
.collect(Collectors.partitioningBy(ScalarOperator::isColumnRef));
List<ScalarOperator> columnRefArgs = argGroups.get(true);
List<ScalarOperator> exprArgs = argGroups.get(false);
// the window function must have exactly one non-constant argument, and it must be a column ref.
if (!exprArgs.isEmpty() || columnRefArgs.size() != 1) {
disableColumns.union(windowCallOp.getUsedColumns());
disableColumns.union(key);
}
}
if (!disableColumns.isEmpty()) {
info.decodeStringColumns.union(info.inputStringColumns);
info.decodeStringColumns.intersect(disableColumns);
info.inputStringColumns.except(info.decodeStringColumns);
}
info.outputStringColumns.clear();
for (ColumnRefOperator key : windowOp.getAnalyticCall().keySet()) {
if (disableColumns.contains(key)) {
continue;
}
CallOperator value = windowOp.getAnalyticCall().get(key);
if (!info.inputStringColumns.containsAll(value.getUsedColumns())) {
continue;
}
stringAggregateExpressions.computeIfAbsent(key.getId(), x -> Lists.newArrayList()).add(value);
// if the function's return type is neither string nor array<string>, its output column cannot be
// encoded; however, the function evaluation can still adopt encoded columns, for example:
// 1. select v1, count(t.a1) over(partition by v1) from t0, unnest(t0.a1) t(a1);
// 2. select v1, count(distinct t.a1) over(partition by v1) from t0, unnest(t0.a1) t(a1);
// t0.a1 is an array<string> column and low-cardinality encoded.
if (value.getType().isStringType() || value.getType().isStringArrayType()) {
info.outputStringColumns.union(key.getId());
stringRefToDefineExprMap.putIfAbsent(key.getId(), value);
expressionStringRefCounter.put(key.getId(), 1);
}
}
for (ScalarOperator partitionBy : windowOp.getPartitionExpressions()) {
Preconditions.checkArgument(partitionBy instanceof ColumnRefOperator);
ColumnRefOperator partitionByColumnRef = (ColumnRefOperator) partitionBy;
if (info.inputStringColumns.contains(partitionByColumnRef) &&
!info.decodeStringColumns.contains(partitionByColumnRef)) {
info.outputStringColumns.union(partitionByColumnRef);
}
}
for (Ordering orderBy : windowOp.getOrderByElements()) {
ColumnRefOperator orderByColumnRef = orderBy.getColumnRef();
if (info.inputStringColumns.contains(orderByColumnRef) &&
!info.decodeStringColumns.contains(orderByColumnRef)) {
info.outputStringColumns.union(orderByColumnRef);
}
}
// the columns that are not arguments of window functions can also use encoded columns;
// for example:
// select t.a1, t.a2, lead(t.a1) over(partition by t.a2) from t0, unnest(t0.a1, t0.a2) t(a1,a2);
// both t0.a1 and t0.a2 are array<string> and low-cardinality encoded; the output columns t.a1, t.a2,
// lead(t.a1) and the partition-by column t.a2 can all adopt encoded columns.
ColumnRefSet outerColumnSet = new ColumnRefSet();
outerColumnSet.union(context.outputStringColumns);
outerColumnSet.intersect(info.inputStringColumns);
outerColumnSet.except(info.decodeStringColumns);
outerColumnSet.except(disableColumns);
info.outputStringColumns.union(outerColumnSet);
return info;
}
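
The per-call eligibility rule applied above can be summarized by a small helper; this is a hypothetical sketch (not code from the patch) reusing the operator classes DecodeCollector already imports:

import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;

// Sketch: a window call stays eligible only if, after dropping constant
// arguments, exactly one argument remains and it is a plain column reference;
// an expression argument disables the optimization for the involved columns.
class WindowArgShapeSketch {
    static boolean hasSingleColumnRefArg(CallOperator call) {
        List<ScalarOperator> nonConst = call.getChildren().stream()
                .filter(Predicate.not(ScalarOperator::isConstant))
                .collect(Collectors.toList());
        return nonConst.size() == 1 && nonConst.get(0).isColumnRef();
    }
}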
@Override
public DecodeInfo visitPhysicalHashAggregate(OptExpression optExpression, DecodeInfo context) {
if (context.outputStringColumns.isEmpty()) {

View File

@ -42,6 +42,7 @@ import java.util.Optional;
import java.util.Set;
import static com.starrocks.sql.optimizer.rule.tree.lowcardinality.DecodeCollector.LOW_CARD_ARRAY_FUNCTIONS;
import static com.starrocks.sql.optimizer.rule.tree.lowcardinality.DecodeCollector.LOW_CARD_WINDOW_FUNCTIONS;
/*
* DecodeContext is used to store the information needed for decoding
@ -338,6 +339,9 @@ class DecodeContext {
// the return type is not updated
return new CallOperator(call.getFnName(), call.getType(), newChildren, fn,
call.isDistinct(), call.isRemovedDistinct());
} else if (LOW_CARD_WINDOW_FUNCTIONS.contains(call.getFnName())) {
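// window functions take the return type from the resolved function, so that,
// e.g., lead over an encoded string column returns the INT dict code rather
// than the original VARCHAR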
return new CallOperator(call.getFnName(), fn.getReturnType(), newChildren, fn,
call.isDistinct(), call.isRemovedDistinct());
}
}

View File

@ -14,6 +14,7 @@
package com.starrocks.sql.optimizer.rule.tree.lowcardinality;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@ -47,6 +48,7 @@ import com.starrocks.sql.optimizer.operator.physical.PhysicalOperator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalScanOperator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalTableFunctionOperator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalTopNOperator;
import com.starrocks.sql.optimizer.operator.physical.PhysicalWindowOperator;
import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
@ -205,6 +207,67 @@ public class DecodeRewriter extends OptExpressionVisitor<OptExpression, ColumnRe
return rewriteOptExpression(optExpression, op, info.outputStringColumns);
}
@Override
public OptExpression visitPhysicalAnalytic(OptExpression optExpression, ColumnRefSet fragmentUseDictExprs) {
PhysicalWindowOperator windowOp = optExpression.getOp().cast();
DecodeInfo info = context.operatorDecodeInfo.getOrDefault(windowOp, DecodeInfo.EMPTY);
ColumnRefSet inputStringRefs = new ColumnRefSet();
inputStringRefs.union(info.inputStringColumns);
List<Ordering> orderByList = windowOp.getOrderByElements().stream()
.map(ord -> {
ColumnRefOperator c = ord.getColumnRef();
ColumnRefOperator newColRef =
inputStringRefs.contains(c) ? context.stringRefToDictRefMap.getOrDefault(c, c) : c;
return new Ordering(newColRef, ord.isAscending(), ord.isNullsFirst());
})
.collect(Collectors.toList());
List<ScalarOperator> partitions = windowOp.getPartitionExpressions().stream()
.map(p -> {
Preconditions.checkArgument(p.isColumnRef());
ColumnRefOperator c = p.cast();
return inputStringRefs.contains(c) ? context.stringRefToDictRefMap.getOrDefault(c, c) : c;
})
.collect(Collectors.toList());
Map<ColumnRefOperator, CallOperator> analyticFunctions = Maps.newLinkedHashMap();
for (ColumnRefOperator analyticRef : windowOp.getAnalyticCall().keySet()) {
CallOperator analyticFn = windowOp.getAnalyticCall().get(analyticRef);
if (!context.stringExprToDictExprMap.containsKey(analyticFn)) {
analyticFunctions.put(analyticRef, analyticFn);
continue;
}
// propagate low-cardinality encoded columns
if (analyticFn.getType().isStringType() || analyticFn.getType().isStringArrayType()) {
ColumnRefOperator newAnalyticRef = context.stringRefToDictRefMap.getOrDefault(analyticRef, analyticRef);
analyticFunctions.put(newAnalyticRef, context.stringExprToDictExprMap.get(analyticFn).cast());
inputStringRefs.union(analyticRef.getId());
} else {
// count and count(distinct) return neither string types nor array<string> types, so they do
// not propagate low-cardinality encoded columns; however, the function evaluation still
// adopts encoded columns.
analyticFunctions.put(analyticRef, context.stringExprToDictExprMap.get(analyticFn).cast());
}
}
ScalarOperator predicate = rewritePredicate(windowOp.getPredicate(), inputStringRefs);
Projection projection = rewriteProjection(windowOp.getProjection(), inputStringRefs);
PhysicalWindowOperator op = new PhysicalWindowOperator(
analyticFunctions,
partitions,
orderByList,
windowOp.getAnalyticWindow(),
windowOp.getEnforceOrderBy(),
windowOp.isUseHashBasedPartition(),
windowOp.isSkewed(),
windowOp.isInputIsBinary(),
windowOp.getLimit(),
predicate,
projection);
return rewriteOptExpression(optExpression, op, info.outputStringColumns);
}
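
The per-column substitution used above for partition-by and order-by columns boils down to a single map lookup; a hypothetical helper sketch (not in the patch), mirroring the rewriter's use of stringRefToDictRefMap:

import com.starrocks.sql.optimizer.base.ColumnRefSet;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import java.util.Map;

// Sketch: a column is swapped for its dict-encoded twin only when DecodeInfo
// marked it as an input string column; otherwise it passes through unchanged.
class DictRefSubstitutionSketch {
    static ColumnRefOperator toDictRef(ColumnRefOperator c,
                                       ColumnRefSet inputStringRefs,
                                       Map<ColumnRefOperator, ColumnRefOperator> dictMap) {
        return inputStringRefs.contains(c) ? dictMap.getOrDefault(c, c) : c;
    }
}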
@Override
public OptExpression visitPhysicalDistribution(OptExpression optExpression, ColumnRefSet fragmentUseDictExprs) {
PhysicalDistributionOperator exchange = optExpression.getOp().cast();

View File

@ -15,6 +15,7 @@
package com.starrocks.sql.plan;
import com.google.common.collect.Lists;
import com.starrocks.catalog.FunctionSet;
import com.starrocks.common.FeConstants;
import com.starrocks.planner.TableFunctionNode;
import com.starrocks.utframe.StarRocksAssert;
@ -24,6 +25,8 @@ import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class LowCardinalityArrayTest extends PlanTestBase {
@ -828,4 +831,235 @@ public class LowCardinalityArrayTest extends PlanTestBase {
TableFunctionNode tableFunctionNode = tfNodes.get(0);
Assertions.assertTrue(tableFunctionNode.getTableFunction().isLeftJoin());
}
@Test
public void testWindowFunOptimizationInWindowAboveUnnest() throws Exception {
String sql = "with cte as(\n" +
" select s1.v1, s1.v2, t.a1 a1\n" +
" from s1,unnest(s1.a1)t(a1) \n" +
")\n" +
"select v1, v2, lead(a1) over(partition by v1 order by v2)\n" +
"from cte;";
String plan = getVerboseExplain(sql);
Assertions.assertTrue(plan.contains(" 6:Decode\n" +
" | <dict id 9> : <string id 6>\n" +
" | cardinality: 1\n" +
" | \n" +
" 5:Project\n" +
" | output columns:\n" +
" | 1 <-> [1: v1, BIGINT, true]\n" +
" | 2 <-> [2: v2, INT, true]\n" +
" | 9 <-> [9: lead(5: a1, 1, null), INT, true]\n" +
" | cardinality: 1\n" +
" | \n" +
" 4:ANALYTIC\n" +
" | functions: [, lead[([8: a1, INT, true], 1, NULL); " +
"args: INT; result: INT; args nullable: true; result nullable: true], ]\n" +
" | partition by: [1: v1, BIGINT, true]\n" +
" | order by: [2: v2, INT, true] ASC\n" +
" | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING\n" +
" | cardinality: 1"), plan);
}
@Test
public void testOrderByClauseOptimizationInWindowAboveUnnest() throws Exception {
String sql = "with cte as(\n" +
" select s1.v1, s1.v2, t.a1 a1\n" +
" from s1,unnest(s1.a1)t(a1) \n" +
")\n" +
"select v1, a1, lead(v2) over(partition by v1 order by a1)\n" +
"from cte;";
String plan = getVerboseExplain(sql);
Assertions.assertTrue(plan.contains(" 6:Decode\n" +
" | <dict id 8> : <string id 5>\n" +
" | cardinality: 1\n" +
" | \n" +
" 5:Project\n" +
" | output columns:\n" +
" | 1 <-> [1: v1, BIGINT, true]\n" +
" | 6 <-> [6: lead(2: v2, 1, null), INT, true]\n" +
" | 8 <-> [8: a1, INT, true]\n" +
" | cardinality: 1\n" +
" | \n" +
" 4:ANALYTIC\n" +
" | functions: [, lead[([2: v2, INT, true], 1, NULL); " +
"args: INT; result: INT; args nullable: true; result nullable: true], ]\n" +
" | partition by: [1: v1, BIGINT, true]\n" +
" | order by: [8: a1, INT, true] ASC\n" +
" | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING\n" +
" | cardinality: 1"), plan);
}
@Test
public void testSupportedWindowFunctions() throws Exception {
String sqlFmt = "with cte as(\n" +
" select s1.v1, s1.v2, t.a1 a1\n" +
" from s1,unnest(s1.a1)t(a1) \n" +
")\n" +
"select v1, a1, {WINDOW_FUN}(a1) over(partition by v1 order by a1)\n" +
"from cte;";
String[] windowFuncs = new String[] {
FunctionSet.LEAD,
FunctionSet.LAG,
FunctionSet.FIRST_VALUE,
FunctionSet.LAST_VALUE,
FunctionSet.MAX,
FunctionSet.MIN,
FunctionSet.COUNT};
for (String wf : windowFuncs) {
String q = sqlFmt.replace("{WINDOW_FUN}", wf);
String plan = getVerboseExplain(q);
String[] lines = plan.split("\n");
List<Integer> decodeNodeLines = IntStream.range(0, lines.length).boxed()
.filter(lineno -> lines[lineno]
.matches("^\\s*\\d+:Decode\\s*$"))
.collect(Collectors.toList());
List<Integer> analyticNodeLines = IntStream.range(0, lines.length).boxed()
.filter(lineno -> lines[lineno]
.matches("^\\s*\\d+:ANALYTIC\\s*$"))
.collect(Collectors.toList());
Assertions.assertEquals(1, decodeNodeLines.size(), plan);
Assertions.assertEquals(1, analyticNodeLines.size(), plan);
Assertions.assertTrue(decodeNodeLines.get(0) < analyticNodeLines.get(0), plan);
}
}
@Test
public void testNotSupportedWindowFunctions() throws Exception {
String sqlFmt = "with cte as(\n" +
" select s1.v1, s1.v2, t.a1 a1\n" +
" from s1,unnest(s1.a1)t(a1) \n" +
")\n" +
"select v1, a1, {WINDOW_FUN}(a1) over(partition by v1)\n" +
"from cte;";
String[] windowFuncs = new String[] {
FunctionSet.SUM,
FunctionSet.AVG,
};
for (String wf : windowFuncs) {
String q = sqlFmt.replace("{WINDOW_FUN}", wf);
String plan = getVerboseExplain(q);
String[] lines = plan.split("\n");
List<Integer> decodeNodeLines = IntStream.range(0, lines.length).boxed()
.filter(lineno -> lines[lineno]
.matches("^\\s*\\d+:Decode\\s*$"))
.collect(Collectors.toList());
List<Integer> analyticNodeLines = IntStream.range(0, lines.length).boxed()
.filter(lineno -> lines[lineno]
.matches("^\\s*\\d+:ANALYTIC\\s*$"))
.collect(Collectors.toList());
Assertions.assertEquals(1, decodeNodeLines.size(), plan);
Assertions.assertEquals(1, analyticNodeLines.size(), plan);
Assertions.assertTrue(decodeNodeLines.get(0) > analyticNodeLines.get(0), plan);
}
}
@Test
public void testWindowFunctionCount() throws Exception {
String sql1 = "with cte as(\n" +
" select s1.v1, s1.v2, t.a1 a1\n" +
" from s1,unnest(s1.a1)t(a1) \n" +
")\n" +
"select v1, count(a1) over (partition by v1)\n" +
"from cte;";
String sql2 = "with cte as(\n" +
" select s1.v1, s1.v2, t.a1 a1\n" +
" from s1,unnest(s1.a1)t(a1) \n" +
")\n" +
"select v1, count(a1) over (partition by v1)\n" +
"from cte;";
String plan1 = getVerboseExplain(sql1);
Assertions.assertTrue(plan1.contains(" 5:Project\n" +
" | output columns:\n" +
" | 1 <-> [1: v1, BIGINT, true]\n" +
" | 6 <-> [6: count(5: a1), BIGINT, false]\n" +
" | cardinality: 1\n" +
" | \n" +
" 4:ANALYTIC\n" +
" | functions: [, count[([8: a1, INT, true]); args: INT; result: BIGINT;" +
" args nullable: true; result nullable: false], ]\n" +
" | partition by: [1: v1, BIGINT, true]\n" +
" | cardinality: 1"), plan1);
String plan2 = getVerboseExplain(sql2);
Assertions.assertTrue(plan2.contains(" 5:Project\n" +
" | output columns:\n" +
" | 1 <-> [1: v1, BIGINT, true]\n" +
" | 6 <-> [6: count(5: a1), BIGINT, false]\n" +
" | cardinality: 1\n" +
" | \n" +
" 4:ANALYTIC\n" +
" | functions: [, count[([8: a1, INT, true]); args: INT; result: BIGINT;" +
" args nullable: true; result nullable: false], ]\n" +
" | partition by: [1: v1, BIGINT, true]\n" +
" | cardinality: 1"), plan2);
}
@Test
public void testPartitionByClauseOptimizationInWindowAboveUnnest() throws Exception {
String sql = "with cte as(\n" +
" select s1.v1, s1.v2, t.a1 a1\n" +
" from s1,unnest(s1.a1)t(a1) \n" +
")\n" +
"select v1, a1, lead(v2) over(partition by a1 order by v1)\n" +
"from cte;";
String plan = getVerboseExplain(sql);
Assertions.assertTrue(plan.contains(" 6:Decode\n" +
" | <dict id 8> : <string id 5>\n" +
" | cardinality: 1\n" +
" | \n" +
" 5:Project\n" +
" | output columns:\n" +
" | 1 <-> [1: v1, BIGINT, true]\n" +
" | 6 <-> [6: lead(2: v2, 1, null), INT, true]\n" +
" | 8 <-> [8: a1, INT, true]\n" +
" | cardinality: 1\n" +
" | \n" +
" 4:ANALYTIC\n" +
" | functions: [, lead[([2: v2, INT, true], 1, NULL); args: INT; result: INT; " +
"args nullable: true; result nullable: true], ]\n" +
" | partition by: [8: a1, INT, true]\n" +
" | order by: [1: v1, BIGINT, true] ASC\n" +
" | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING\n" +
" | cardinality: 1"), plan);
}
@Test
public void testOptimizationInWindowAboveUnnest() throws Exception {
String sql = "with cte as(\n" +
" select s1.v1, s1.v2, t.a1 a1, t.a2 a2\n" +
" from s1,unnest(s1.a1, s1.a2)t(a1,a2) \n" +
")\n" +
"select v1, v2, a1, a2, lead(a1) over(partition by a2 order by substr(a1, 1, 3))\n" +
"from cte;";
String plan = getVerboseExplain(sql);
Assertions.assertTrue(plan.contains(" 7:Decode\n" +
" | <dict id 17> : <string id 9>\n" +
" | <dict id 19> : <string id 12>\n" +
" | <dict id 16> : <string id 6>\n" +
" | cardinality: 1\n" +
" | \n" +
" 6:Project\n" +
" | output columns:\n" +
" | 7 <-> [7: v1, BIGINT, true]\n" +
" | 8 <-> [8: v2, INT, true]\n" +
" | 16 <-> [16: a2, INT, true]\n" +
" | 17 <-> [17: a1, INT, true]\n" +
" | 19 <-> [19: lead(9: a1, 1, null), INT, true]\n" +
" | cardinality: 1\n" +
" | \n" +
" 5:ANALYTIC\n" +
" | functions: [, lead[([17: a1, INT, true], 1, NULL); args: INT; result: INT; " +
"args nullable: true; result nullable: true], ]\n" +
" | partition by: [16: a2, INT, true]\n" +
" | order by: [18: substr, INT, true] ASC\n" +
" | window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING\n" +
" | cardinality: 1"), plan);
}
}

View File

@ -1270,119 +1270,106 @@ public class LowCardinalityTest2 extends PlanTestBase {
"select row_number() over( partition by L_COMMENT order by L_PARTKEY) as rm from lineitem" +
") t where rm < 10";
plan = getCostExplain(sql);
assertContains(plan, " 4:ANALYTIC\n" +
" | functions: [, row_number[(); args: ; result: BIGINT; args nullable: false; result nullable: true], ]\n" +
" | partition by: [16: L_COMMENT, VARCHAR(44), false]\n" +
" | order by: [2: L_PARTKEY, INT, false] ASC");
assertContains(plan, " 3:Decode\n" +
assertContains(plan, " 4:Decode\n" +
" | <dict id 20> : <string id 16>");
assertContains(plan, " 2:SORT\n" +
" | order by: [20, INT, false] ASC, [2, INT, false] ASC\n" +
" | analytic partition by: [20, INT, false]");
assertContains(plan, " 1:PARTITION-TOP-N\n" +
" | partition by: [20: L_COMMENT, INT, false] ");
assertContains(plan, " | order by: [20, INT, false] ASC, [2, INT, false] ASC");
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, row_number[(); args: ; result: BIGINT; " +
"args nullable: false; result nullable: true], ]\n" +
" | partition by: [20: L_COMMENT, INT, false]\n" +
" | order by: [2: L_PARTKEY, INT, false] ASC\n" +
" | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW");
// row number
sql = "select * from (select L_COMMENT,l_quantity, row_number() over " +
"(partition by L_COMMENT order by l_quantity desc) rn from lineitem )t where rn <= 10;";
plan = getCostExplain(sql);
assertContains(plan, " 4:ANALYTIC\n" +
" | functions: [, row_number[(); args: ; result: BIGINT; args nullable: false; result nullable: true], ]\n" +
" | partition by: [16: L_COMMENT, VARCHAR(44), false]\n" +
" | order by: [5: L_QUANTITY, DOUBLE, false] DESC");
assertContains(plan, " 3:Decode\n" +
assertContains(plan, " 4:Decode\n" +
" | <dict id 19> : <string id 16>");
assertContains(plan, " 2:SORT\n" +
" | order by: [19, INT, false] ASC, [5, DOUBLE, false] DESC\n" +
" | analytic partition by: [19, INT, false]");
assertContains(plan, " 1:PARTITION-TOP-N\n" +
" | partition by: [19: L_COMMENT, INT, false] \n" +
" | partition limit: 10\n" +
" | order by: [19, INT, false] ASC, [5, DOUBLE, false] DESC\n" +
" | offset: 0");
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, row_number[(); args: ; result: BIGINT; " +
"args nullable: false; result nullable: true], ]\n" +
" | partition by: [19: L_COMMENT, INT, false]\n" +
" | order by: [5: L_QUANTITY, DOUBLE, false] DESC\n" +
" | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW");
// rank
sql = "select * from (select L_COMMENT,l_quantity, rank() over " +
"(partition by L_COMMENT order by l_quantity desc) rn from lineitem )t where rn <= 10;";
plan = getCostExplain(sql);
assertContains(plan, " 4:ANALYTIC\n" +
" | functions: [, rank[(); args: ; result: BIGINT; args nullable: false; result nullable: true], ]\n" +
" | partition by: [16: L_COMMENT, VARCHAR(44), false]\n" +
" | order by: [5: L_QUANTITY, DOUBLE, false] DESC");
assertContains(plan, " 3:Decode\n" +
assertContains(plan, " 4:Decode\n" +
" | <dict id 19> : <string id 16>");
assertContains(plan, " 2:SORT\n" +
" | order by: [19, INT, false] ASC, [5, DOUBLE, false] DESC\n" +
" | analytic partition by: [19, INT, false]");
assertContains(plan, " 1:PARTITION-TOP-N\n" +
" | type: RANK\n" +
" | partition by: [19: L_COMMENT, INT, false] \n" +
" | partition limit: 10\n" +
" | order by: [19, INT, false] ASC, [5, DOUBLE, false] DESC");
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, rank[(); args: ; result: BIGINT; " +
"args nullable: false; result nullable: true], ]\n" +
" | partition by: [19: L_COMMENT, INT, false]\n" +
" | order by: [5: L_QUANTITY, DOUBLE, false] DESC\n" +
" | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW");
// mul-column partition by
sql = "select * from (select L_COMMENT,l_quantity, rank() over " +
"(partition by L_COMMENT, l_shipmode order by l_quantity desc) rn from lineitem )t where rn <= 10;";
plan = getCostExplain(sql);
assertContains(plan, " 4:ANALYTIC\n" +
" | functions: [, rank[(); args: ; result: BIGINT; args nullable: false; result nullable: true], ]\n" +
" | partition by: [16: L_COMMENT, VARCHAR(44), false], [15: L_SHIPMODE, VARCHAR, false]\n" +
" | order by: [5: L_QUANTITY, DOUBLE, false] DESC");
assertContains(plan, " 3:Decode\n" +
assertContains(plan, " 4:Decode\n" +
" | <dict id 19> : <string id 16>");
assertContains(plan, " 2:SORT\n" +
" | order by: [19, INT, false] ASC, [15, VARCHAR, false] ASC, [5, DOUBLE, false] DESC\n" +
" | analytic partition by: [19, INT, false], [15: L_SHIPMODE, VARCHAR, false]");
assertContains(plan, " 1:PARTITION-TOP-N\n" +
" | type: RANK\n" +
" | partition by: [19: L_COMMENT, INT, false] , [15: L_SHIPMODE, CHAR, false] \n" +
" | partition limit: 10\n" +
" | order by: [19, INT, false] ASC, [15, VARCHAR, false] ASC, [5, DOUBLE, false] DESC\n" +
" | offset: 0");
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, rank[(); args: ; result: BIGINT; " +
"args nullable: false; result nullable: true], ]\n" +
" | partition by: [19: L_COMMENT, INT, false], [15: L_SHIPMODE, VARCHAR, false]\n" +
" | order by: [5: L_QUANTITY, DOUBLE, false] DESC\n" +
" | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW");
// partition column with expr
sql = "SELECT S_ADDRESS, MAX(S_SUPPKEY) over(partition by S_COMMENT order by S_NAME) FROM supplier_nullable";
plan = getCostExplain(sql);
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, max[([1: S_SUPPKEY, INT, false]); args: INT; result: INT; " +
"args nullable: false; result nullable: true], ]\n" +
" | partition by: [7: S_COMMENT, VARCHAR(101), false]\n" +
" | order by: [2: S_NAME, VARCHAR, false] ASC");
assertContains(plan, " 2:Decode\n" +
" | <dict id 10> : <string id 3>\n" +
" | <dict id 11> : <string id 7>");
assertContains(plan, " 1:SORT\n" +
" | order by: [11, INT, false] ASC, [2, VARCHAR, false] ASC\n" +
" | analytic partition by: [11, INT, false]");
assertContains(plan, " 4:Decode\n" +
" | <dict id 10> : <string id 3>");
assertContains(plan, " 3:Project\n" +
" | output columns:\n" +
" | 9 <-> [9: max(1: S_SUPPKEY), INT, true]\n" +
" | 10 <-> [10: S_ADDRESS, INT, true]");
assertContains(plan, " 2:ANALYTIC\n" +
" | functions: [, max[([1: S_SUPPKEY, INT, false]); " +
"args: INT; result: INT; args nullable: false; result nullable: true], ]\n" +
" | partition by: [11: S_COMMENT, INT, false]\n" +
" | order by: [2: S_NAME, VARCHAR, false] ASC\n" +
" | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW");
sql = "SELECT S_ADDRESS, MAX(S_SUPPKEY) over(partition by concat(S_COMMENT, 'a') order by S_NAME) FROM supplier_nullable";
plan = getCostExplain(sql);
assertContains(plan, " 4:ANALYTIC\n" +
" | functions: [, max[([9: S_SUPPKEY, INT, false]); args: INT; result: INT; " +
"args nullable: false; result nullable: true], ]\n" +
" | partition by: [17: concat, VARCHAR, true]\n" +
" | order by: [2: S_NAME, VARCHAR, false] ASC");
assertContains(plan, " 3:Decode\n" +
" | <dict id 20> : <string id 17>");
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, max[([9: S_SUPPKEY, INT, false]); " +
"args: INT; result: INT; args nullable: false; result nullable: true], ]\n" +
" | partition by: [20: concat, INT, true]\n" +
" | order by: [2: S_NAME, VARCHAR, false] ASC\n" +
" | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW");
assertContains(plan, " 2:SORT\n" +
" | order by: [20, INT, true] ASC, [2, VARCHAR, false] ASC\n" +
" | analytic partition by: [20, INT, true]");
" | analytic partition by: [20: concat, INT, true]");
assertContains(plan, " 1:Project\n" +
" | output columns:\n" +
" | 2 <-> [2: S_NAME, CHAR, false]\n" +
" | 9 <-> [1: S_SUPPKEY, INT, false]\n" +
" | 11 <-> [3: S_ADDRESS, VARCHAR, true]\n" +
" | 20 <-> DictDefine([19: S_COMMENT, INT, false], [concat[(<place-holder>, 'a'); " +
"args: VARCHAR; result: VARCHAR; args nullable: false; result nullable: true]])\n");
// partition column is not dict column
sql = "SELECT S_ADDRESS, MAX(S_SUPPKEY) over(partition by S_NAME order by S_COMMENT) FROM supplier_nullable";
plan = getCostExplain(sql);
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, max[([1: S_SUPPKEY, INT, false]); args: INT; result: INT; " +
"args nullable: false; result nullable: true], ]\n" +
assertContains(plan, " 4:Decode\n" +
" | <dict id 10> : <string id 3>");
assertContains(plan, " 3:Project\n" +
" | output columns:\n" +
" | 9 <-> [9: max(1: S_SUPPKEY), INT, true]\n" +
" | 10 <-> [10: S_ADDRESS, INT, true]");
assertContains(plan, " 2:ANALYTIC\n" +
" | functions: [, max[([1: S_SUPPKEY, INT, false]); " +
"args: INT; result: INT; args nullable: false; result nullable: true], ]\n" +
" | partition by: [2: S_NAME, VARCHAR, false]\n" +
" | order by: [7: S_COMMENT, VARCHAR(101), false] ASC");
assertContains(plan, " 2:Decode\n" +
" | <dict id 10> : <string id 3>\n" +
" | <dict id 11> : <string id 7>");
assertContains(plan, " 1:SORT\n" +
" | order by: [2, VARCHAR, false] ASC, [11, INT, false] ASC\n" +
" | analytic partition by: [2: S_NAME, VARCHAR, false]");
" | order by: [11: S_COMMENT, INT, false] ASC\n" +
" | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW");
// there is no DecodeNode
sql = "SELECT /*+SET_VAR(cbo_enable_low_cardinality_optimize=false)*/" +
@ -1536,14 +1523,12 @@ public class LowCardinalityTest2 extends PlanTestBase {
// window function with full order by
sql = "select rank() over (order by S_ADDRESS) as rk from supplier_nullable";
plan = getVerboseExplain(sql);
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, rank[(); args: ; result: BIGINT; args nullable: false; result nullable: true], ]\n" +
" | order by: [10: S_ADDRESS, INT, true] ASC\n" +
" | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n" +
" | \n" +
" 3:Decode\n" +
" | <dict id 10> : <string id 3>");
" | cardinality: 1");
// Decode node under sort node
sql = "select S_ADDRESS, S_COMMENT from (select S_ADDRESS, " +
@ -2170,16 +2155,15 @@ public class LowCardinalityTest2 extends PlanTestBase {
"FROM supplier " +
"ORDER BY S_ADDRESS, S_COMMENT;\n";
String plan = getVerboseExplain(sql);
assertContains(plan, " 6:Decode\n" +
" | <dict id 10> : <string id 3>\n" +
" | <dict id 11> : <string id 7>");
assertContains(plan, " 5:SORT\n" +
" | order by: <slot 3> 3: S_ADDRESS ASC, <slot 7> 7: S_COMMENT ASC\n" +
" | offset: 0");
assertContains(plan, " 1:SORT\n" +
" | order by: <slot 10> 10: S_ADDRESS ASC, <slot 11> 11: S_COMMENT ASC\n" +
" | offset: 0");
assertContains(plan, " 3:ANALYTIC\n" +
" | functions: [, first_value[([5: S_PHONE, VARCHAR, false]); " +
"args: VARCHAR; result: VARCHAR; args nullable: false; result nullable: true], ]\n" +
" | order by: [10: S_ADDRESS, INT, false] ASC, [11: S_COMMENT, INT, false] ASC\n" +
" | window: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING");
}
@Test
@ -2343,21 +2327,19 @@ public class LowCardinalityTest2 extends PlanTestBase {
" ) t\n" +
"WHERE\n" +
" t.row_num = 1;";
String plan = getVerboseExplain(sql);
assertContains(plan, " 7:Decode\n" +
" | <dict id 12> : <string id 3>\n" +
" | <dict id 13> : <string id 11>\n" +
" | <dict id 13> : <string id 9>\n" +
" | <dict id 14> : <string id 11>\n" +
" | cardinality: 1\n" +
" | \n" +
" 2:SORT\n" +
" | order by: <slot 12> 12: S_ADDRESS ASC\n" +
" | analytic partition by: <slot 12> 12: S_ADDRESS\n" +
" | offset: 0\n" +
" | \n" +
" 1:PARTITION-TOP-N\n" +
" | partition by: 12: S_ADDRESS \n" +
" | partition limit: 1\n" +
" | order by: <slot 12> 12: S_ADDRESS ASC\n" +
" | pre agg functions: [, min(12: S_ADDRESS), ]\n" +
" | offset: 0");
" 6:ANALYTIC\n" +
" | functions: [, row_number[(); args: ; result: BIGINT; " +
"args nullable: false; result nullable: true], ]\n" +
" | partition by: [12: S_ADDRESS, INT, false]\n" +
" | order by: [12: S_ADDRESS, INT, false] ASC\n" +
" | window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW\n" +
" | cardinality: 1");
}
}

View File

@ -0,0 +1,157 @@
-- name: test_low_cardinality_opt_window_upon_table_function
create table t(v1 bigint, v2 bigint, a1 array<string>, a2 array<string>) properties('replication_num'='1');
-- result:
-- !result
insert into t
select i%10 as v1, i as v2, [concat('foo_', i%250), concat('foo_',(i+1)%250), concat('foo_',(i+2)%250)] as a1,
[concat('bar_', (i+10)%250),concat('bar_', (i+11)%250),concat('bar_', (i+12)%250)] as a2
from table(generate_series(1, 10000)) t(i);
-- result:
-- !result
[UC] analyze full table t;
-- result:
test_db_22e1ef1e12b0451e94d13215dc0d8b94.t analyze status OK
-- !result
function: wait_global_dict_ready('a1', 't')
-- result:
-- !result
function: wait_global_dict_ready('a2', 't')
-- result:
-- !result
create table result(fingerprint bigint) properties('replication_num'='1');
-- result:
-- !result
truncate table result;
-- result:
-- !result
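-- Note: each case below runs the same query twice, once with the low-cardinality
-- optimization enabled and once disabled, inserts a hash of the result set into
-- `result`, and finally asserts that both fingerprints agree.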
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, lead(e1) over(partition by v1 order by v2, e1) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=true, cbo_enable_low_cardinality_optimize=true,array_low_cardinality_optimize=true)*/
sum(murmur_hash3_32(v1, v2, coalesce(r, ""))) as fingerprint
from cte1;
-- result:
-- !result
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, lead(e1) over(partition by v1 order by v2, e1) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=false, cbo_enable_low_cardinality_optimize=false,array_low_cardinality_optimize=false)*/
sum(murmur_hash3_32(v1, v2, coalesce(r, ""))) as fingerprint
from cte1;
-- result:
-- !result
select assert_true(count(1)=2 and count(distinct fingerprint)=1) from result;
-- result:
1
-- !result
truncate table result;
-- result:
-- !result
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select e1, v2, lead(v1) over(partition by e1 order by v2, e1) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=true, cbo_enable_low_cardinality_optimize=true,array_low_cardinality_optimize=true)*/
sum(murmur_hash3_32(e1, v2, coalesce(r, ""))) as fingerprint
from cte1;
-- result:
-- !result
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select e1, v2, lead(v1) over(partition by e1 order by v2, e1) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=false, cbo_enable_low_cardinality_optimize=false,array_low_cardinality_optimize=false)*/
sum(murmur_hash3_32(e1, v2, coalesce(r, ""))) as fingerprint
from cte1;
-- result:
-- !result
select assert_true(count(1)=2 and count(distinct fingerprint)=1) from result;
-- result:
1
-- !result
truncate table result;
-- result:
-- !result
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, e2, lag(v1) over(partition by v1 order by v2, e2) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=true, cbo_enable_low_cardinality_optimize=true,array_low_cardinality_optimize=true)*/
sum(murmur_hash3_32(v1, v2, e2, coalesce(r, ""))) as fingerprint
from cte1;
-- result:
-- !result
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, e2, lag(v1) over(partition by v1 order by v2, e2) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=false, cbo_enable_low_cardinality_optimize=false,array_low_cardinality_optimize=false)*/
sum(murmur_hash3_32(v1, v2, e2, coalesce(r, ""))) as fingerprint
from cte1;
-- result:
-- !result
select assert_true(count(1)=2 and count(distinct fingerprint)=1) from result;
-- result:
1
-- !result
truncate table result;
-- result:
-- !result
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, a1, a2, e1, e2, lead(e1) over(partition by substr(e1,5) order by v2, substr(e2,5)) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=true, cbo_enable_low_cardinality_optimize=true,array_low_cardinality_optimize=true)*/
sum(murmur_hash3_32(v1, v2, array_join(a1,","), array_join(a2,","), e1, e2, coalesce(r, ""))) as fingerprint
from cte1;
-- result:
-- !result
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, a1, a2, e1, e2, lead(e1) over(partition by substr(e1,5) order by v2, substr(e2,5)) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=false, cbo_enable_low_cardinality_optimize=false,array_low_cardinality_optimize=false)*/
sum(murmur_hash3_32(v1, v2, array_join(a1,","), array_join(a2,","), e1, e2, coalesce(r, ""))) as fingerprint
from cte1;
-- result:
-- !result
select assert_true(count(1)=2 and count(distinct fingerprint)=1) from result;
-- result:
1
-- !result

View File

@ -0,0 +1,108 @@
-- name: test_low_cardinality_opt_window_upon_table_function
create table t(v1 bigint, v2 bigint, a1 array<string>, a2 array<string>) properties('replication_num'='1');
insert into t
select i%10 as v1, i as v2, [concat('foo_', i%250), concat('foo_',(i+1)%250), concat('foo_',(i+2)%250)] as a1,
[concat('bar_', (i+10)%250),concat('bar_', (i+11)%250),concat('bar_', (i+12)%250)] as a2
from table(generate_series(1, 10000)) t(i);
[UC] analyze full table t;
function: wait_global_dict_ready('a1', 't')
function: wait_global_dict_ready('a2', 't')
create table result(fingerprint bigint) properties('replication_num'='1');
truncate table result;
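-- Note: each case below runs the same query twice (optimization on vs. off) and
-- asserts at the end that both result fingerprints agree.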
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, lead(e1) over(partition by v1 order by v2, e1) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=true, cbo_enable_low_cardinality_optimize=true,array_low_cardinality_optimize=true)*/
sum(murmur_hash3_32(v1, v2, coalesce(r, ""))) as fingerprint
from cte1;
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, lead(e1) over(partition by v1 order by v2, e1) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=false, cbo_enable_low_cardinality_optimize=false,array_low_cardinality_optimize=false)*/
sum(murmur_hash3_32(v1, v2, coalesce(r, ""))) as fingerprint
from cte1;
select assert_true(count(1)=2 and count(distinct fingerprint)=1) from result;
truncate table result;
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select e1, v2, lead(v1) over(partition by e1 order by v2, e1) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=true, cbo_enable_low_cardinality_optimize=true,array_low_cardinality_optimize=true)*/
sum(murmur_hash3_32(e1, v2, coalesce(r, ""))) as fingerprint
from cte1;
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select e1, v2, lead(v1) over(partition by e1 order by v2, e1) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=false, cbo_enable_low_cardinality_optimize=false,array_low_cardinality_optimize=false)*/
sum(murmur_hash3_32(e1, v2, coalesce(r, ""))) as fingerprint
from cte1;
select assert_true(count(1)=2 and count(distinct fingerprint)=1) from result;
truncate table result;
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, e2, lag(v1) over(partition by v1 order by v2, e2) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=true, cbo_enable_low_cardinality_optimize=true,array_low_cardinality_optimize=true)*/
sum(murmur_hash3_32(v1, v2, e2, coalesce(r, ""))) as fingerprint
from cte1;
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, e2, lag(v1) over(partition by v1 order by v2, e2) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=false, cbo_enable_low_cardinality_optimize=false,array_low_cardinality_optimize=false)*/
sum(murmur_hash3_32(v1, v2, e2, coalesce(r, ""))) as fingerprint
from cte1;
select assert_true(count(1)=2 and count(distinct fingerprint)=1) from result;
truncate table result;
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, a1, a2, e1, e2, lead(e1) over(partition by substr(e1,5) order by v2, substr(e2,5)) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=true, cbo_enable_low_cardinality_optimize=true,array_low_cardinality_optimize=true)*/
sum(murmur_hash3_32(v1, v2, array_join(a1,","), array_join(a2,","), e1, e2, coalesce(r, ""))) as fingerprint
from cte1;
insert into result with cte0 as (
select t.v1, t.v2, t.a1, t.a2,tmp.e1, tmp.e2
from t, unnest(t.a1, t.a2) tmp(e1,e2)
),
cte1 as(
select v1, v2, a1, a2, e1, e2, lead(e1) over(partition by substr(e1,5) order by v2, substr(e2,5)) as r
from cte0
)
select /*+SET_VAR(low_cardinality_optimize_v2=false, cbo_enable_low_cardinality_optimize=false,array_low_cardinality_optimize=false)*/
sum(murmur_hash3_32(v1, v2, array_join(a1,","), array_join(a2,","), e1, e2, coalesce(r, ""))) as fingerprint
from cte1;
select assert_true(count(1)=2 and count(distinct fingerprint)=1) from result;