[Enhancement] Extend MinMaxStats optimization to support DictMappingExpr (backport #62212) (#62316)

Co-authored-by: Murphy <96611012+murphyatwork@users.noreply.github.com>
This commit is contained in:
mergify[bot] 2025-08-26 05:58:15 +00:00 committed by GitHub
parent afcbb584cd
commit 3b1e377c6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 33 additions and 3 deletions

View File

@ -179,6 +179,7 @@ public class AggregationNode extends PlanNode implements RuntimeFilterBuildNode
this.groupByMinMaxStats = groupByMinMaxStats;
}
@Override
public void disablePhysicalPropertyOptimize() {
setUseSortAgg(false);
setUsePerBucketOptimize(false);

View File

@ -197,11 +197,13 @@ public class Utils {
return list;
}
/**
* Extract the operators that satisfy the predicate. A matching operator is added to the list
* and the search does not descend into its inputs.
*/
public static <E extends Operator> void extractOperator(OptExpression root, List<E> list,
Predicate<Operator> lambda) {
if (lambda.test(root.getOp())) {
list.add((E) root.getOp());
return;
}
List<OptExpression> inputs = root.getInputs();
@ -210,11 +212,13 @@ public class Utils {
}
}
/**
* Extract the operators that satisfy the predicate. A matching operator is added to the list
* and the search does not descend into its inputs.
*/
private static <E extends Operator> void extractOperator(GroupExpression root, List<E> list,
Predicate<Operator> lambda) {
if (lambda.test(root.getOp())) {
list.add((E) root.getOp());
return;
}
List<Group> groups = root.getInputs();

View File

@ -29,6 +29,7 @@ import com.starrocks.sql.optimizer.operator.physical.PhysicalHashAggregateOperat
import com.starrocks.sql.optimizer.operator.physical.PhysicalOlapScanOperator;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import com.starrocks.sql.optimizer.operator.scalar.ConstantOperator;
import com.starrocks.sql.optimizer.operator.scalar.DictMappingOperator;
import com.starrocks.sql.optimizer.statistics.ColumnDict;
import com.starrocks.sql.optimizer.statistics.IMinMaxStatsMgr;
import com.starrocks.sql.optimizer.statistics.StatsVersion;
@ -73,6 +74,23 @@ public class ApplyMinMaxStatisticRule implements TreeRewriteRule {
if (table.inputHasTempPartition(scanOperator.getSelectedPartitionId())) {
continue;
}
// Handle group-by keys that the scan's projection defines through a DictMappingOperator:
// their min/max range is taken from the global dictionary of the underlying dict column.
if (scanOperator.getProjection() != null) {
for (var entry : scanOperator.getProjection().getColumnRefMap().entrySet()) {
// Only projected outputs that are group-by keys AND are produced by a dict mapping qualify.
if (groupByRefSets.contains(entry.getKey()) &&
entry.getValue() instanceof DictMappingOperator mappingOperator) {
ColumnRefOperator column = mappingOperator.getDictColumn();
// Skip dict columns whose type is neither numeric nor date.
// NOTE(review): presumably the dict column carries the encoded (integer) type here — confirm.
if (!column.getType().isNumericType() && !column.getType().isDate()) {
continue;
}
// Without a global dict registered under the dict column's id, nothing is recorded.
if (globalDicts.containsKey(column.getId())) {
// The range is recorded as ["0", dictSize], both as VARCHAR constants.
// NOTE(review): assumes dict codes always fall inside this range — verify against ColumnDict.
final ConstantOperator min = ConstantOperator.createVarchar("0");
final ColumnDict columnDict = globalDicts.get(column.getId());
final ConstantOperator max = ConstantOperator.createVarchar("" + columnDict.getDictSize());
// Stored under the projected output column's id, not the dict column's id.
infos.put(entry.getKey().getId(), new Pair<>(min, max));
}
}
}
}
for (ColumnRefOperator column : scanOperator.getColRefToColumnMetaMap().keySet()) {
if (groupByRefSets.contains(column.getId())) {
if (!column.getType().isNumericType() && !column.getType().isDate()) {

View File

@ -308,6 +308,11 @@ public class JsonPathRewriteTest extends PlanTestBase {
Arguments.of(
"select case when get_json_string(c2, 'f13') = 'x' then 1 else 0 end from extend_predicate2",
"3 <-> DictDecode(5: c2.f13, [if(<place-holder> = 'x', 1, 0)])"
),
// 15. JSON with MinMaxStats optimization
Arguments.of(
"select get_json_string(c2, 'f1') k1, count(*) from extend_predicate2 group by k1",
"group by min-max stats"
)
);

View File

@ -1242,7 +1242,9 @@ public class LowCardinalityTest2 extends PlanTestBase {
" | aggregate: approx_count_distinct[([11: S_ADDRESS, INT, false]); args: INT; " +
"result: VARBINARY; args nullable: false; result nullable: false]\n" +
" | group by: [12: upper, INT, true]\n" +
" | cardinality: 1");
" | group by min-max stats:\n" +
" | - 0:1\n" +
" | cardinality: 1\n");
assertContains(plan, "Global Dict Exprs:\n" +
" 12: DictDefine(11: S_ADDRESS, [upper(<place-holder>)])");
// TODO add a case: Decode node before Sort Node