[Enhancement] integrate global dict with flatjson (backport #61681) (#62055)

Signed-off-by: Murphy <mofei@starrocks.com>
Co-authored-by: Murphy <96611012+murphyatwork@users.noreply.github.com>
mergify[bot] 2025-08-19 11:30:16 +08:00 committed by GitHub
parent 04bb4e3f1b
commit 89bc4ff068
31 changed files with 1453 additions and 134 deletions

View File

@ -92,9 +92,8 @@ public class ColumnAccessPath {
}
public static ColumnAccessPath createFromLinearPath(String linearPath, Type valueType) {
final int jsonFlattenDepth = 20;
List<String> pieces = Lists.newArrayList();
if (SubfieldAccessPathNormalizer.parseSimpleJsonPath(jsonFlattenDepth, linearPath, pieces)) {
List<String> pieces = SubfieldAccessPathNormalizer.parseSimpleJsonPath(linearPath);
if (pieces.isEmpty()) {
throw new IllegalArgumentException("illegal json path: " + linearPath);
}
return createLinearPath(pieces, valueType);

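For reference, a hedged sketch of the new contract (the Type and paths are illustrative): parseSimpleJsonPath now returns the parsed pieces directly, and an empty result marks an unparsable path.

// "c2.f1" parses to [c2, f1] and builds a linear access path
ColumnAccessPath ok = ColumnAccessPath.createFromLinearPath("c2.f1", Type.VARCHAR);
// recursive descent ("..") is unsupported: parsing yields an empty list,
// so this throws IllegalArgumentException("illegal json path: $..a")
ColumnAccessPath bad = ColumnAccessPath.createFromLinearPath("$..a", Type.VARCHAR);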
View File

@ -446,6 +446,7 @@ public class OlapTable extends Table {
olapTable.curBinlogConfig = new BinlogConfig(this.curBinlogConfig);
}
olapTable.dbName = this.dbName;
olapTable.maxColUniqueId = new AtomicInteger(this.maxColUniqueId.get());
}
public void addDoubleWritePartition(long sourcePartitionId, long tempPartitionId) {

View File

@ -41,6 +41,7 @@ import com.starrocks.thrift.TScanRange;
import com.starrocks.thrift.TScanRangeLocation;
import com.starrocks.thrift.TScanRangeLocations;
import com.starrocks.warehouse.cngroup.ComputeResource;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -176,6 +177,10 @@ public class MetaScanNode extends ScanNode {
columnsDesc.add(tColumn);
}
msg.meta_scan_node.setColumns(columnsDesc);
if (CollectionUtils.isNotEmpty(columnAccessPaths)) {
msg.meta_scan_node.setColumn_access_paths(columnAccessPathToThrift());
}
}
@Override
@ -193,6 +198,10 @@ public class MetaScanNode extends ScanNode {
if (!selectPartitionNames.isEmpty()) {
output.append(prefix).append("Partitions: ").append(selectPartitionNames).append("\n");
}
if (detailLevel == TExplainLevel.VERBOSE) {
output.append(explainColumnAccessPath(prefix));
}
return output.toString();
}

View File

@ -48,6 +48,7 @@ import com.starrocks.sql.optimizer.ScanOptimizeOption;
import com.starrocks.thrift.TColumnAccessPath;
import com.starrocks.thrift.TScanRangeLocations;
import com.starrocks.warehouse.cngroup.ComputeResource;
import org.apache.commons.collections4.CollectionUtils;
import org.jetbrains.annotations.TestOnly;
import java.util.ArrayList;
@ -167,6 +168,9 @@ public abstract class ScanNode extends PlanNode {
protected String explainColumnAccessPath(String prefix) {
String result = "";
if (CollectionUtils.isEmpty(columnAccessPaths)) {
return result;
}
if (columnAccessPaths.stream().anyMatch(c -> !c.isFromPredicate() && !c.isExtended())) {
result += prefix + "ColumnAccessPath: [" + columnAccessPaths.stream()
.filter(c -> !c.isFromPredicate() && !c.isExtended())

View File

@ -389,6 +389,7 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
"cbo_enable_single_node_prefer_two_stage_aggregate";
public static final String CBO_JSON_V2_REWRITE = "cbo_json_v2_rewrite";
public static final String CBO_JSON_V2_DICT_OPT = "cbo_json_v2_dict_opt";
public static final String CBO_PUSH_DOWN_DISTINCT_BELOW_WINDOW = "cbo_push_down_distinct_below_window";
public static final String CBO_PUSH_DOWN_AGGREGATE = "cbo_push_down_aggregate";
@ -1247,6 +1248,9 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
@VarAttr(name = CBO_JSON_V2_REWRITE)
private boolean cboJSONV2Rewrite = true;
@VarAttr(name = CBO_JSON_V2_DICT_OPT)
private boolean cboJSONV2DictOpt = true;
/*
* the parallel exec instance num for one Fragment in one BE
@ -5189,6 +5193,14 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
this.enableDropTableCheckMvDependency = enableDropTableCheckMvDependency;
}
public boolean isEnableJSONV2DictOpt() {
return cboJSONV2DictOpt;
}
public void setEnableJSONV2DictOpt(boolean value) {
this.cboJSONV2DictOpt = value;
}
// Serialize to thrift object
// used for rest api
public TQueryOptions toThrift() {

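A hedged usage sketch for the new switch, e.g. in a plan test (equivalent to SET cbo_json_v2_dict_opt = true in a SQL session; the variable defaults to true per the @VarAttr initializer above):

SessionVariable sv = connectContext.getSessionVariable();
sv.setEnableJSONV2DictOpt(true);   // gates the JSON-subfield global-dict optimization
boolean enabled = sv.isEnableJSONV2DictOpt();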
View File

@ -335,6 +335,7 @@ public class InsertPlanner {
dict.ifPresent(
columnDict -> globalDicts.add(new Pair<>(slotDescriptor.getId().asInt(), columnDict)));
}
// TODO: attach the dict for JSON
}
tupleDesc.computeMemLayout();

View File

@ -40,6 +40,7 @@ import com.starrocks.server.GlobalStateMgr;
import com.starrocks.sql.analyzer.SemanticException;
import com.starrocks.sql.ast.CreateMaterializedViewStmt;
import com.starrocks.sql.ast.StatementBase;
import com.starrocks.sql.optimizer.base.ColumnIdentifier;
import com.starrocks.sql.optimizer.rule.mv.MVUtils;
import com.starrocks.sql.parser.SqlParser;
import com.starrocks.thrift.TUniqueId;
@ -198,6 +199,19 @@ public class MetaUtils {
}
}
public static long lookupDbIdByTableId(long tableId) {
long res = -1;
for (Long id : GlobalStateMgr.getCurrentState().getLocalMetastore().getDbIds()) {
Database db = GlobalStateMgr.getCurrentState().getLocalMetastore().getDb(id);
if (db != null &&
GlobalStateMgr.getCurrentState().getLocalMetastore().getTable(db.getId(), tableId) != null) {
res = db.getId();
break;
}
}
return res;
}
public static List<Column> getColumnsByColumnIds(Table table, List<ColumnId> ids) {
return getColumnsByColumnIds(table.getIdToColumn(), ids);
}
@ -271,6 +285,22 @@ public class MetaUtils {
return column.getName();
}
public static Column getColumnByColumnId(ColumnIdentifier identifier) {
return getColumnByColumnId(identifier.getDbId(), identifier.getTableId(), identifier.getColumnName());
}
public static Column getColumnByColumnId(long dbId, long tableId, ColumnId columnId) {
Table table = GlobalStateMgr.getCurrentState().getLocalMetastore().getTable(dbId, tableId);
if (table == null) {
throw new SemanticException("Table %s is not found", tableId);
}
Column column = table.getColumn(columnId);
if (column == null) {
throw new SemanticException(String.format("cannot find column by column id: %s", columnId));
}
return column;
}
public static List<ColumnId> getColumnIdsByColumnNames(Table table, List<String> names) {
List<ColumnId> columnIds = new ArrayList<>(names.size());
for (String name : names) {

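A hedged usage sketch of the new helpers (ids illustrative): lookupDbIdByTableId does a linear scan over all databases and returns -1 when no database owns the table, while getColumnByColumnId raises SemanticException for a missing table or column.

long dbId = MetaUtils.lookupDbIdByTableId(tableId);
if (dbId != -1) {
    Column column = MetaUtils.getColumnByColumnId(dbId, tableId, ColumnId.create("c2"));
}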
View File

@ -682,6 +682,9 @@ public class QueryOptimizer extends Optimizer {
CTEUtils.collectCteOperators(tree, context);
}
// Rewrite the jsonpath in META-SCAN
scheduler.rewriteOnce(tree, rootTaskContext, JsonPathRewriteRule.createForMetaScan());
scheduler.rewriteIterative(tree, rootTaskContext, new MergeTwoProjectRule());
scheduler.rewriteOnce(tree, rootTaskContext, RuleSet.META_SCAN_REWRITE_RULES);
scheduler.rewriteIterative(tree, rootTaskContext, new MergeTwoProjectRule());
@ -692,6 +695,8 @@ public class QueryOptimizer extends Optimizer {
// After this rule, we shouldn't generate logical project operator
scheduler.rewriteIterative(tree, rootTaskContext, new MergeProjectWithChildRule());
scheduler.rewriteOnce(tree, rootTaskContext, JsonPathRewriteRule.createForOlapScan());
scheduler.rewriteOnce(tree, rootTaskContext, new EliminateSortColumnWithEqualityPredicateRule());
scheduler.rewriteOnce(tree, rootTaskContext, new PushDownTopNBelowOuterJoinRule());
// intersect rewrite depend on statistics
@ -963,8 +968,6 @@ public class QueryOptimizer extends Optimizer {
int planCount = result.getPlanCount();
result = new JsonPathRewriteRule().rewrite(result, rootTaskContext);
// Since there may be many different plans in the logic phase, it's possible
// that this switch can't be turned on after logical optimization, so we only determine
// whether the PreAggregate can be turned on in the final

View File

@ -26,6 +26,7 @@ import com.starrocks.authorization.ObjectType;
import com.starrocks.authorization.PrivilegeType;
import com.starrocks.catalog.BaseTableInfo;
import com.starrocks.catalog.Column;
import com.starrocks.catalog.ColumnId;
import com.starrocks.catalog.Database;
import com.starrocks.catalog.InternalCatalog;
import com.starrocks.catalog.KeysType;
@ -34,6 +35,7 @@ import com.starrocks.catalog.MvId;
import com.starrocks.catalog.MvPlanContext;
import com.starrocks.catalog.OlapTable;
import com.starrocks.catalog.Table;
import com.starrocks.catalog.Type;
import com.starrocks.common.ErrorCode;
import com.starrocks.common.ErrorReport;
import com.starrocks.common.util.concurrent.lock.LockType;
@ -59,6 +61,8 @@ import com.starrocks.sql.optimizer.dump.QueryDumper;
import com.starrocks.sql.optimizer.operator.scalar.ConstantOperator;
import com.starrocks.sql.optimizer.rewrite.ConstantFunction;
import com.starrocks.sql.optimizer.rule.transformation.materialization.MvUtils;
import com.starrocks.sql.optimizer.statistics.CacheDictManager;
import com.starrocks.sql.optimizer.statistics.ColumnDict;
import com.starrocks.thrift.TResultBatch;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
@ -586,4 +590,29 @@ public class MetaFunctions {
}
}
/**
* Inspect the global dictionary of a table column, and return the content in JSON format.
*/
@ConstantFunction(name = "inspect_global_dict", argTypes = {VARCHAR,
VARCHAR}, returnType = VARCHAR, isMetaFunction = true)
public static ConstantOperator inspectGlobalDict(ConstantOperator tableName, ConstantOperator columnName) {
TableName tableNameValue = TableName.fromString(tableName.getVarchar());
Optional<Table> maybeTable = GlobalStateMgr.getCurrentState().getMetadataMgr()
.getTable(new ConnectContext(), tableNameValue);
maybeTable.orElseThrow(() -> ErrorReport.buildSemanticException(ErrorCode.ERR_BAD_TABLE_ERROR, tableNameValue));
if (!(maybeTable.get() instanceof OlapTable)) {
ErrorReport.reportSemanticException(ErrorCode.ERR_INVALID_PARAMETER, "must be OLAP_TABLE");
}
OlapTable table = (OlapTable) maybeTable.get();
String column = columnName.getVarchar();
CacheDictManager instance = CacheDictManager.getInstance();
Optional<ColumnDict> dict = instance.getGlobalDictSync(table.getId(), ColumnId.create(column));
if (dict.isEmpty()) {
return ConstantOperator.createNull(Type.VARCHAR);
} else {
return ConstantOperator.createVarchar(dict.get().toJson());
}
}
}

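A hedged usage example (names illustrative): from SQL, select inspect_global_dict('test_db.extend_predicate2', 'c2.f1') returns the cached dictionary serialized as JSON, or NULL when nothing has been collected yet. The equivalent programmatic lookup:

Optional<ColumnDict> dict = CacheDictManager.getInstance()
        .getGlobalDictSync(table.getId(), ColumnId.create("c2.f1"));
String json = dict.map(ColumnDict::toJson).orElse(null);  // null when no dict is cached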
View File

@ -14,6 +14,7 @@
package com.starrocks.sql.optimizer.operator.logical;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.starrocks.sql.optimizer.operator.OperatorType;
import com.starrocks.sql.optimizer.operator.OperatorVisitor;
@ -82,6 +83,7 @@ public class LogicalMetaScanOperator extends LogicalScanOperator {
super.withOperator(operator);
builder.aggColumnIdToNames = ImmutableMap.copyOf(operator.aggColumnIdToNames);
builder.selectPartitionNames = operator.selectPartitionNames;
builder.columnAccessPaths = ImmutableList.copyOf(operator.columnAccessPaths);
return this;
}

View File

@ -282,6 +282,14 @@ public abstract class LogicalScanOperator extends LogicalOperator {
return (B) this;
}
public B addColumnAccessPaths(List<ColumnAccessPath> paths) {
builder.columnAccessPaths = ImmutableList.<ColumnAccessPath>builder()
.addAll(paths)
.addAll(builder.columnAccessPaths)
.build();
return (B) this;
}
public B setTable(Table table) {
builder.table = table;
return (B) this;

View File

@ -14,6 +14,8 @@
package com.starrocks.sql.optimizer.operator.physical;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.starrocks.sql.optimizer.OptExpression;
import com.starrocks.sql.optimizer.OptExpressionVisitor;
import com.starrocks.sql.optimizer.operator.OperatorType;
@ -25,8 +27,14 @@ import java.util.Map;
import java.util.Objects;
public class PhysicalMetaScanOperator extends PhysicalScanOperator {
private final Map<Integer, String> aggColumnIdToNames;
private final List<String> selectPartitionNames;
private Map<Integer, String> aggColumnIdToNames;
private List<String> selectPartitionNames;
public PhysicalMetaScanOperator() {
super(OperatorType.PHYSICAL_META_SCAN);
this.aggColumnIdToNames = ImmutableMap.of();
this.selectPartitionNames = ImmutableList.of();
}
public PhysicalMetaScanOperator(LogicalMetaScanOperator scanOperator) {
super(OperatorType.PHYSICAL_META_SCAN, scanOperator);
@ -69,6 +77,27 @@ public class PhysicalMetaScanOperator extends PhysicalScanOperator {
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), aggColumnIdToNames);
return Objects.hash(super.hashCode(), aggColumnIdToNames, selectPartitionNames);
}
public static Builder builder() {
return new Builder();
}
public static class Builder
extends PhysicalScanOperator.Builder<PhysicalMetaScanOperator, PhysicalScanOperator.Builder> {
@Override
protected PhysicalMetaScanOperator newInstance() {
return new PhysicalMetaScanOperator();
}
@Override
public PhysicalMetaScanOperator.Builder withOperator(PhysicalMetaScanOperator operator) {
super.withOperator(operator);
builder.aggColumnIdToNames = ImmutableMap.copyOf(operator.aggColumnIdToNames);
builder.selectPartitionNames = ImmutableList.copyOf(operator.selectPartitionNames);
return this;
}
}
}

View File

@ -322,6 +322,7 @@ public class PhysicalOlapScanOperator extends PhysicalScanOperator {
builder.prunedPartitionPredicates = operator.prunedPartitionPredicates;
builder.vectorSearchOptions = operator.vectorSearchOptions;
builder.sample = operator.getSample();
builder.columnAccessPaths = operator.columnAccessPaths;
return this;
}

View File

@ -157,6 +157,7 @@ public enum RuleType {
TF_INTERSECT_DISTINCT,
TF_MERGE_PROJECT_WITH_CHILD,
TF_JSON_PATH_REWRITE,
TF_PUSH_DOWN_JOIN_ON_EXPRESSION_TO_CHILD_PROJECT,

View File

@ -32,6 +32,7 @@ import java.util.List;
public class MergeProjectWithChildRule extends TransformationRule {
boolean eliminateUselessProject = true;
public MergeProjectWithChildRule() {
super(RuleType.TF_MERGE_PROJECT_WITH_CHILD,
Pattern.create(OperatorType.LOGICAL_PROJECT).
@ -72,7 +73,6 @@ public class MergeProjectWithChildRule extends TransformationRule {
return Lists.newArrayList(OptExpression.create(builder.build(), input.inputAt(0).getInputs()));
}
ColumnRefSet projectColumns = logicalProjectOperator.getOutputColumns(
new ExpressionContext(input));
ColumnRefSet childOutputColumns = child.getOutputColumns(new ExpressionContext(input.inputAt(0)));

View File

@ -36,7 +36,9 @@ import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
import com.starrocks.sql.optimizer.rule.RuleType;
import com.starrocks.sql.optimizer.rule.tree.JsonPathRewriteRule;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import java.util.List;
import java.util.Map;
@ -59,9 +61,16 @@ public class PushDownAggToMetaScanRule extends TransformationRule {
return false;
}
for (Map.Entry<ColumnRefOperator, ScalarOperator> entry : projectOperator.getColumnRefMap().entrySet()) {
if (!entry.getKey().equals(entry.getValue())) {
return false;
// select min(c1) from tbl [_META_]
if (entry.getKey().equals(entry.getValue())) {
continue;
}
// select dict_merge(get_json_string(c1)) from tbl [_META_]
if (entry.getValue().getHints().contains(JsonPathRewriteRule.COLUMN_REF_HINT)) {
continue;
}
return false;
}
for (CallOperator aggCall : agg.getAggregations().values()) {
@ -81,6 +90,7 @@ public class PushDownAggToMetaScanRule extends TransformationRule {
@Override
public List<OptExpression> transform(OptExpression input, OptimizerContext context) {
LogicalAggregationOperator agg = (LogicalAggregationOperator) input.getOp();
LogicalProjectOperator project = (LogicalProjectOperator) input.inputAt(0).getOp();
LogicalMetaScanOperator metaScan = (LogicalMetaScanOperator) input.inputAt(0).inputAt(0).getOp();
ColumnRefFactory columnRefFactory = context.getColumnRefFactory();
@ -100,6 +110,12 @@ public class PushDownAggToMetaScanRule extends TransformationRule {
if (aggCall.getFnName().equals(FunctionSet.COUNT) && aggCall.getChildren().isEmpty()) {
usedColumn = metaScan.getOutputColumns().get(0);
metaColumnName = "rows_" + usedColumn.getName();
} else if (MapUtils.isNotEmpty(project.getColumnRefMap())) {
ColumnRefSet usedColumns = aggCall.getUsedColumns();
Preconditions.checkArgument(usedColumns.cardinality() == 1);
List<ColumnRefOperator> columnRefOperators = usedColumns.getColumnRefOperators(columnRefFactory);
usedColumn = (ColumnRefOperator) project.getColumnRefMap().get(columnRefOperators.get(0));
metaColumnName = aggCall.getFnName() + "_" + usedColumn.getName();
} else {
ColumnRefSet usedColumns = aggCall.getUsedColumns();
Preconditions.checkArgument(usedColumns.cardinality() == 1);

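A hedged restatement of the acceptance check the loop above implements, with the two shapes it admits noted inline:

// accepts: select min(c1) from tbl [_META_]
// accepts: select dict_merge(get_json_string(c2, 'f1'), 255) from tbl [_META_]
private static boolean isPushableProjection(ColumnRefOperator key, ScalarOperator value) {
    return key.equals(value)                                                    // identity mapping
            || value.getHints().contains(JsonPathRewriteRule.COLUMN_REF_HINT);  // rewritten JSON ref
}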
View File

@ -79,7 +79,7 @@ public class EliminateOveruseColumnAccessPathRule implements TreeRewriteRule {
}
@Override
public Optional<ColumnRefSet> visitPhysicalScan(OptExpression optExpression,
public Optional<ColumnRefSet> visitPhysicalOlapScan(OptExpression optExpression,
ColumnRefSet parentUsedColumnRefs) {
Preconditions.checkState(parentUsedColumnRefs != null);
PhysicalScanOperator scan = optExpression.getOp().cast();
@ -107,6 +107,7 @@ public class EliminateOveruseColumnAccessPathRule implements TreeRewriteRule {
.collect(Collectors.toMap(e -> e.getValue().getName(), Map.Entry::getKey));
Predicate<ColumnAccessPath> isOveruseProjecting = accessPath ->
columnNameToIdMap.containsKey(accessPath.getPath()) &&
parentUsedColumnRefs.contains(Objects.requireNonNull(columnNameToIdMap.get(accessPath.getPath())));
Map<Boolean, List<ColumnAccessPath>> subfieldPruningProjectingGroups = subfieldPruningProjectings

View File

@ -15,6 +15,7 @@
package com.starrocks.sql.optimizer.rule.tree;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.starrocks.catalog.Column;
@ -28,10 +29,17 @@ import com.starrocks.lake.LakeTable;
import com.starrocks.qe.SessionVariable;
import com.starrocks.sql.optimizer.OptExpression;
import com.starrocks.sql.optimizer.OptExpressionVisitor;
import com.starrocks.sql.optimizer.OptimizerContext;
import com.starrocks.sql.optimizer.base.ColumnRefFactory;
import com.starrocks.sql.optimizer.base.ColumnRefSet;
import com.starrocks.sql.optimizer.operator.Operator;
import com.starrocks.sql.optimizer.operator.OperatorBuilderFactory;
import com.starrocks.sql.optimizer.operator.physical.PhysicalScanOperator;
import com.starrocks.sql.optimizer.operator.OperatorType;
import com.starrocks.sql.optimizer.operator.logical.LogicalMetaScanOperator;
import com.starrocks.sql.optimizer.operator.logical.LogicalOlapScanOperator;
import com.starrocks.sql.optimizer.operator.logical.LogicalProjectOperator;
import com.starrocks.sql.optimizer.operator.logical.LogicalScanOperator;
import com.starrocks.sql.optimizer.operator.pattern.Pattern;
import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
import com.starrocks.sql.optimizer.operator.scalar.ConstantOperator;
@ -39,8 +47,9 @@ import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
import com.starrocks.sql.optimizer.rewrite.ScalarOperatorRewriteContext;
import com.starrocks.sql.optimizer.rewrite.ScalarOperatorRewriter;
import com.starrocks.sql.optimizer.rewrite.scalar.BottomUpScalarOperatorRewriteRule;
import com.starrocks.sql.optimizer.rule.RuleType;
import com.starrocks.sql.optimizer.rule.transformation.TransformationRule;
import com.starrocks.sql.optimizer.rule.tree.prunesubfield.SubfieldAccessPathNormalizer;
import com.starrocks.sql.optimizer.task.TaskContext;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.logging.log4j.LogManager;
@ -51,7 +60,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/**
* JsonPathRewriteRule rewrites JSON function calls to column access paths for better performance.
@ -67,11 +76,12 @@ import java.util.regex.Pattern;
* - get_json_double
* - get_json_bool
*/
public class JsonPathRewriteRule implements TreeRewriteRule {
public class JsonPathRewriteRule extends TransformationRule {
private static final Logger LOG = LogManager.getLogger(JsonPathRewriteRule.class);
private static final int DEFAULT_JSON_FLATTEN_DEPTH = 20;
private static final Pattern JSON_PATH_VALID_PATTERN = Pattern.compile("^[a-zA-Z0-9_]+$");
private static final java.util.regex.Pattern JSON_PATH_VALID_PATTERN =
java.util.regex.Pattern.compile("^[a-zA-Z0-9_]+$");
public static final String COLUMN_REF_HINT = "JsonPathExtended";
private static final Set<String> SUPPORTED_JSON_FUNCTIONS = Set.of(
FunctionSet.GET_JSON_STRING,
@ -80,20 +90,34 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
FunctionSet.GET_JSON_BOOL
);
protected JsonPathRewriteRule(OperatorType operatorType) {
super(RuleType.TF_JSON_PATH_REWRITE, Pattern.create(operatorType));
}
public static JsonPathRewriteRule createForOlapScan() {
return new JsonPathRewriteRule(OperatorType.LOGICAL_OLAP_SCAN);
}
public static JsonPathRewriteRule createForMetaScan() {
return new JsonPathRewriteRule(OperatorType.LOGICAL_PROJECT);
}
@Override
public OptExpression rewrite(OptExpression root, TaskContext taskContext) {
SessionVariable variables = taskContext.getOptimizerContext().getSessionVariable();
public List<OptExpression> transform(OptExpression root, OptimizerContext optimizerContext) {
SessionVariable variables = optimizerContext.getSessionVariable();
if (!variables.isEnableJSONV2Rewrite() || variables.isCboUseDBLock()) {
return root;
return List.of(root);
}
ColumnRefFactory columnRefFactory = taskContext.getOptimizerContext().getColumnRefFactory();
ColumnRefFactory columnRefFactory = optimizerContext.getColumnRefFactory();
try {
JsonPathRewriteVisitor visitor = new JsonPathRewriteVisitor(columnRefFactory);
return root.getOp().accept(visitor, root, null);
root = root.getOp().accept(visitor, root, null);
optimizerContext.getTaskContext().getRequiredColumns().union(root.getOutputColumns());
return List.of(root);
} catch (Exception e) {
LOG.warn("Failed to rewrite JSON paths in expression: {}", root, e);
return root;
return List.of(root);
}
}
@ -150,6 +174,7 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
// Create a ref
ColumnRefOperator newColumnRef = columnRefFactory.create(path, jsonPath.getValueType(), true);
newColumnRef.setHints(List.of(COLUMN_REF_HINT));
columnRefFactory.updateColumnRefToColumns(newColumnRef, extendedColumn, tableAndColumn.first);
pathMap.put(fullPath, newColumnRef);
@ -168,6 +193,12 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
// this rule is only applied during query planning, thus the Table here is already copied for the
// query. So this change would not affect the original table schema.
Column extendedColumn = new Column(path, jsonPath.getValueType(), true);
// Allocate the unique id for extended column
OlapTable olapTable = (OlapTable) table;
int nextUniqueId = olapTable.incAndGetMaxColUniqueId();
extendedColumn.setUniqueId(nextUniqueId);
table.addColumn(extendedColumn);
return extendedColumn;
} else {
@ -203,37 +234,83 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
return OptExpression.builder().with(optExpr).setInputs(newInputs).build();
}
private Map<ColumnRefOperator, ScalarOperator> rewriteProjections(
Map<ColumnRefOperator, ScalarOperator> originalProjections,
JsonPathRewriteContext context,
JsonPathExpressionRewriter rewriter) {
Map<ColumnRefOperator, ScalarOperator> rewritten = Maps.newHashMap();
@Override
public OptExpression visitLogicalProject(OptExpression optExpr, Void v) {
Operator child = optExpr.inputAt(0).getOp();
if (child instanceof LogicalMetaScanOperator) {
return rewriteMetaScan(optExpr, v);
}
return optExpr;
}
for (var entry : originalProjections.entrySet()) {
ScalarOperator rewrittenExpr = rewriteScalar(entry.getValue(), context, rewriter);
if (!rewrittenExpr.equals(entry.getValue())) {
rewritten.put(entry.getKey(), rewrittenExpr);
} else {
// Keep original expression if no change
rewritten.put(entry.getKey(), entry.getValue());
}
// PROJECT(get_json_string(c1, 'f1')) -> META_SCAN(c1)
// =>
// PROJECT(c1.f1) -> META_SCAN(c1.f1)
private OptExpression rewriteMetaScan(OptExpression optExpr, Void v) {
LogicalProjectOperator project = (LogicalProjectOperator) optExpr.getOp();
LogicalMetaScanOperator metaScan = (LogicalMetaScanOperator) optExpr.inputAt(0).getOp();
LogicalMetaScanOperator.Builder scanBuilder =
LogicalMetaScanOperator.builder().withOperator(metaScan);
JsonPathRewriteContext context = new JsonPathRewriteContext(columnRefFactory);
JsonPathExpressionRewriter rewriter = new JsonPathExpressionRewriter(context);
// rewrite project
Map<ColumnRefOperator, ScalarOperator> newProjection = Maps.newHashMap();
boolean hasChanges = false;
for (var entry : project.getColumnRefMap().entrySet()) {
ScalarOperator rewritten = rewriteScalar(entry.getValue(), context, rewriter);
newProjection.put(entry.getKey(), rewritten);
hasChanges = hasChanges || !rewritten.equals(entry.getValue());
}
return rewritten;
// Check if any changes were made
if (!hasChanges) {
return optExpr;
}
LogicalProjectOperator newProject = new LogicalProjectOperator(newProjection);
Map<ColumnRefOperator, Column> metaScanColumnMap =
ImmutableMap.<ColumnRefOperator, Column>builder()
.putAll(metaScan.getColRefToColumnMetaMap())
.putAll(rewriter.getExtendedColumns())
.build();
scanBuilder.setColRefToColumnMetaMap(metaScanColumnMap);
// Record the access path into scan node
List<ColumnAccessPath> paths = Lists.newArrayList();
for (var entry : rewriter.getExtendedColumns().entrySet()) {
ColumnAccessPath path = JsonPathRewriteContext.pathFromColumn(entry.getValue());
paths.add(path);
}
scanBuilder.setColumnAccessPaths(paths);
LogicalMetaScanOperator newMetaScan = scanBuilder.build();
OptExpression newMetaScanExpr =
OptExpression.builder()
.with(optExpr.inputAt(0))
.setOp(newMetaScan)
.build();
return OptExpression.builder().with(optExpr)
.setOp(newProject)
.setInputs(Lists.newArrayList(newMetaScanExpr))
.build();
}
@Override
public OptExpression visitPhysicalOlapScan(OptExpression optExpr, Void v) {
return rewritePhysicalScan(optExpr, v);
public OptExpression visitLogicalTableScan(OptExpression optExpr, Void v) {
if (!(optExpr.getOp() instanceof LogicalOlapScanOperator)) {
return optExpr;
}
return rewriteLogicalScan(optExpr, v);
}
/**
* Rewrites PhysicalScanOperator to handle JSON path access.
*/
private OptExpression rewritePhysicalScan(OptExpression optExpr, Void v) {
PhysicalScanOperator scanOperator = (PhysicalScanOperator) optExpr.getOp();
PhysicalScanOperator.Builder builder =
(PhysicalScanOperator.Builder) OperatorBuilderFactory.build(scanOperator)
private OptExpression rewriteLogicalScan(OptExpression optExpr, Void v) {
LogicalScanOperator scanOperator = (LogicalScanOperator) optExpr.getOp();
LogicalScanOperator.Builder builder =
(LogicalScanOperator.Builder) OperatorBuilderFactory.build(scanOperator)
.withOperator(scanOperator);
JsonPathRewriteContext context = new JsonPathRewriteContext(columnRefFactory);
@ -243,20 +320,34 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
builder.setPredicate(rewriteScalar(scanOperator.getPredicate(), context, rewriter));
// Rewrite projection if exists
ColumnRefSet requiredColumnSet = new ColumnRefSet();
if (builder.getPredicate() != null) {
requiredColumnSet.union(builder.getPredicate().getUsedColumns());
}
if (scanOperator.getProjection() != null) {
Map<ColumnRefOperator, ScalarOperator> mapping = Maps.newHashMap();
for (var entry : scanOperator.getProjection().getColumnRefMap().entrySet()) {
mapping.put(entry.getKey(), rewriteScalar(entry.getValue(), context, rewriter));
}
builder.getProjection().getColumnRefMap().putAll(mapping);
mapping.values().forEach(x -> requiredColumnSet.union(x.getUsedColumns()));
} else {
scanOperator.getOutputColumns().forEach(requiredColumnSet::union);
}
if (MapUtils.isNotEmpty(rewriter.getExtendedColumns())) {
// Add extended columns to scan operator
Map<ColumnRefOperator, Column> colRefToColumnMetaMap = Maps.newHashMap();
colRefToColumnMetaMap.putAll(scanOperator.getColRefToColumnMetaMap());
colRefToColumnMetaMap.putAll(rewriter.getExtendedColumns());
for (var entry : scanOperator.getColRefToColumnMetaMap().entrySet()) {
if (requiredColumnSet.contains(entry.getKey())) {
colRefToColumnMetaMap.put(entry.getKey(), entry.getValue());
}
}
builder.setColRefToColumnMetaMap(colRefToColumnMetaMap);
for (ColumnRefOperator col : rewriter.getExtendedColumns().keySet()) {
optExpr.getOutputColumns().union(col);
}
// Add access paths
List<ColumnAccessPath> paths = Lists.newArrayList();
@ -286,6 +377,7 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
* Rewrites JSON function calls to column access expressions.
*/
private static class JsonPathExpressionRewriter extends BottomUpScalarOperatorRewriteRule {
private final JsonPathRewriteContext context;
public JsonPathExpressionRewriter(JsonPathRewriteContext context) {
@ -346,7 +438,8 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
ColumnAccessPath accessPath = ColumnAccessPath.createLinearPath(fullPath, resultType);
Pair<Boolean, ColumnRefOperator> columnResult = context.getOrCreateColumn(jsonColumn, accessPath);
// Note: extendedColumns are now automatically recorded in context.getOrCreateColumn()
// Return the new column reference for the JSON path
// This allows the optimizer to work with the JSON field directly
return columnResult.second;
}
@ -355,11 +448,9 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
* Returns null if the path could not be parsed.
*/
private static List<String> parseJsonPath(String path) {
List<String> result = Lists.newArrayList();
boolean wasTruncated =
SubfieldAccessPathNormalizer.parseSimpleJsonPath(DEFAULT_JSON_FLATTEN_DEPTH, path, result);
List<String> result = SubfieldAccessPathNormalizer.parseSimpleJsonPath(path);
// If the path could not be parsed, return null to prevent incorrect rewriting
if (wasTruncated) {
if (result.isEmpty()) {
return null;
}
return result;

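End to end, the rule replaces a JSON getter with a hinted column ref backed by an extended access path; a hedged illustration matching test case [1] further down:

// SQL:   select get_json_string(c2, 'f2') as f2_str from extend_predicate
// plan:  1:Project
//        |  <slot 3> : 4: c2.f2     -- new ref carrying the "JsonPathExtended" hint
// scan:  ExtendedColumnAccessPath: [/c2(varchar)/f2(varchar)]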
View File

@ -65,6 +65,7 @@ import com.starrocks.sql.optimizer.operator.scalar.LikePredicateOperator;
import com.starrocks.sql.optimizer.operator.scalar.MatchExprOperator;
import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
import com.starrocks.sql.optimizer.operator.scalar.ScalarOperatorVisitor;
import com.starrocks.sql.optimizer.rule.tree.JsonPathRewriteRule;
import com.starrocks.sql.optimizer.statistics.CacheDictManager;
import com.starrocks.sql.optimizer.statistics.CacheRelaxDictManager;
import com.starrocks.sql.optimizer.statistics.ColumnDict;
@ -264,7 +265,10 @@ public class DecodeCollector extends OptExpressionVisitor<DecodeInfo, DecodeInfo
List<ScalarOperator> dictExprList = stringExpressions.getOrDefault(cid, Collections.emptyList());
long allExprNum = dictExprList.size();
// only query original string-column
long worthless = dictExprList.stream().filter(ScalarOperator::isColumnRef).count();
long worthless = dictExprList.stream()
.filter(ScalarOperator::isColumnRef)
.filter(x -> !((ColumnRefOperator) x).getHints().contains(JsonPathRewriteRule.COLUMN_REF_HINT))
.count();
// we believe that the more complex the expressions using the dict-column are, the better the performance will be
if (worthless == 0 && allExprNum != 0) {
context.allStringColumns.add(cid);
@ -690,16 +694,21 @@ public class DecodeCollector extends OptExpressionVisitor<DecodeInfo, DecodeInfo
continue;
}
ColumnStatistic columnStatistic = GlobalStateMgr.getCurrentState().getStatisticStorage()
.getColumnStatistic(table, column.getName());
// Condition 2: the varchar column is low cardinality string column
// If it's not an extended column, we have to check the cardinality of the column.
// TODO(murphy) support collect cardinality of extended column
if (!checkExtendedColumn(scan, column)) {
ColumnStatistic columnStatistic = GlobalStateMgr.getCurrentState().getStatisticStorage()
.getColumnStatistic(table, column.getName());
// Condition 2: the varchar column is low cardinality string column
boolean alwaysCollectDict = sessionVariable.isAlwaysCollectDict();
if (!alwaysCollectDict && !column.getType().isArrayType() && !FeConstants.USE_MOCK_DICT_MANAGER &&
(columnStatistic.isUnknown() ||
columnStatistic.getDistinctValuesCount() > CacheDictManager.LOW_CARDINALITY_THRESHOLD)) {
LOG.debug("{} isn't low cardinality string column", column.getName());
continue;
boolean alwaysCollectDict = sessionVariable.isAlwaysCollectDict();
if (!alwaysCollectDict && !column.getType().isArrayType() && !FeConstants.USE_MOCK_DICT_MANAGER &&
(columnStatistic.isUnknown() ||
columnStatistic.getDistinctValuesCount() >
CacheDictManager.LOW_CARDINALITY_THRESHOLD)) {
LOG.debug("{} isn't low cardinality string column", column.getName());
continue;
}
}
// Condition 3: the varchar column has collected global dict
@ -874,6 +883,26 @@ public class DecodeCollector extends OptExpressionVisitor<DecodeInfo, DecodeInfo
return true;
}
/**
* Check if the column is an extended string column, if so, it can be used for global dict optimization.
*/
private boolean checkExtendedColumn(PhysicalOlapScanOperator scan, ColumnRefOperator column) {
if (!sessionVariable.isEnableJSONV2DictOpt()) {
return false;
}
String colName = scan.getColRefToColumnMetaMap().get(column).getName();
for (ColumnAccessPath path : scan.getColumnAccessPaths()) {
if (path.isExtended() &&
path.getLinearPath().equals(colName) &&
path.getType() == TAccessPathType.ROOT &&
path.getValueType().isStringType()) {
return true;
}
}
return false;
}
private void collectPredicate(Operator operator, DecodeInfo info) {
if (operator.getPredicate() == null) {
return;

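When checkExtendedColumn passes, the low-cardinality rewrite applies to a JSON subfield exactly as to a plain string column; a hedged illustration matching the dict-rewrite test cases further down:

// SQL:   select count(*) from extend_predicate2 where get_json_string(c2, 'f1') = 'value'
// plan:  Predicates: DictDecode(7: c2.f1, [<place-holder> = 'value'])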
View File

@ -317,29 +317,27 @@ public class SubfieldAccessPathNormalizer {
// $.a#b.c -> [a#b, c]
// $.a.b.c.d.e.f -> [a, b, c, d, e, f] -- the flatten-depth truncation has been removed
// a.b.c -> [a, b, c]
public static boolean parseSimpleJsonPath(int jsonFlattenDepth, String path, List<String> result) {
public static List<String> parseSimpleJsonPath(String path) {
List<String> result = Lists.newArrayList();
path = StringUtils.trimToEmpty(path);
if (StringUtils.isBlank(path) || StringUtils.contains(path, "..") || StringUtils.equals("$", path) ||
StringUtils.countMatches(path, "\"") % 2 != 0) {
// .. is recursive search in json path, not supported
// unpaired quote char
return false;
return result;
}
StrTokenizer tokenizer = new StrTokenizer(path, '.', '"');
String[] tokens = tokenizer.getTokenArray();
if (tokens.length < 1) {
return false;
return result;
}
int size = jsonFlattenDepth;
int i = 0;
if (tokens[0].equals("$")) {
size++;
i++;
}
size = Math.min(tokens.length, size);
for (; i < size; i++) {
for (; i < tokens.length; i++) {
if (tokens[i].contains(".")) {
result.add("\"" + tokens[i] + "\"");
continue;
@ -347,7 +345,7 @@ public class SubfieldAccessPathNormalizer {
result.add(tokens[i]);
}
}
return size < tokens.length;
return result;
}
public void collect(List<ScalarOperator> scalarOperators) {

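A hedged JUnit-style sketch of the new return convention (same package as SubfieldAccessPathNormalizer assumed; the expectations mirror the comments above):

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.List;
import org.junit.jupiter.api.Test;

class ParseSimpleJsonPathSketchTest {
    @Test
    void parsesPiecesAndRejectsUnsupportedSyntax() {
        // a leading "$" is consumed; the remaining segments become pieces
        assertEquals(List.of("a", "b", "c"),
                SubfieldAccessPathNormalizer.parseSimpleJsonPath("$.a.b.c"));
        assertEquals(List.of("a", "b", "c"),
                SubfieldAccessPathNormalizer.parseSimpleJsonPath("a.b.c"));
        // recursive descent ("..") and unpaired quotes are unsupported: empty result
        assertTrue(SubfieldAccessPathNormalizer.parseSimpleJsonPath("$..a").isEmpty());
    }
}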
View File

@ -20,6 +20,7 @@ import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.starrocks.catalog.Column;
import com.starrocks.catalog.ColumnId;
import com.starrocks.catalog.Database;
import com.starrocks.common.Config;
@ -29,7 +30,9 @@ import com.starrocks.common.ThreadPoolManager;
import com.starrocks.memory.MemoryTrackable;
import com.starrocks.qe.ConnectContext;
import com.starrocks.server.GlobalStateMgr;
import com.starrocks.sql.common.MetaUtils;
import com.starrocks.sql.optimizer.base.ColumnIdentifier;
import com.starrocks.sql.optimizer.rule.tree.prunesubfield.SubfieldAccessPathNormalizer;
import com.starrocks.thrift.TGlobalDict;
import com.starrocks.thrift.TStatisticData;
import org.apache.logging.log4j.LogManager;
@ -48,6 +51,34 @@ import java.util.concurrent.Executor;
import static com.starrocks.statistic.StatisticExecutor.queryDictSync;
/**
* CacheDictManager manages global dictionary caching for low cardinality string columns.
* <p>
* Global Dictionary Maintenance:
* <p>
* 1. Dictionary Collection:
* - Dictionaries are collected from BE (Backend) nodes via queryDictSync()
* - Only columns with cardinality <= LOW_CARDINALITY_THRESHOLD are considered
* - Dictionary data size must be <= 1MB to ensure BE can generate dictionary pages after compaction
* <p>
* 2. Cache Management:
* - Uses Caffeine AsyncLoadingCache with maximum size from Config.statistic_dict_columns
* - Cache entries are keyed by ColumnIdentifier (tableId + columnName)
* - Cache automatically loads dictionaries asynchronously when accessed
* <p>
* 3. Version Control:
* - Each dictionary has a version timestamp for consistency checking
* - Dictionaries are invalidated when versions become outdated
* - Version mismatches trigger dictionary removal and re-collection
* <p>
* 4. Data Update Handling:
* - When data is updated (INSERT/UPDATE/DELETE), the dictionary version becomes stale
* - hasGlobalDict() checks version timestamps and invalidates outdated dictionaries
* - updateGlobalDict() updates version timestamps when new data is collected
* - Version mismatches between collectVersion and dictCollectVersion trigger removal
* - During data updates, queries may fall back to non-dictionary optimization temporarily
* - New dictionaries are automatically collected when cache is accessed after invalidation
*/
public class CacheDictManager implements IDictManager, MemoryTrackable {
private static final Logger LOG = LogManager.getLogger(CacheDictManager.class);
private static final Set<ColumnIdentifier> NO_DICT_STRING_COLUMNS = Sets.newConcurrentHashSet();
@ -60,7 +91,7 @@ public class CacheDictManager implements IDictManager, MemoryTrackable {
private static final CacheDictManager INSTANCE = new CacheDictManager();
protected static CacheDictManager getInstance() {
public static CacheDictManager getInstance() {
return INSTANCE;
}
@ -235,6 +266,28 @@ public class CacheDictManager implements IDictManager, MemoryTrackable {
LOG.info("remove dict for table:{} column:{}", tableId, columnName);
dictStatistics.synchronous().invalidate(columnIdentifier);
// Remove all subfields' dicts if the column is a JSON type
try {
List<String> parts = SubfieldAccessPathNormalizer.parseSimpleJsonPath(columnName.getId());
if (parts.size() > 1) {
String rootColumn = parts.get(0);
columnIdentifier = new ColumnIdentifier(tableId, ColumnId.create(rootColumn));
// Set dbId for the columnIdentifier to enable MetaUtils.getColumnByColumnId lookup
long dbId = MetaUtils.lookupDbIdByTableId(tableId);
if (dbId != -1) {
columnIdentifier.setDbId(dbId);
Column column = MetaUtils.getColumnByColumnId(columnIdentifier);
if (column.getType().isJsonType()) {
removeGlobalDictForJson(tableId, column.getColumnId());
}
} else {
LOG.debug("Could not find db id for table {}, skipping JSON subfield cleanup", tableId);
}
}
} catch (Exception ignored) {
}
}
@Override
@ -298,6 +351,83 @@ public class CacheDictManager implements IDictManager, MemoryTrackable {
return Optional.empty();
}
public Optional<ColumnDict> getGlobalDictSync(long tableId, ColumnId columnName) {
ColumnIdentifier columnIdentifier = new ColumnIdentifier(tableId, columnName);
// NOTE: it's used to patch the dbId, because asyncLoad requires the dbId
long dbId = MetaUtils.lookupDbIdByTableId(tableId);
if (dbId == -1) {
throw new RuntimeException("table not found " + tableId);
}
columnIdentifier.setDbId(dbId);
CompletableFuture<Optional<ColumnDict>> columnFuture = dictStatistics.get(columnIdentifier);
try {
return columnFuture.get();
} catch (Exception e) {
LOG.warn(String.format("get dict cache for %d: %s failed", tableId, columnName), e);
}
return Optional.empty();
}
// TODO(murphy) support an efficient dict cache for JSON subfields
@Override
public boolean hasGlobalDictForJson(long tableId, ColumnId columnName) {
// TODO(murphy) optimize performance
return dictStatistics.asMap().keySet().stream().anyMatch(column -> {
if (column.getTableId() != tableId) {
return false;
}
List<String> parts = SubfieldAccessPathNormalizer.parseSimpleJsonPath(column.getColumnName().getId());
if (parts.size() > 1 && parts.get(0).equalsIgnoreCase(columnName.getId())) {
return true;
}
return false;
});
}
// TODO(murphy) support an efficient dict cache for JSON subfields
@Override
public List<ColumnDict> getGlobalDictForJson(long tableId, ColumnId columnName) {
// TODO(murphy) optimize performance
return dictStatistics.synchronous().asMap().entrySet().stream().filter(kv -> {
ColumnIdentifier column = kv.getKey();
if (column.getTableId() != tableId) {
return false;
}
List<String> parts = SubfieldAccessPathNormalizer.parseSimpleJsonPath(column.getColumnName().getId());
if (parts.size() > 1 && parts.get(0).equalsIgnoreCase(columnName.getId())) {
return true;
}
return false;
}).flatMap(x -> x.getValue().stream()).toList();
}
// TODO(murphy) support an efficient dict cache for JSON subfields
@Override
public void removeGlobalDictForJson(long tableId, ColumnId columnName) {
if (GlobalStateMgr.isCheckpointThread()) {
return;
}
// TODO(murphy) optimize performance
// Remove all global dict entries for JSON subfields of the given column in the given table.
// This is used to invalidate all dicts for subfields of a JSON column (e.g., c2.f1, c2.f2, etc.)
List<ColumnIdentifier> toRemove = new ArrayList<>();
for (ColumnIdentifier column : dictStatistics.asMap().keySet()) {
if (column.getTableId() != tableId) {
continue;
}
List<String> parts = SubfieldAccessPathNormalizer.parseSimpleJsonPath(column.getColumnName().getId());
if (parts.size() > 1 && parts.get(0).equalsIgnoreCase(columnName.getId())) {
toRemove.add(column);
}
}
for (ColumnIdentifier column : toRemove) {
dictStatistics.synchronous().invalidate(column);
}
}
@Override
public Map<String, Long> estimateCount() {
return ImmutableMap.of("ColumnDict", (long) dictStatistics.asMap().size());

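A hedged sketch tying the new APIs together (ids illustrative): JSON subfield dictionaries share the Caffeine cache, keyed by linear paths such as c2.f1, so a root-column sweep invalidates every subfield entry.

CacheDictManager mgr = CacheDictManager.getInstance();
// synchronous fetch; the dbId is patched internally via MetaUtils.lookupDbIdByTableId
Optional<ColumnDict> dict = mgr.getGlobalDictSync(tableId, ColumnId.create("c2.f1"));
// true when any cached key for this table parses to [c2, <subfield>]
boolean hasAny = mgr.hasGlobalDictForJson(tableId, ColumnId.create("c2"));
// drops c2.f1, c2.f2, ... in one sweep (used on data updates, see the txn log applier)
mgr.removeGlobalDictForJson(tableId, ColumnId.create("c2"));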
View File

@ -16,7 +16,9 @@ package com.starrocks.sql.optimizer.statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.gson.Gson;
import com.starrocks.common.Config;
import com.starrocks.persist.gson.GsonUtils;
import java.nio.ByteBuffer;
@ -46,4 +48,26 @@ public final class ColumnDict extends StatsVersion {
public int getDictSize() {
return dict.size();
}
public String toJson() {
Gson gson = GsonUtils.GSON;
// Manually build a JSON object with all fields
// Convert ByteBuffer keys to UTF-8 strings for JSON compatibility
java.util.Map<String, Integer> dictMap = new java.util.HashMap<>();
for (java.util.Map.Entry<ByteBuffer, Integer> entry : dict.entrySet()) {
ByteBuffer key = entry.getKey();
// Duplicate to avoid changing position
ByteBuffer dup = key.duplicate();
byte[] bytes = new byte[dup.remaining()];
dup.get(bytes);
// Convert bytes to string using UTF-8 encoding
String strKey = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
dictMap.put(strKey, entry.getValue());
}
java.util.Map<String, Object> jsonMap = new java.util.HashMap<>();
jsonMap.put("dict", dictMap);
jsonMap.put("collectedVersion", collectedVersion);
jsonMap.put("version", version);
return gson.toJson(jsonMap);
}
}

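A hedged example of the produced shape (entries illustrative; keys are the dictionary words decoded as UTF-8):

// {"dict":{"low":1,"medium":2,"high":3},"collectedVersion":42,"version":42}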
View File

@ -18,6 +18,7 @@ package com.starrocks.sql.optimizer.statistics;
import com.starrocks.catalog.ColumnId;
import com.starrocks.common.FeConstants;
import java.util.List;
import java.util.Optional;
public interface IDictManager {
@ -36,6 +37,24 @@ public interface IDictManager {
// You should call `hasGlobalDict` firstly to ensure the global dict exist
Optional<ColumnDict> getGlobalDict(long tableId, ColumnId columnName);
/**
* For JSON type column, we use a different dict manager
* For example, '{"f1": "a", "f2": "b"}' will be stored as multiple entries in the cache.
* Cache Invalidation: All fields in the JSON will be invalidated when the JSON is updated
* Cache Population: the cache is populated when the JSON field is first accessed
* TODO(murphy) invalidate individual json field when the JSON is updated
*/
default boolean hasGlobalDictForJson(long tableId, ColumnId columnName) {
return false;
}
default List<ColumnDict> getGlobalDictForJson(long tableId, ColumnId columnName) {
return List.of();
}
default void removeGlobalDictForJson(long tableId, ColumnId columnName) {
}
static IDictManager getInstance() {
if (FeConstants.USE_MOCK_DICT_MANAGER) {
return MockDictManager.getInstance();

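A hedged illustration of the contract above: for a JSON column c2 whose values look like '{"f1": "a", "f2": "b"}', the manager may hold one dict per subfield, addressed through the root column id.

IDictManager dm = IDictManager.getInstance();
List<ColumnDict> subfieldDicts = dm.getGlobalDictForJson(tableId, ColumnId.create("c2")); // e.g. c2.f1, c2.f2
dm.removeGlobalDictForJson(tableId, ColumnId.create("c2"));  // invalidates all of them together
// the default methods are no-ops, so managers without JSON support keep working unchanged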
View File

@ -825,7 +825,8 @@ public class PlanFragmentBuilder {
.collect(Collectors.toSet());
if (scan.getPredicate() == null) {
// remove path which has pruned
return scan.getColumnAccessPaths().stream().filter(p -> checkNames.contains(p.getPath()))
return scan.getColumnAccessPaths().stream()
.filter(p -> checkNames.contains(p.getPath()) || p.isExtended())
.collect(Collectors.toList());
}
@ -1031,6 +1032,7 @@ public class PlanFragmentBuilder {
scan.getSelectPartitionNames(),
context.getConnectContext().getCurrentComputeResource());
scanNode.setColumnAccessPaths(scan.getColumnAccessPaths());
scanNode.computeRangeLocations(computeResource);
scanNode.computeStatistics(optExpression.getStatistics());
currentExecGroup.add(scanNode, true);

View File

@ -23,7 +23,6 @@ import com.starrocks.catalog.Column;
import com.starrocks.catalog.ColumnId;
import com.starrocks.catalog.Database;
import com.starrocks.catalog.InternalCatalog;
import com.starrocks.catalog.OlapTable;
import com.starrocks.catalog.PhysicalPartition;
import com.starrocks.catalog.Table;
import com.starrocks.catalog.Type;
@ -48,6 +47,7 @@ import com.starrocks.sql.ast.StatementBase;
import com.starrocks.sql.common.ErrorType;
import com.starrocks.sql.common.MetaUtils;
import com.starrocks.sql.common.StarRocksPlannerException;
import com.starrocks.sql.optimizer.rule.tree.prunesubfield.SubfieldAccessPathNormalizer;
import com.starrocks.sql.optimizer.statistics.CacheDictManager;
import com.starrocks.sql.optimizer.statistics.IRelaxDictManager;
import com.starrocks.sql.parser.SqlParser;
@ -322,15 +322,46 @@ public class StatisticExecutor {
table.getName());
}
OlapTable olapTable = (OlapTable) table;
long version = table.getPartitions().stream().flatMap(p -> p.getSubPartitions().stream()).map(
PhysicalPartition::getVisibleVersionTime).max(Long::compareTo).orElse(0L);
String columnName = MetaUtils.getColumnNameByColumnId(dbId, tableId, columnId);
List<String> pieces = SubfieldAccessPathNormalizer.parseSimpleJsonPath(columnId.getId());
if (pieces.size() == 1) {
String columnName = MetaUtils.getColumnNameByColumnId(dbId, tableId, columnId);
String catalogName = InternalCatalog.DEFAULT_INTERNAL_CATALOG_NAME;
String sql = "select cast(" + StatsConstants.STATISTIC_DICT_VERSION + " as Int), " +
"cast(" + version + " as bigint), " +
"dict_merge(" + StatisticUtils.quoting(columnName) + ", " +
CacheDictManager.LOW_CARDINALITY_THRESHOLD + ") as _dict_merge_" + columnName +
" from " + StatisticUtils.quoting(catalogName, db.getOriginName(), table.getName()) + " [_META_]";
return executeStatisticDQLWithoutContext(sql);
} else {
return queryDictSyncForJson(dbId, tableId, columnId, version, db, table);
}
}
private static Pair<List<TStatisticData>, Status> queryDictSyncForJson(Long dbId, Long tableId,
ColumnId columnId,
long version,
Database db, Table table) throws TException {
List<String> pieces = SubfieldAccessPathNormalizer.parseSimpleJsonPath(columnId.getId());
if (pieces.isEmpty()) {
throw new RuntimeException("invalid json path: " + columnId);
}
ColumnId realColumnId = ColumnId.create(pieces.get(0));
Column column = MetaUtils.getColumnByColumnId(dbId, tableId, realColumnId);
if (!column.getType().equals(Type.JSON)) {
throw new SemanticException("Column '%s' is not a JSON type", column.getName());
}
String fullPath = String.join(".", pieces);
String path = pieces.stream().skip(1).collect(Collectors.joining("."));
String columnRef = String.format("get_json_string(%s, '%s')", StatisticUtils.quoting(column.getName()), path);
String catalogName = InternalCatalog.DEFAULT_INTERNAL_CATALOG_NAME;
String columnAlias = fullPath.replace(".", "_");
String sql = "select cast(" + StatsConstants.STATISTIC_DICT_VERSION + " as Int), " +
"cast(" + version + " as bigint), " +
"dict_merge(" + StatisticUtils.quoting(columnName) + ", " +
CacheDictManager.LOW_CARDINALITY_THRESHOLD + ") as _dict_merge_" + columnName +
"dict_merge(" + columnRef + ", " +
CacheDictManager.LOW_CARDINALITY_THRESHOLD + ") as _dict_merge_" + columnAlias +
" from " + StatisticUtils.quoting(catalogName, db.getOriginName(), table.getName()) + " [_META_]";
return executeStatisticDQLWithoutContext(sql);

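For a column id like c2.f1, the helper above emits roughly the following statement (a sketch; backtick quoting assumed from StatisticUtils.quoting, and 255 is LOW_CARDINALITY_THRESHOLD as seen in test case [20]):

// select cast(<STATISTIC_DICT_VERSION> as Int), cast(<visible_version_time> as bigint),
//        dict_merge(get_json_string(`c2`, 'f1'), 255) as _dict_merge_c2_f1
// from `default_catalog`.`db`.`tbl` [_META_]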
View File

@ -15,6 +15,7 @@
package com.starrocks.transaction;
import com.google.common.collect.Lists;
import com.starrocks.catalog.Column;
import com.starrocks.catalog.ColumnId;
import com.starrocks.catalog.Database;
import com.starrocks.catalog.LocalTablet;
@ -207,6 +208,15 @@ public class OlapTableTxnLogApplier implements TransactionLogApplier {
maxPartitionVersionTime = Math.max(maxPartitionVersionTime, versionTime);
}
// TODO(murphy) don't invalidate all cache columns, only invalidate the columns that are changed
// Invalidate the dict for JSON type
List<Column> jsonColumns = table.getColumns().stream()
.filter(x -> x.getType().isJsonType())
.toList();
for (Column column : jsonColumns) {
IDictManager.getInstance().removeGlobalDictForJson(tableId, column.getColumnId());
}
if (!GlobalStateMgr.isCheckpointThread() && dictCollectedVersions.size() == validDictCacheColumns.size()) {
for (int i = 0; i < validDictCacheColumns.size(); i++) {
ColumnId columnName = validDictCacheColumns.get(i);

View File

@ -14,6 +14,7 @@
package com.starrocks.sql.plan;
import com.starrocks.common.FeConstants;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
@ -30,12 +31,20 @@ public class JsonPathRewriteTest extends PlanTestBase {
starRocksAssert.withTable("create table extend_predicate( c1 int, c2 json ) properties('replication_num'='1')");
starRocksAssert.withTable("create table extend_predicate2( c1 int, c2 json ) properties" +
"('replication_num'='1')");
FeConstants.USE_MOCK_DICT_MANAGER = true;
connectContext.getSessionVariable().setEnableLowCardinalityOptimize(true);
connectContext.getSessionVariable().setUseLowCardinalityOptimizeV2(true);
}
@ParameterizedTest
@MethodSource("jsonPathRewriteTestCases")
public void testExtendPredicateParameterized(String sql, String expectedPlanFragment, String expectedColumnPath)
throws Exception {
connectContext.getSessionVariable().setEnableJSONV2DictOpt(false);
connectContext.getSessionVariable().setUseLowCardinalityOptimizeV2(false);
connectContext.getSessionVariable().setEnableLowCardinalityOptimize(false);
connectContext.getSessionVariable().setQueryTimeoutS(10000);
String plan = getFragmentPlan(sql);
assertContains(plan, expectedPlanFragment);
@ -45,46 +54,50 @@ public class JsonPathRewriteTest extends PlanTestBase {
private static Stream<Arguments> jsonPathRewriteTestCases() {
return of(
// Projection: JSON expression in SELECT list
// [1] Projection: JSON expression in SELECT list
Arguments.of(
"select get_json_string(c2, 'f2') as f2_str from extend_predicate",
" 1:Project\n | <slot 3> : 4: c2.f2\n",
"ExtendedColumnAccessPath: [/c2(varchar)/f2(varchar)]"
),
// [2]
Arguments.of(
"select get_json_string(c2, '$.f2.f3') as f2_str from extend_predicate",
" 1:Project\n | <slot 3> : 4: c2.f2.f3\n",
"ExtendedColumnAccessPath: [/c2(varchar)/f2(varchar)/f3(varchar)]"
),
// [3]
Arguments.of(
"select get_json_string(c2, 'f1.f2.f3') as f2_str from extend_predicate",
" 1:Project\n | <slot 3> : 4: c2.f1.f2.f3\n",
"ExtendedColumnAccessPath: [/c2(varchar)/f1(varchar)/f2(varchar)/f3(varchar)]"
),
// [4]
Arguments.of(
"select get_json_string(c2, 'f1[1]') as f2_str from extend_predicate",
" | <slot 3> : get_json_string(2: c2, 'f1[1]')\n",
""
),
// TODO: support this case
// [5]
Arguments.of(
"select get_json_string(c2, 'f1\".a\"') as f2_str from extend_predicate",
" | <slot 3> : get_json_string(2: c2, 'f1\".a\"')\n",
""
),
// Filter: JSON expression in WHERE clause with different function
// [6] Filter: JSON expression in WHERE clause with different function
Arguments.of(
"select * from extend_predicate where get_json_double(c2, 'f3') > 1.5",
"PREDICATES: 3: c2.f3 > 1.5",
""
),
// Order By: JSON expression in ORDER BY
// [7] Order By: JSON expression in ORDER BY
Arguments.of(
"select * from extend_predicate order by get_json_string(c2, 'f4')",
"order by: <slot 3> 3: get_json_string ASC",
""
),
// Aggregation: JSON expression in GROUP BY
// [8] Aggregation: JSON expression in GROUP BY
Arguments.of(
"select get_json_int(c2, 'f5'), count(*) from extend_predicate group by get_json_int(c2, 'f5')",
" 2:AGGREGATE (update finalize)\n" +
@ -95,7 +108,7 @@ public class JsonPathRewriteTest extends PlanTestBase {
" | <slot 3> : 5: c2.f5\n",
"ExtendedColumnAccessPath: [/c2(bigint(20))/f5(bigint(20))]"
),
// Aggregation: JSON expression in aggregation function
// [9] Aggregation: JSON expression in aggregation function
Arguments.of(
"select sum(get_json_int(c2, 'f6')) from extend_predicate",
" 2:AGGREGATE (update finalize)\n" +
@ -106,23 +119,23 @@ public class JsonPathRewriteTest extends PlanTestBase {
" | <slot 3> : 5: c2.f6\n",
"ExtendedColumnAccessPath: [/c2(bigint(20))/f6(bigint(20))]"
),
// Join: JSON expression in join condition
// [10] Join: JSON expression in join condition
Arguments.of(
"select * from extend_predicate t1 join extend_predicate2 t2 on get_json_int(t1.c2, 'f7') = " +
"get_json_int(t2.c2, 'f7')",
" | <slot 5> : 9: c2.f7\n",
" | <slot 5> : 10: c2.f7\n",
""
),
// Join: JSON expression in projection after join
// [11] Join: JSON expression in projection after join
Arguments.of(
"select get_json_string(t1.c2, 'f8'), get_json_string(t2.c2, 'f8') from extend_predicate t1 " +
"join extend_predicate2 t2 on t1.c1 = t2.c1",
" 3:Project\n" +
" | <slot 3> : 3: c1\n" +
" | <slot 8> : 10: c2.f8\n",
" | <slot 8> : 9: c2.f8\n",
"ExtendedColumnAccessPath: [/c2(varchar)/f8(varchar)]"
),
// Join: self-join with JSON predicate pushdown
// [12] Join: self-join with JSON predicate pushdown
Arguments.of(
"""
select t1.c2
@ -134,7 +147,7 @@ public class JsonPathRewriteTest extends PlanTestBase {
"c2.f2",
"ExtendedColumnAccessPath: [/c2(bigint(20))/f2(bigint(20))]"
),
// Join: self-join with JSON projection pushdown
// [13] Join: self-join with JSON projection pushdown
Arguments.of(
"""
select get_json_int(t1.c2 , 'f2'), get_json_int(t2.c2, 'f2')
@ -145,7 +158,7 @@ public class JsonPathRewriteTest extends PlanTestBase {
"c2.f2",
"ExtendedColumnAccessPath: [/c2(bigint(20))/f2(bigint(20))]"
),
// Join: self-join without JSON expression pushdown
// [14] Join: self-join without JSON expression pushdown
Arguments.of(
"""
select t1.c2
@ -156,40 +169,145 @@ public class JsonPathRewriteTest extends PlanTestBase {
"get_json_int",
"equal join conjunct: [5: get_json_int, BIGINT, true] = [6: get_json_int, BIGINT, true]"
),
// JSON expression in HAVING clause
// [15] JSON expression in HAVING clause
Arguments.of(
"select get_json_int(c2, 'f9'), count(*) from extend_predicate group by get_json_int(c2, " +
"'f9') having get_json_int(c2, 'f9') > 10",
" 1:Project\n" +
" | <slot 3> : 5: c2.f9\n",
"""
select get_json_int(c2, 'f9'), count(*) from extend_predicate
group by get_json_int(c2, 'f9')
having get_json_int(c2, 'f9') > 10
""",
" 1:Project\n | <slot 3> : 5: c2.f9\n",
"ExtendedColumnAccessPath: [/c2(bigint(20))/f9(bigint(20))]"
),
// JSON expression in complex filter (AND/OR)
// [16] JSON expression in complex filter (AND/OR)
Arguments.of(
"select * from extend_predicate where get_json_int(c2, 'f10') = 1 or get_json_string(c2, " +
"'f11') = 'abc'",
"PREDICATES: (3: c2.f10 = 1) OR (4: c2.f11 = 'abc')",
" ExtendedColumnAccessPath: [/c2(bigint(20))/f10(bigint(20)), /c2(varchar)/f11(varchar)]"
),
// JSON expression in nested function
// [17] JSON expression in nested function
Arguments.of(
"select abs(get_json_int(c2, 'f12')) from extend_predicate",
" <slot 3> : abs(4: c2.f12)",
"ExtendedColumnAccessPath: [/c2(bigint(20))/f12(bigint(20))]"
),
// JSON path exceeding depth limit should not be rewritten
// [18] JSON path exceeding the former depth limit is now rewritten (the limit was removed)
Arguments.of(
"select get_json_string(c2, 'f1.f2.f3.f4.f5.f6.f7.f8.f9.f10.f11.f12.f13.f14.f15.f16.f17.f18" +
".f19.f20.f21') from extend_predicate",
" | <slot 3> : get_json_string(2: c2, 'f1.f2.f3.f4.f5.f6.f7.f8.f9.f10.f11.f12.f13.f14.f1.." +
".')",
"<slot 3> : 4: c2.f1.f2.f3.f4.f5.f6.f7.f8.f9.f10.f11.f12.f13.f14.f15.f16.f17.f18.f19.f20.f21\n",
""
),
// JSON path with mixed data types should not be rewritten
// [19] JSON path with mixed data types should not be rewritten
Arguments.of(
"select get_json_string(c2, 'f2'), get_json_int(c2, 'f2') from extend_predicate",
"get_json_string(2: c2, 'f2')",
""
),
// [20] MetaScan
Arguments.of(
"select dict_merge(get_json_string(c2, 'f1'), 255) from extend_predicate [_META_]",
"0:MetaScan\n" +
" Table: extend_predicate\n" +
" <id 6> : dict_merge_c2.f1\n",
" ExtendedColumnAccessPath: [/c2(varchar)/f1(varchar)]\n"
)
);
}
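// A minimal sketch (hypothetical, not part of this patch) of how a simple
// dotted JSON path such as 'f1.f2.f3' splits into the segments behind an
// ExtendedColumnAccessPath like /c2(bigint(20))/f1(bigint(20)); only plain,
// dot-separated keys are considered here.
private static java.util.List<String> splitSimpleJsonPath(String path) {
    java.util.List<String> pieces = new java.util.ArrayList<>();
    for (String piece : path.split("\\.", -1)) {
        if (piece.isEmpty()) {
            // reject malformed paths such as 'a..b' or trailing dots
            return java.util.Collections.emptyList();
        }
        pieces.add(piece);
    }
    return pieces;
}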
@ParameterizedTest
@MethodSource("globalDictRewriteCases")
public void testGlobalDictRewrite(String sql, String expectedPlanFragment) throws Exception {
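// Enable the JSON v2 dict optimization together with both low-cardinality
// switches so the DictDefine/DictDecode rewrites can appear in the verbose plan.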
connectContext.getSessionVariable().setEnableJSONV2DictOpt(true);
connectContext.getSessionVariable().setUseLowCardinalityOptimizeV2(true);
connectContext.getSessionVariable().setEnableLowCardinalityOptimize(true);
String verbosePlan = getVerboseExplain(sql);
assertContains(verbosePlan, expectedPlanFragment);
}
private static Stream<Arguments> globalDictRewriteCases() {
return of(
// 1. Predicate
Arguments.of(
"select count(*) from extend_predicate2 where get_json_string(c2, 'f1') = 'value'",
"Predicates: DictDecode(7: c2.f1, [<place-holder> = 'value'])"
),
// 2. Predicate with count on get_json_string
Arguments.of(
"select count(get_json_string(c2, 'f1')) from extend_predicate2 where get_json_string(c2, " +
"'f1') = 'value'",
"Predicates: DictDecode(6: c2.f1, [<place-holder> = 'value'])"
),
// 3. Aggregation with group by JSON path
Arguments.of(
"select get_json_string(c2, 'f1') k1, count(*) from extend_predicate2 group by k1",
"7 <-> DictDefine(6: c2.f1, [<place-holder>])"
),
// 4. Aggregation with min on JSON path
Arguments.of(
"select min(get_json_string(c2, 'f1')) from extend_predicate2",
" 7 <-> DictDefine(6: c2.f1, [<place-holder>])"
),
// 5. Multiple JSON path accesses in select and where clause
Arguments.of(
"select get_json_string(c2, 'f1'), get_json_string(c2, 'f2') from extend_predicate2 where " +
"get_json_string(c2, 'f1') = 'value' and get_json_string(c2, 'f2') = 'other'",
"Predicates: DictDecode(7: c2.f1, [<place-holder> = 'value']), DictDecode(8: c2.f2, " +
"[<place-holder> = 'other'])"
),
// 6. Aggregation on JSON path
Arguments.of(
"select get_json_string(c2, 'f3'), sum(c1) from extend_predicate2 group by get_json_string" +
"(c2, 'f3')",
" 7 <-> DictDefine(6: c2.f3, [<place-holder>])"
),
// 7. Nested function using JSON path
Arguments.of(
"select upper(get_json_string(c2, 'f4')) from extend_predicate2",
" 3 <-> DictDecode(5: c2.f4, [upper(<place-holder>)])"
),
// 8. JSON path in having clause
Arguments.of(
"select get_json_string(c2, 'f5'), count(*) from extend_predicate2 group by get_json_string" +
"(c2, 'f5') having get_json_string(c2, 'f5') = 'foo'",
"7 <-> DictDefine(6: c2.f5, [<place-holder>])"
),
// 9. JSON path in order by
Arguments.of(
"select get_json_string(c2, 'f6') from extend_predicate2 order by get_json_string(c2, 'f6')",
" 6: DictDefine(5: c2.f6, [<place-holder>])"
),
// 10. JSON path in both select and where, different fields
Arguments.of(
"select get_json_string(c2, 'f7') from extend_predicate2 where get_json_string(c2, 'f8') = " +
"'bar'",
"3 <-> DictDecode(6: c2.f7, [<place-holder>])"
),
// 11. JSON path in a function argument
Arguments.of(
"select length(get_json_string(c2, 'f9')) from extend_predicate2",
"3 <-> DictDecode(5: c2.f9, [length(<place-holder>)])"
),
// 12. JSON path in a filter with OR
Arguments.of(
"select count(*) from extend_predicate2 where get_json_string(c2, 'f10') = 'a' or " +
"get_json_string(c2, 'f11') = 'b'",
" Predicates: (DictDecode(8: c2.f10, [<place-holder> = 'a'])) OR (DictDecode(9: c2.f11, " +
"[<place-holder> = 'b']))"
),
// 13. JSON path in a subquery
Arguments.of(
"select * from extend_predicate2 where c1 in (select c1 from extend_predicate2 where " +
"get_json_string(c2, 'f12') = 'baz')",
"Predicates: 3: c1 IS NOT NULL, DictDecode(7: c2.f12, [<place-holder> = 'baz'])"
),
// 14. JSON path in a case expression
Arguments.of(
"select case when get_json_string(c2, 'f13') = 'x' then 1 else 0 end from extend_predicate2",
"3 <-> DictDecode(5: c2.f13, [if(<place-holder> = 'x', 1, 0)])"
)
);
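// A minimal sketch (hypothetical, not part of this patch) of the idea behind
// the DictDecode(...) predicates in the expected plans above: the predicate is
// evaluated once per dictionary entry (the <place-holder>), then the
// integer-encoded column is scanned and only surviving codes are decoded.
// Assumes 1-based codes, as in the inspect_global_dict output elsewhere in
// this change.
private static java.util.List<String> dictFilter(java.util.Map<String, Integer> dict,
                                                 int[] encodedColumn,
                                                 java.util.function.Predicate<String> pred) {
    boolean[] match = new boolean[dict.size() + 1];
    String[] decode = new String[dict.size() + 1];
    for (java.util.Map.Entry<String, Integer> e : dict.entrySet()) {
        match[e.getValue()] = pred.test(e.getKey());
        decode[e.getValue()] = e.getKey();
    }
    java.util.List<String> out = new java.util.ArrayList<>();
    for (int code : encodedColumn) {
        if (match[code]) {
            out.add(decode[code]); // decode only rows that pass the predicate
        }
    }
    return out;
}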
@ -125,19 +125,6 @@ class ColumnUsageTest extends PlanTestBase {
"|analyze table t0 predicate columns|v1",
"select v1, count(v2) from (select * from t0 order by v1 limit 100) r group by v1" +
"|analyze table t0 predicate columns|v1",
"select case when get_json_string(v_json, 'a') > 1 " +
" then get_json_string(v_json, 'b') else 'small' end as v1_case, " +
"count(*) from tjson " +
"group by 1" +
"|analyze table tjson predicate columns|v_json",
"select case when get_json_string(vvv, 'a') > 1 " +
" then get_json_string(vvv, 'b') else 'small' end as v1_case, " +
"count(*) from (" +
" select json_object('a', get_json_string(v_json, 'a'), " +
" 'b', get_json_int(v_json, 'b')) as vvv " +
" from tjson) r " +
"group by 1" +
"|analyze table tjson predicate columns|v_json",
// with join
"select * from t0 join t1 on t0.v1 = t1.v4" +
@ -0,0 +1,480 @@
-- name: test_normal_flat_json_dict @sequential
update information_schema.be_configs set value = 'true' where name = 'enable_json_flat';
-- result:
-- !result
set enable_profile = true;
-- result:
-- !result
set enable_async_profile = false;
-- result:
-- !result
set cbo_json_v2_rewrite = true;
-- result:
-- !result
set cbo_json_v2_dict_opt = true;
-- result:
-- !result
create view profile_decode as
select trim(unnest)
from table(unnest(split(get_query_profile(last_query_id()), '\n')))
where unnest like '%DICT_DECODE%'
order by unnest;
-- result:
-- !result
CREATE TABLE js2 (
v1 BIGINT NULL,
c1 JSON NULL
)
DUPLICATE KEY (v1)
DISTRIBUTED BY HASH(`v1`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );
-- result:
-- !result
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 10),
'f2', concat('a', generate_series % 100),
'f3', concat('a', generate_series % 200),
'f4', concat('a', generate_series % 500)
)
from (table(generate_series(1, 1000)));
-- result:
-- !result
select dict_merge(get_json_string(c1, 'f1'), 255) from js2 [_META_];
-- result:
{"2":{"lst":["str",10,"YTA","YTE","YTI","YTM","YTQ","YTU","YTY","YTc","YTg","YTk"]},"3":{"lst":["i32",10,1,2,3,4,5,6,7,8,9,10]}}
-- !result
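The string lists in these dict_merge results are base64-encoded dictionary
values paired with 1-based integer codes: "YTA" decodes to "a0", matching the
inspect_global_dict output further below. A quick illustrative check in Java:

    // Illustrative only: decode one dictionary entry from the result above.
    static String decodeDictEntry(String b64) {
        return new String(java.util.Base64.getDecoder().decode(b64),
                java.nio.charset.StandardCharsets.UTF_8);
    }
    // decodeDictEntry("YTA") -> "a0"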
select dict_merge(get_json_string(c1, 'f2'), 255) from js2 [_META_];
-- result:
{"2":{"lst":["str",100,"YTA","YTE","YTEw","YTEx","YTEy","YTEz","YTE0","YTE1","YTE2","YTE3","YTE4","YTE5","YTI","YTIw","YTIx","YTIy","YTIz","YTI0","YTI1","YTI2","YTI3","YTI4","YTI5","YTM","YTMw","YTMx","YTMy","YTMz","YTM0","YTM1","YTM2","YTM3","YTM4","YTM5","YTQ","YTQw","YTQx","YTQy","YTQz","YTQ0","YTQ1","YTQ2","YTQ3","YTQ4","YTQ5","YTU","YTUw","YTUx","YTUy","YTUz","YTU0","YTU1","YTU2","YTU3","YTU4","YTU5","YTY","YTYw","YTYx","YTYy","YTYz","YTY0","YTY1","YTY2","YTY3","YTY4","YTY5","YTc","YTcw","YTcx","YTcy","YTcz","YTc0","YTc1","YTc2","YTc3","YTc4","YTc5","YTg","YTgw","YTgx","YTgy","YTgz","YTg0","YTg1","YTg2","YTg3","YTg4","YTg5","YTk","YTkw","YTkx","YTky","YTkz","YTk0","YTk1","YTk2","YTk3","YTk4","YTk5"]},"3":{"lst":["i32",100,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100]}}
-- !result
select dict_merge(get_json_string(c1, 'f3'), 255) from js2 [_META_];
-- result:
{"2":{"lst":["str",200,"YTA","YTE","YTEw","YTEwMA","YTEwMQ","YTEwMg","YTEwMw","YTEwNA","YTEwNQ","YTEwNg","YTEwNw","YTEwOA","YTEwOQ","YTEx","YTExMA","YTExMQ","YTExMg","YTExMw","YTExNA","YTExNQ","YTExNg","YTExNw","YTExOA","YTExOQ","YTEy","YTEyMA","YTEyMQ","YTEyMg","YTEyMw","YTEyNA","YTEyNQ","YTEyNg","YTEyNw","YTEyOA","YTEyOQ","YTEz","YTEzMA","YTEzMQ","YTEzMg","YTEzMw","YTEzNA","YTEzNQ","YTEzNg","YTEzNw","YTEzOA","YTEzOQ","YTE0","YTE0MA","YTE0MQ","YTE0Mg","YTE0Mw","YTE0NA","YTE0NQ","YTE0Ng","YTE0Nw","YTE0OA","YTE0OQ","YTE1","YTE1MA","YTE1MQ","YTE1Mg","YTE1Mw","YTE1NA","YTE1NQ","YTE1Ng","YTE1Nw","YTE1OA","YTE1OQ","YTE2","YTE2MA","YTE2MQ","YTE2Mg","YTE2Mw","YTE2NA","YTE2NQ","YTE2Ng","YTE2Nw","YTE2OA","YTE2OQ","YTE3","YTE3MA","YTE3MQ","YTE3Mg","YTE3Mw","YTE3NA","YTE3NQ","YTE3Ng","YTE3Nw","YTE3OA","YTE3OQ","YTE4","YTE4MA","YTE4MQ","YTE4Mg","YTE4Mw","YTE4NA","YTE4NQ","YTE4Ng","YTE4Nw","YTE4OA","YTE4OQ","YTE5","YTE5MA","YTE5MQ","YTE5Mg","YTE5Mw","YTE5NA","YTE5NQ","YTE5Ng","YTE5Nw","YTE5OA","YTE5OQ","YTI","YTIw","YTIx","YTIy","YTIz","YTI0","YTI1","YTI2","YTI3","YTI4","YTI5","YTM","YTMw","YTMx","YTMy","YTMz","YTM0","YTM1","YTM2","YTM3","YTM4","YTM5","YTQ","YTQw","YTQx","YTQy","YTQz","YTQ0","YTQ1","YTQ2","YTQ3","YTQ4","YTQ5","YTU","YTUw","YTUx","YTUy","YTUz","YTU0","YTU1","YTU2","YTU3","YTU4","YTU5","YTY","YTYw","YTYx","YTYy","YTYz","YTY0","YTY1","YTY2","YTY3","YTY4","YTY5","YTc","YTcw","YTcx","YTcy","YTcz","YTc0","YTc1","YTc2","YTc3","YTc4","YTc5","YTg","YTgw","YTgx","YTgy","YTgz","YTg0","YTg1","YTg2","YTg3","YTg4","YTg5","YTk","YTkw","YTkx","YTky","YTkz","YTk0","YTk1","YTk2","YTk3","YTk4","YTk5"]},"3":{"lst":["i32",200,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200]}}
-- !result
select dict_merge(get_json_string(c1, 'f4'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'global dict size:500 greater than low_cardinality_threshold:255: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f5'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
insert into js2
select
generate_series,
json_object(
'f1', generate_series % 10, -- integer
'f2', cast((generate_series % 20) * 1.5 as double), -- float
'f3', if(generate_series % 2 = 0, true, false), -- boolean
'f4', null, -- null
'f5', json_array(generate_series % 5, 'arr', 1.23) -- array
)
from (table(generate_series(1, 100)));
-- result:
-- !result
select dict_merge(get_json_string(c1, 'f1'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f2'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f3'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f4'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'global dict size:500 greater than low_cardinality_threshold:255: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f5'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
truncate table js2;
-- result:
-- !result
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 10),
'f2', concat('a', generate_series % 100),
'f3', concat('a', generate_series % 200),
'f4', concat('a', generate_series % 500)
)
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a100": 4, "a101": 5, "a102": 6, "a103": 7, "a104": 8, "a105": 9, "a106": 10, "a107": 11, "a108": 12, "a109": 13, "a11": 14, "a110": 15, "a111": 16, "a112": 17, "a113": 18, "a114": 19, "a115": 20, "a116": 21, "a117": 22, "a118": 23, "a119": 24, "a12": 25, "a120": 26, "a121": 27, "a122": 28, "a123": 29, "a124": 30, "a125": 31, "a126": 32, "a127": 33, "a128": 34, "a129": 35, "a13": 36, "a130": 37, "a131": 38, "a132": 39, "a133": 40, "a134": 41, "a135": 42, "a136": 43, "a137": 44, "a138": 45, "a139": 46, "a14": 47, "a140": 48, "a141": 49, "a142": 50, "a143": 51, "a144": 52, "a145": 53, "a146": 54, "a147": 55, "a148": 56, "a149": 57, "a15": 58, "a150": 59, "a151": 60, "a152": 61, "a153": 62, "a154": 63, "a155": 64, "a156": 65, "a157": 66, "a158": 67, "a159": 68, "a16": 69, "a160": 70, "a161": 71, "a162": 72, "a163": 73, "a164": 74, "a165": 75, "a166": 76, "a167": 77, "a168": 78, "a169": 79, "a17": 80, "a170": 81, "a171": 82, "a172": 83, "a173": 84, "a174": 85, "a175": 86, "a176": 87, "a177": 88, "a178": 89, "a179": 90, "a18": 91, "a180": 92, "a181": 93, "a182": 94, "a183": 95, "a184": 96, "a185": 97, "a186": 98, "a187": 99, "a188": 100, "a189": 101, "a19": 102, "a190": 103, "a191": 104, "a192": 105, "a193": 106, "a194": 107, "a195": 108, "a196": 109, "a197": 110, "a198": 111, "a199": 112, "a2": 113, "a20": 114, "a21": 115, "a22": 116, "a23": 117, "a24": 118, "a25": 119, "a26": 120, "a27": 121, "a28": 122, "a29": 123, "a3": 124, "a30": 125, "a31": 126, "a32": 127, "a33": 128, "a34": 129, "a35": 130, "a36": 131, "a37": 132, "a38": 133, "a39": 134, "a4": 135, "a40": 136, "a41": 137, "a42": 138, "a43": 139, "a44": 140, "a45": 141, "a46": 142, "a47": 143, "a48": 144, "a49": 145, "a5": 146, "a50": 147, "a51": 148, "a52": 149, "a53": 150, "a54": 151, "a55": 152, "a56": 153, "a57": 154, "a58": 155, "a59": 156, "a6": 157, "a60": 158, "a61": 159, "a62": 160, "a63": 161, "a64": 162, "a65": 163, "a66": 164, "a67": 165, "a68": 166, "a69": 167, "a7": 168, "a70": 169, "a71": 170, "a72": 171, "a73": 172, "a74": 173, "a75": 174, "a76": 175, "a77": 176, "a78": 177, "a79": 178, "a8": 179, "a80": 180, "a81": 181, "a82": 182, "a83": 183, "a84": 184, "a85": 185, "a86": 186, "a87": 187, "a88": 188, "a89": 189, "a9": 190, "a90": 191, "a91": 192, "a92": 193, "a93": 194, "a94": 195, "a95": 196, "a96": 197, "a97": 198, "a98": 199, "a99": 200}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a2": 3, "a3": 4, "a4": 5, "a5": 6, "a6": 7, "a7": 8, "a8": 9, "a9": 10}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a20": 14, "a21": 15, "a22": 16, "a23": 17, "a24": 18, "a25": 19, "a26": 20, "a27": 21, "a28": 22, "a29": 23, "a3": 24, "a30": 25, "a31": 26, "a32": 27, "a33": 28, "a34": 29, "a35": 30, "a36": 31, "a37": 32, "a38": 33, "a39": 34, "a4": 35, "a40": 36, "a41": 37, "a42": 38, "a43": 39, "a44": 40, "a45": 41, "a46": 42, "a47": 43, "a48": 44, "a49": 45, "a5": 46, "a50": 47, "a51": 48, "a52": 49, "a53": 50, "a54": 51, "a55": 52, "a56": 53, "a57": 54, "a58": 55, "a59": 56, "a6": 57, "a60": 58, "a61": 59, "a62": 60, "a63": 61, "a64": 62, "a65": 63, "a66": 64, "a67": 65, "a68": 66, "a69": 67, "a7": 68, "a70": 69, "a71": 70, "a72": 71, "a73": 72, "a74": 73, "a75": 74, "a76": 75, "a77": 76, "a78": 77, "a79": 78, "a8": 79, "a80": 80, "a81": 81, "a82": 82, "a83": 83, "a84": 84, "a85": 85, "a86": 86, "a87": 87, "a88": 88, "a89": 89, "a9": 90, "a90": 91, "a91": 92, "a92": 93, "a93": 94, "a94": 95, "a95": 96, "a96": 97, "a97": 98, "a98": 99, "a99": 100}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a100": 4, "a101": 5, "a102": 6, "a103": 7, "a104": 8, "a105": 9, "a106": 10, "a107": 11, "a108": 12, "a109": 13, "a11": 14, "a110": 15, "a111": 16, "a112": 17, "a113": 18, "a114": 19, "a115": 20, "a116": 21, "a117": 22, "a118": 23, "a119": 24, "a12": 25, "a120": 26, "a121": 27, "a122": 28, "a123": 29, "a124": 30, "a125": 31, "a126": 32, "a127": 33, "a128": 34, "a129": 35, "a13": 36, "a130": 37, "a131": 38, "a132": 39, "a133": 40, "a134": 41, "a135": 42, "a136": 43, "a137": 44, "a138": 45, "a139": 46, "a14": 47, "a140": 48, "a141": 49, "a142": 50, "a143": 51, "a144": 52, "a145": 53, "a146": 54, "a147": 55, "a148": 56, "a149": 57, "a15": 58, "a150": 59, "a151": 60, "a152": 61, "a153": 62, "a154": 63, "a155": 64, "a156": 65, "a157": 66, "a158": 67, "a159": 68, "a16": 69, "a160": 70, "a161": 71, "a162": 72, "a163": 73, "a164": 74, "a165": 75, "a166": 76, "a167": 77, "a168": 78, "a169": 79, "a17": 80, "a170": 81, "a171": 82, "a172": 83, "a173": 84, "a174": 85, "a175": 86, "a176": 87, "a177": 88, "a178": 89, "a179": 90, "a18": 91, "a180": 92, "a181": 93, "a182": 94, "a183": 95, "a184": 96, "a185": 97, "a186": 98, "a187": 99, "a188": 100, "a189": 101, "a19": 102, "a190": 103, "a191": 104, "a192": 105, "a193": 106, "a194": 107, "a195": 108, "a196": 109, "a197": 110, "a198": 111, "a199": 112, "a2": 113, "a20": 114, "a21": 115, "a22": 116, "a23": 117, "a24": 118, "a25": 119, "a26": 120, "a27": 121, "a28": 122, "a29": 123, "a3": 124, "a30": 125, "a31": 126, "a32": 127, "a33": 128, "a34": 129, "a35": 130, "a36": 131, "a37": 132, "a38": 133, "a39": 134, "a4": 135, "a40": 136, "a41": 137, "a42": 138, "a43": 139, "a44": 140, "a45": 141, "a46": 142, "a47": 143, "a48": 144, "a49": 145, "a5": 146, "a50": 147, "a51": 148, "a52": 149, "a53": 150, "a54": 151, "a55": 152, "a56": 153, "a57": 154, "a58": 155, "a59": 156, "a6": 157, "a60": 158, "a61": 159, "a62": 160, "a63": 161, "a64": 162, "a65": 163, "a66": 164, "a67": 165, "a68": 166, "a69": 167, "a7": 168, "a70": 169, "a71": 170, "a72": 171, "a73": 172, "a74": 173, "a75": 174, "a76": 175, "a77": 176, "a78": 177, "a79": 178, "a8": 179, "a80": 180, "a81": 181, "a82": 182, "a83": 183, "a84": 184, "a85": 185, "a86": 186, "a87": 187, "a88": 188, "a89": 189, "a9": 190, "a90": 191, "a91": 192, "a92": 193, "a93": 194, "a94": 195, "a95": 196, "a96": 197, "a97": 198, "a98": 199, "a99": 200}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
select min(get_json_string(c1, 'f1')) from js2;
-- result:
a0
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=3):
-- !result
select min(get_json_string(c1, 'f2')) from js2;
-- result:
a0
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=3):
-- !result
select min(get_json_string(c1, 'f3')) from js2;
-- result:
a0
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=3):
-- !result
select min(get_json_string(c1, 'f4')) from js2;
-- result:
a0
-- !result
select * from profile_decode;
-- result:
-- !result
select min(get_json_string(c1, 'f5')) from js2;
-- result:
None
-- !result
select * from profile_decode;
-- result:
-- !result
select get_json_string(c1, 'f1') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
a0 100
a1 100
a2 100
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=5):
-- !result
select get_json_string(c1, 'f2') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
a0 10
a1 10
a10 10
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=5):
-- !result
select get_json_string(c1, 'f3') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
a0 5
a1 5
a10 5
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=5):
-- !result
select get_json_string(c1, 'f4') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
a0 2
a1 2
a10 2
-- !result
select * from profile_decode;
-- result:
-- !result
select get_json_string(c1, 'f5') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
None 1000
-- !result
select * from profile_decode;
-- result:
-- !result
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 10),
'f2', concat('a', generate_series % 300),
'f3', concat('a', generate_series % 500),
'f4', concat('a', generate_series % 500)
)
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string((inspect_global_dict('js2', 'c1.f1')), 'dict');
-- result:
{"a0": 1, "a1": 2, "a2": 3, "a3": 4, "a4": 5, "a5": 6, "a6": 7, "a7": 8, "a8": 9, "a9": 10}
-- !result
select get_json_string((inspect_global_dict('js2', 'c1.f2')), 'dict');
-- result:
None
-- !result
select get_json_string((inspect_global_dict('js2', 'c1.f3')), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 1000),
'f2', concat('a', generate_series % 1000),
'f3', concat('a', generate_series % 1000),
'f4', concat('a', generate_series % 1000)
)
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
truncate table js2;
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 10),
'f2', concat('a', generate_series % 20),
'f3', concat('a', generate_series % 30),
'f4', concat('a', generate_series % 40),
'f5', concat('a', generate_series % 50)
)
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a2": 3, "a3": 4, "a4": 5, "a5": 6, "a6": 7, "a7": 8, "a8": 9, "a9": 10}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a3": 14, "a4": 15, "a5": 16, "a6": 17, "a7": 18, "a8": 19, "a9": 20}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a20": 14, "a21": 15, "a22": 16, "a23": 17, "a24": 18, "a25": 19, "a26": 20, "a27": 21, "a28": 22, "a29": 23, "a3": 24, "a4": 25, "a5": 26, "a6": 27, "a7": 28, "a8": 29, "a9": 30}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a20": 14, "a21": 15, "a22": 16, "a23": 17, "a24": 18, "a25": 19, "a26": 20, "a27": 21, "a28": 22, "a29": 23, "a3": 24, "a30": 25, "a31": 26, "a32": 27, "a33": 28, "a34": 29, "a35": 30, "a36": 31, "a37": 32, "a38": 33, "a39": 34, "a4": 35, "a5": 36, "a6": 37, "a7": 38, "a8": 39, "a9": 40}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a20": 14, "a21": 15, "a22": 16, "a23": 17, "a24": 18, "a25": 19, "a26": 20, "a27": 21, "a28": 22, "a29": 23, "a3": 24, "a30": 25, "a31": 26, "a32": 27, "a33": 28, "a34": 29, "a35": 30, "a36": 31, "a37": 32, "a38": 33, "a39": 34, "a4": 35, "a40": 36, "a41": 37, "a42": 38, "a43": 39, "a44": 40, "a45": 41, "a46": 42, "a47": 43, "a48": 44, "a49": 45, "a5": 46, "a6": 47, "a7": 48, "a8": 49, "a9": 50}
-- !result
insert into js2
select
generate_series,
json_object(
'f1', generate_series % 10, -- integer
'f2', cast((generate_series % 20) * 1.5 as double), -- float
'f3', if(generate_series % 2 = 0, true, false), -- boolean
'f4', null, -- null
'f5', json_array(generate_series % 5, 'arr', 1.23) -- array
)
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
CREATE TABLE js3 (
v1 BIGINT NULL,
c1 JSON NULL
)
DUPLICATE KEY (v1)
DISTRIBUTED BY HASH(`v1`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );
-- result:
-- !result
insert into js3
select
generate_series,
json_object(
'str_field', concat('str_', generate_series % 10),
'int_field', generate_series % 100,
'float_field', (generate_series % 100) * 1.23,
'bool_field', if(generate_series % 2 = 0, true, false),
'null_field', null,
'array_field', json_array(
generate_series % 5,
concat('arr_', generate_series % 3),
(generate_series % 10) * 0.5
),
'object_field', json_object(
'nested_str', concat('nested_', generate_series % 7),
'nested_int', generate_series % 7,
'nested_bool', if(generate_series % 3 = 0, true, false)
),
'deep_nested', json_object(
'level1', json_object(
'level2', json_array(
json_object(
'leaf_str', concat('leaf_', generate_series % 2),
'leaf_num', generate_series % 2
),
generate_series % 2
)
)
)
)
from (table(generate_series(1, 1000)));
-- result:
-- !result
select dict_merge(get_json_string(c1, 'str_field'), 255) from js3 [_META_];
-- result:
{"2":{"lst":["str",10,"c3RyXzA","c3RyXzE","c3RyXzI","c3RyXzM","c3RyXzQ","c3RyXzU","c3RyXzY","c3RyXzc","c3RyXzg","c3RyXzk"]},"3":{"lst":["i32",10,1,2,3,4,5,6,7,8,9,10]}}
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.str_field'), 'dict');
-- result:
{"str_0": 1, "str_1": 2, "str_2": 3, "str_3": 4, "str_4": 5, "str_5": 6, "str_6": 7, "str_7": 8, "str_8": 9, "str_9": 10}
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.int_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.float_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.bool_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.null_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.array_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.object_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_str'), 'dict');
-- result:
{"nested_0": 1, "nested_1": 2, "nested_2": 3, "nested_3": 4, "nested_4": 5, "nested_5": 6, "nested_6": 7}
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_int'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_bool'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2.leaf_str'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2.leaf_num'), 'dict');
-- result:
None
-- !result
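The results above follow one acceptance rule: a flat-json field keeps a usable
global dict only when every collected value is a string and the distinct count
stays within the low-cardinality threshold (255 in these tests). A minimal
sketch of that check in Java, with hypothetical names:

    // Hypothetical sketch of the acceptance rule exercised above: non-string
    // fields (int, float, bool, null, array, object) yield no dict, and a
    // dict larger than the threshold is rejected as well.
    static boolean isDictUsable(java.util.Set<String> distinctValues,
                                boolean allValuesAreStrings,
                                int lowCardinalityThreshold) {
        return allValuesAreStrings
                && distinctValues.size() <= lowCardinalityThreshold;
    }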
@ -168,8 +168,8 @@ select get_json_int(j1, '$.f1') from js2 where get_json_int(j1, '$.f1') = 1;
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select get_json_int(j1, '$.f2') from js2 where get_json_int(j1, '$.f1') = 1;
-- result:
@ -178,8 +178,8 @@ None
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 2
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
select get_json_int(j1, '$.f1') from js2 where get_json_int(j1, '$.f2') = 1;
-- result:
@ -188,8 +188,8 @@ None
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 2
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
select get_json_int(j1, '$.f2'), j1 from js2 where get_json_int(j1, '$.f1') = 1;
-- result:
@ -217,8 +217,8 @@ None None
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 2
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
select count(get_json_int(j1, '$.f2')) from js2 ;
-- result:
@ -227,8 +227,8 @@ select count(get_json_int(j1, '$.f2')) from js2 ;
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select count(get_json_int(j1, '$.f3')) from js2 ;
-- result:
@ -237,8 +237,8 @@ select count(get_json_int(j1, '$.f3')) from js2 ;
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 1
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
select * from js2 where get_json_int(j1, '$.f1') = -1;
-- result:
@ -508,8 +508,8 @@ select count(*) from js3 where get_json_double(j1, 'f_bool') = 1.0;
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select count(*) from js3 where get_json_int(j1, 'f_int') < 500;
-- result:
@ -530,8 +530,8 @@ select count(*) from js3 where get_json_double(j1, 'f_int') < 500;
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select count(*) from js3 where get_json_double(j1, 'f_double') < 500.0;
-- result:
@ -552,8 +552,8 @@ select count(*) from js3 where get_json_string(j1, 'f_double') < '500';
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select count(*) from js3 where get_json_int(j1, 'f_none') IS NULL;
-- result:
@ -582,6 +582,6 @@ select count(*) from js3 where get_json_double(j1, 'f_none') < 500;
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 1
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
@ -0,0 +1,254 @@
-- name: test_normal_flat_json_dict @sequential
update information_schema.be_configs set value = 'true' where name = 'enable_json_flat';
set enable_profile = true;
set enable_async_profile = false;
set cbo_json_v2_rewrite = true;
set cbo_json_v2_dict_opt = true;
create view profile_decode as
select trim(unnest)
from table(unnest(split(get_query_profile(last_query_id()), '\n')))
where unnest like '%DICT_DECODE%'
order by unnest;
CREATE TABLE js2 (
v1 BIGINT NULL,
c1 JSON NULL
)
DUPLICATE KEY (v1)
DISTRIBUTED BY HASH(`v1`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 10),
'f2', concat('a', generate_series % 100),
'f3', concat('a', generate_series % 200),
'f4', concat('a', generate_series % 500)
)
from (table(generate_series(1, 1000)));
-- Test dict_merge function
select dict_merge(get_json_string(c1, 'f1'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f2'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f3'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f4'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f5'), 255) from js2 [_META_];
insert into js2
select
generate_series,
json_object(
'f1', generate_series % 10, -- integer
'f2', cast((generate_series % 20) * 1.5 as double), -- float
'f3', if(generate_series % 2 = 0, true, false), -- boolean
'f4', null, -- null
'f5', json_array(generate_series % 5, 'arr', 1.23) -- array
)
from (table(generate_series(1, 100)));
select dict_merge(get_json_string(c1, 'f1'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f2'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f3'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f4'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f5'), 255) from js2 [_META_];
-- Test the global dict
truncate table js2;
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 10),
'f2', concat('a', generate_series % 100),
'f3', concat('a', generate_series % 200),
'f4', concat('a', generate_series % 500)
)
from (table(generate_series(1, 1000)));
-- inspect the dict itself
select get_json_string(inspect_global_dict('js2', 'c1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- test some simple queries
select min(get_json_string(c1, 'f1')) from js2;
select * from profile_decode;
select min(get_json_string(c1, 'f2')) from js2;
select * from profile_decode;
select min(get_json_string(c1, 'f3')) from js2;
select * from profile_decode;
select min(get_json_string(c1, 'f4')) from js2;
select * from profile_decode;
select min(get_json_string(c1, 'f5')) from js2;
select * from profile_decode;
select get_json_string(c1, 'f1') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
select get_json_string(c1, 'f2') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
select get_json_string(c1, 'f3') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
select get_json_string(c1, 'f4') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
select get_json_string(c1, 'f5') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
-- dict update: invalidate some of the dicts
-- f1: valid, cardinality: 10
-- f2: invalid, cardinality: 300
-- f3: invalid, cardinality: 500
-- f4: invalid, cardinality: 500
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 10),
'f2', concat('a', generate_series % 300),
'f3', concat('a', generate_series % 500),
'f4', concat('a', generate_series % 500)
)
from (table(generate_series(1, 1000)));
select get_json_string((inspect_global_dict('js2', 'c1.f1')), 'dict');
select get_json_string((inspect_global_dict('js2', 'c1.f2')), 'dict');
select get_json_string((inspect_global_dict('js2', 'c1.f3')), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- dict update: invalidate all dicts
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 1000),
'f2', concat('a', generate_series % 1000),
'f3', concat('a', generate_series % 1000),
'f4', concat('a', generate_series % 1000)
)
from (table(generate_series(1, 1000)));
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- dict update: truncate table
truncate table js2;
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- dict update: activate all dicts
insert into js2
select
generate_series,
json_object(
'f1', concat('a', generate_series % 10),
'f2', concat('a', generate_series % 20),
'f3', concat('a', generate_series % 30),
'f4', concat('a', generate_series % 40),
'f5', concat('a', generate_series % 50)
)
from (table(generate_series(1, 1000)));
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- Test mixed data types: non-string values invalidate the string dicts
insert into js2
select
generate_series,
json_object(
'f1', generate_series % 10, -- integer
'f2', cast((generate_series % 20) * 1.5 as double), -- float
'f3', if(generate_series % 2 = 0, true, false), -- boolean
'f4', null, -- null
'f5', json_array(generate_series % 5, 'arr', 1.23) -- array
)
from (table(generate_series(1, 1000)));
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- Test with various JSON data types (not just string):
-- int, float, bool, null, array, object, and nested structures
CREATE TABLE js3 (
v1 BIGINT NULL,
c1 JSON NULL
)
DUPLICATE KEY (v1)
DISTRIBUTED BY HASH(`v1`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );
insert into js3
select
generate_series,
json_object(
'str_field', concat('str_', generate_series % 10),
'int_field', generate_series % 100,
'float_field', (generate_series % 100) * 1.23,
'bool_field', if(generate_series % 2 = 0, true, false),
'null_field', null,
'array_field', json_array(
generate_series % 5,
concat('arr_', generate_series % 3),
(generate_series % 10) * 0.5
),
'object_field', json_object(
'nested_str', concat('nested_', generate_series % 7),
'nested_int', generate_series % 7,
'nested_bool', if(generate_series % 3 = 0, true, false)
),
'deep_nested', json_object(
'level1', json_object(
'level2', json_array(
json_object(
'leaf_str', concat('leaf_', generate_series % 2),
'leaf_num', generate_series % 2
),
generate_series % 2
)
)
)
)
from (table(generate_series(1, 1000)));
select dict_merge(get_json_string(c1, 'str_field'), 255) from js3 [_META_];
select get_json_string(inspect_global_dict('js3', 'c1.str_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.int_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.float_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.bool_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.null_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.array_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.object_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_str'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_int'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_bool'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2.leaf_str'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2.leaf_num'), 'dict');
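The dict-update steps above (invalidate some, invalidate all, truncate,
activate all) boil down to one merge decision per field on ingest. A minimal
sketch of that decision in Java, with hypothetical names and the 255 threshold
used throughout these tests:

    // Hypothetical sketch: merge newly ingested distinct values into the
    // existing dict and invalidate it (return null) once the merged
    // cardinality exceeds the low-cardinality threshold; truncate simply
    // drops every dict.
    static java.util.Set<String> mergeDict(java.util.Set<String> existing,
                                           java.util.Set<String> incoming,
                                           int threshold) {
        java.util.Set<String> merged = new java.util.HashSet<>(existing);
        merged.addAll(incoming);
        return merged.size() <= threshold ? merged : null;
    }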