Signed-off-by: Murphy <mofei@starrocks.com>
Co-authored-by: Murphy <96611012+murphyatwork@users.noreply.github.com>

parent 04bb4e3f1b
commit 89bc4ff068
@@ -92,9 +92,8 @@ public class ColumnAccessPath {
     }
 
     public static ColumnAccessPath createFromLinearPath(String linearPath, Type valueType) {
-        final int jsonFlattenDepth = 20;
-        List<String> pieces = Lists.newArrayList();
-        if (SubfieldAccessPathNormalizer.parseSimpleJsonPath(jsonFlattenDepth, linearPath, pieces)) {
+        List<String> pieces = SubfieldAccessPathNormalizer.parseSimpleJsonPath(linearPath);
+        if (pieces.isEmpty()) {
             throw new IllegalArgumentException("illegal json path: " + linearPath);
         }
         return createLinearPath(pieces, valueType);
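The parsing contract changes here: parseSimpleJsonPath now returns the parsed pieces directly, with an empty list signalling an invalid path, instead of filling a caller-supplied list and returning a truncation flag. A minimal caller sketch under that contract (method names are from this diff; the sample paths are illustrative):

    List<String> pieces = SubfieldAccessPathNormalizer.parseSimpleJsonPath("$.a.b"); // [a, b], leading "$" skipped
    List<String> bad = SubfieldAccessPathNormalizer.parseSimpleJsonPath("$");        // [] means invalid
    if (bad.isEmpty()) {
        throw new IllegalArgumentException("illegal json path: $");
    }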
@@ -446,6 +446,7 @@ public class OlapTable extends Table {
             olapTable.curBinlogConfig = new BinlogConfig(this.curBinlogConfig);
         }
         olapTable.dbName = this.dbName;
+        olapTable.maxColUniqueId = new AtomicInteger(this.maxColUniqueId.get());
     }
 
     public void addDoubleWritePartition(long sourcePartitionId, long tempPartitionId) {
@@ -41,6 +41,7 @@ import com.starrocks.thrift.TScanRange;
 import com.starrocks.thrift.TScanRangeLocation;
 import com.starrocks.thrift.TScanRangeLocations;
 import com.starrocks.warehouse.cngroup.ComputeResource;
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -176,6 +177,10 @@ public class MetaScanNode extends ScanNode {
             columnsDesc.add(tColumn);
         }
         msg.meta_scan_node.setColumns(columnsDesc);
+
+        if (CollectionUtils.isNotEmpty(columnAccessPaths)) {
+            msg.meta_scan_node.setColumn_access_paths(columnAccessPathToThrift());
+        }
     }
 
     @Override
 
@@ -193,6 +198,10 @@ public class MetaScanNode extends ScanNode {
         if (!selectPartitionNames.isEmpty()) {
             output.append(prefix).append("Partitions: ").append(selectPartitionNames).append("\n");
         }
+
+        if (detailLevel == TExplainLevel.VERBOSE) {
+            output.append(explainColumnAccessPath(prefix));
+        }
         return output.toString();
     }
@@ -48,6 +48,7 @@ import com.starrocks.sql.optimizer.ScanOptimizeOption;
 import com.starrocks.thrift.TColumnAccessPath;
 import com.starrocks.thrift.TScanRangeLocations;
 import com.starrocks.warehouse.cngroup.ComputeResource;
+import org.apache.commons.collections4.CollectionUtils;
 import org.jetbrains.annotations.TestOnly;
 
 import java.util.ArrayList;
 
@@ -167,6 +168,9 @@ public abstract class ScanNode extends PlanNode {
 
     protected String explainColumnAccessPath(String prefix) {
         String result = "";
+        if (CollectionUtils.isEmpty(columnAccessPaths)) {
+            return result;
+        }
         if (columnAccessPaths.stream().anyMatch(c -> !c.isFromPredicate() && !c.isExtended())) {
             result += prefix + "ColumnAccessPath: [" + columnAccessPaths.stream()
                     .filter(c -> !c.isFromPredicate() && !c.isExtended())
@@ -389,6 +389,7 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
             "cbo_enable_single_node_prefer_two_stage_aggregate";
 
     public static final String CBO_JSON_V2_REWRITE = "cbo_json_v2_rewrite";
+    public static final String CBO_JSON_V2_DICT_OPT = "cbo_json_v2_dict_opt";
 
     public static final String CBO_PUSH_DOWN_DISTINCT_BELOW_WINDOW = "cbo_push_down_distinct_below_window";
     public static final String CBO_PUSH_DOWN_AGGREGATE = "cbo_push_down_aggregate";
 
@@ -1247,6 +1248,9 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
     @VarAttr(name = CBO_JSON_V2_REWRITE)
     private boolean cboJSONV2Rewrite = true;
 
+    @VarAttr(name = CBO_JSON_V2_DICT_OPT)
+    private boolean cboJSONV2DictOpt = true;
+
     /*
      * the parallel exec instance num for one Fragment in one BE
 
@@ -5189,6 +5193,14 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
         this.enableDropTableCheckMvDependency = enableDropTableCheckMvDependency;
     }
 
+    public boolean isEnableJSONV2DictOpt() {
+        return cboJSONV2DictOpt;
+    }
+
+    public void setEnableJSONV2DictOpt(boolean value) {
+        this.cboJSONV2DictOpt = value;
+    }
+
     // Serialize to thrift object
     // used for rest api
     public TQueryOptions toThrift() {
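Given the @VarAttr registration above, both switches should be controllable per session in the usual way, e.g. `SET cbo_json_v2_rewrite = false;` or `SET cbo_json_v2_dict_opt = false;` (names taken from the constants above); both default to true.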
@@ -335,6 +335,7 @@ public class InsertPlanner {
                 dict.ifPresent(
                         columnDict -> globalDicts.add(new Pair<>(slotDescriptor.getId().asInt(), columnDict)));
             }
+            // TODO: attach the dict for JSON
         }
         tupleDesc.computeMemLayout();
@@ -40,6 +40,7 @@ import com.starrocks.server.GlobalStateMgr;
 import com.starrocks.sql.analyzer.SemanticException;
 import com.starrocks.sql.ast.CreateMaterializedViewStmt;
 import com.starrocks.sql.ast.StatementBase;
+import com.starrocks.sql.optimizer.base.ColumnIdentifier;
 import com.starrocks.sql.optimizer.rule.mv.MVUtils;
 import com.starrocks.sql.parser.SqlParser;
 import com.starrocks.thrift.TUniqueId;
 
@@ -198,6 +199,19 @@ public class MetaUtils {
         }
     }
 
+    public static long lookupDbIdByTableId(long tableId) {
+        long res = -1;
+        for (Long id : GlobalStateMgr.getCurrentState().getLocalMetastore().getDbIds()) {
+            Database db = GlobalStateMgr.getCurrentState().getLocalMetastore().getDb(id);
+            if (db != null &&
+                    GlobalStateMgr.getCurrentState().getLocalMetastore().getTable(db.getId(), tableId) != null) {
+                res = db.getId();
+                break;
+            }
+        }
+        return res;
+    }
+
     public static List<Column> getColumnsByColumnIds(Table table, List<ColumnId> ids) {
         return getColumnsByColumnIds(table.getIdToColumn(), ids);
     }
 
@@ -271,6 +285,22 @@ public class MetaUtils {
         return column.getName();
     }
 
+    public static Column getColumnByColumnId(ColumnIdentifier identifier) {
+        return getColumnByColumnId(identifier.getDbId(), identifier.getTableId(), identifier.getColumnName());
+    }
+
+    public static Column getColumnByColumnId(long dbId, long tableId, ColumnId columnId) {
+        Table table = GlobalStateMgr.getCurrentState().getLocalMetastore().getTable(dbId, tableId);
+        if (table == null) {
+            throw new SemanticException("Table %s is not found", tableId);
+        }
+        Column column = table.getColumn(columnId);
+        if (column == null) {
+            throw new SemanticException(String.format("can not find column by column id: %s", columnId));
+        }
+        return column;
+    }
+
     public static List<ColumnId> getColumnIdsByColumnNames(Table table, List<String> names) {
         List<ColumnId> columnIds = new ArrayList<>(names.size());
         for (String name : names) {
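A caller that only knows a table id can combine the two new helpers to resolve a column, at the cost of a linear scan over all databases in lookupDbIdByTableId — a sketch, assuming valid ids:

    long dbId = MetaUtils.lookupDbIdByTableId(tableId); // O(#databases); -1 if not found
    if (dbId != -1) {
        Column column = MetaUtils.getColumnByColumnId(dbId, tableId, columnId);
    }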
@@ -682,6 +682,9 @@ public class QueryOptimizer extends Optimizer {
             CTEUtils.collectCteOperators(tree, context);
         }
 
+        // Rewrite the jsonpath in META-SCAN
+        scheduler.rewriteOnce(tree, rootTaskContext, JsonPathRewriteRule.createForMetaScan());
+
         scheduler.rewriteIterative(tree, rootTaskContext, new MergeTwoProjectRule());
         scheduler.rewriteOnce(tree, rootTaskContext, RuleSet.META_SCAN_REWRITE_RULES);
         scheduler.rewriteIterative(tree, rootTaskContext, new MergeTwoProjectRule());
 
@@ -692,6 +695,8 @@ public class QueryOptimizer extends Optimizer {
         // After this rule, we shouldn't generate logical project operator
         scheduler.rewriteIterative(tree, rootTaskContext, new MergeProjectWithChildRule());
 
+        scheduler.rewriteOnce(tree, rootTaskContext, JsonPathRewriteRule.createForOlapScan());
+
         scheduler.rewriteOnce(tree, rootTaskContext, new EliminateSortColumnWithEqualityPredicateRule());
         scheduler.rewriteOnce(tree, rootTaskContext, new PushDownTopNBelowOuterJoinRule());
         // intersect rewrite depend on statistics
 
@@ -963,8 +968,6 @@ public class QueryOptimizer extends Optimizer {
 
         int planCount = result.getPlanCount();
 
-        result = new JsonPathRewriteRule().rewrite(result, rootTaskContext);
-
         // Since there may be many different plans in the logic phase, it's possible
         // that this switch can't turned on after logical optimization, so we only determine
         // whether the PreAggregate can be turned on in the final
@@ -26,6 +26,7 @@ import com.starrocks.authorization.ObjectType;
 import com.starrocks.authorization.PrivilegeType;
 import com.starrocks.catalog.BaseTableInfo;
 import com.starrocks.catalog.Column;
+import com.starrocks.catalog.ColumnId;
 import com.starrocks.catalog.Database;
 import com.starrocks.catalog.InternalCatalog;
 import com.starrocks.catalog.KeysType;
 
@@ -34,6 +35,7 @@ import com.starrocks.catalog.MvId;
 import com.starrocks.catalog.MvPlanContext;
 import com.starrocks.catalog.OlapTable;
 import com.starrocks.catalog.Table;
+import com.starrocks.catalog.Type;
 import com.starrocks.common.ErrorCode;
 import com.starrocks.common.ErrorReport;
 import com.starrocks.common.util.concurrent.lock.LockType;
 
@@ -59,6 +61,8 @@ import com.starrocks.sql.optimizer.dump.QueryDumper;
 import com.starrocks.sql.optimizer.operator.scalar.ConstantOperator;
 import com.starrocks.sql.optimizer.rewrite.ConstantFunction;
 import com.starrocks.sql.optimizer.rule.transformation.materialization.MvUtils;
+import com.starrocks.sql.optimizer.statistics.CacheDictManager;
+import com.starrocks.sql.optimizer.statistics.ColumnDict;
 import com.starrocks.thrift.TResultBatch;
 import io.netty.buffer.ByteBuf;
 import io.netty.buffer.Unpooled;
 
@@ -586,4 +590,29 @@ public class MetaFunctions {
         }
     }
 
+    /**
+     * Inspect global dictionary table, and return the content in JSON format.
+     */
+    @ConstantFunction(name = "inspect_global_dict", argTypes = {VARCHAR,
+            VARCHAR}, returnType = VARCHAR, isMetaFunction = true)
+    public static ConstantOperator inspectGlobalDict(ConstantOperator tableName, ConstantOperator columnName) {
+        TableName tableNameValue = TableName.fromString(tableName.getVarchar());
+        Optional<Table> maybeTable = GlobalStateMgr.getCurrentState().getMetadataMgr()
+                .getTable(new ConnectContext(), tableNameValue);
+        maybeTable.orElseThrow(() -> ErrorReport.buildSemanticException(ErrorCode.ERR_BAD_TABLE_ERROR, tableNameValue));
+        if (!(maybeTable.get() instanceof OlapTable)) {
+            ErrorReport.reportSemanticException(ErrorCode.ERR_INVALID_PARAMETER, "must be OLAP_TABLE");
+        }
+        OlapTable table = (OlapTable) maybeTable.get();
+        String column = columnName.getVarchar();
+
+        CacheDictManager instance = CacheDictManager.getInstance();
+        Optional<ColumnDict> dict = instance.getGlobalDictSync(table.getId(), ColumnId.create(column));
+        if (dict.isEmpty()) {
+            return ConstantOperator.createNull(Type.VARCHAR);
+        } else {
+            return ConstantOperator.createVarchar(dict.get().toJson());
+        }
+    }
+
 }
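Given the @ConstantFunction signature above (two VARCHAR arguments, VARCHAR result), usage is presumably along the lines of `select inspect_global_dict('db1.tbl1', 'c2.f1');`, returning the cached dictionary as JSON via ColumnDict.toJson(), or NULL when no dictionary is cached for that column.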
@@ -14,6 +14,7 @@
 
 package com.starrocks.sql.optimizer.operator.logical;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.starrocks.sql.optimizer.operator.OperatorType;
 import com.starrocks.sql.optimizer.operator.OperatorVisitor;
 
@@ -82,6 +83,7 @@ public class LogicalMetaScanOperator extends LogicalScanOperator {
             super.withOperator(operator);
             builder.aggColumnIdToNames = ImmutableMap.copyOf(operator.aggColumnIdToNames);
             builder.selectPartitionNames = operator.selectPartitionNames;
+            builder.columnAccessPaths = ImmutableList.copyOf(operator.columnAccessPaths);
             return this;
         }
@@ -282,6 +282,14 @@ public abstract class LogicalScanOperator extends LogicalOperator {
             return (B) this;
         }
 
+        public B addColumnAccessPaths(List<ColumnAccessPath> paths) {
+            builder.columnAccessPaths = ImmutableList.<ColumnAccessPath>builder()
+                    .addAll(paths)
+                    .addAll(builder.columnAccessPaths)
+                    .build();
+            return (B) this;
+        }
+
         public B setTable(Table table) {
             builder.table = table;
             return (B) this;
@@ -14,6 +14,8 @@
 
 package com.starrocks.sql.optimizer.operator.physical;
 
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.starrocks.sql.optimizer.OptExpression;
 import com.starrocks.sql.optimizer.OptExpressionVisitor;
 import com.starrocks.sql.optimizer.operator.OperatorType;
 
@@ -25,8 +27,14 @@ import java.util.Map;
 import java.util.Objects;
 
 public class PhysicalMetaScanOperator extends PhysicalScanOperator {
-    private final Map<Integer, String> aggColumnIdToNames;
-    private final List<String> selectPartitionNames;
+    private Map<Integer, String> aggColumnIdToNames;
+    private List<String> selectPartitionNames;
+
+    public PhysicalMetaScanOperator() {
+        super(OperatorType.PHYSICAL_META_SCAN);
+        this.aggColumnIdToNames = ImmutableMap.of();
+        this.selectPartitionNames = ImmutableList.of();
+    }
 
     public PhysicalMetaScanOperator(LogicalMetaScanOperator scanOperator) {
         super(OperatorType.PHYSICAL_META_SCAN, scanOperator);
 
@@ -69,6 +77,27 @@ public class PhysicalMetaScanOperator extends PhysicalScanOperator {
 
     @Override
     public int hashCode() {
-        return Objects.hash(super.hashCode(), aggColumnIdToNames);
+        return Objects.hash(super.hashCode(), aggColumnIdToNames, selectPartitionNames);
     }
 
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    public static class Builder
+            extends PhysicalScanOperator.Builder<PhysicalMetaScanOperator, PhysicalScanOperator.Builder> {
+
+        @Override
+        protected PhysicalMetaScanOperator newInstance() {
+            return new PhysicalMetaScanOperator();
+        }
+
+        @Override
+        public PhysicalMetaScanOperator.Builder withOperator(PhysicalMetaScanOperator operator) {
+            super.withOperator(operator);
+            builder.aggColumnIdToNames = ImmutableMap.copyOf(operator.aggColumnIdToNames);
+            builder.selectPartitionNames = ImmutableList.copyOf(operator.selectPartitionNames);
+            return this;
+        }
+    }
 }
@@ -322,6 +322,7 @@ public class PhysicalOlapScanOperator extends PhysicalScanOperator {
             builder.prunedPartitionPredicates = operator.prunedPartitionPredicates;
             builder.vectorSearchOptions = operator.vectorSearchOptions;
             builder.sample = operator.getSample();
+            builder.columnAccessPaths = operator.columnAccessPaths;
             return this;
         }
@@ -157,6 +157,7 @@ public enum RuleType {
     TF_INTERSECT_DISTINCT,
 
     TF_MERGE_PROJECT_WITH_CHILD,
+    TF_JSON_PATH_REWRITE,
 
     TF_PUSH_DOWN_JOIN_ON_EXPRESSION_TO_CHILD_PROJECT,
@@ -32,6 +32,7 @@ import java.util.List;
 
 public class MergeProjectWithChildRule extends TransformationRule {
+    boolean eliminateUselessProject = true;
 
     public MergeProjectWithChildRule() {
         super(RuleType.TF_MERGE_PROJECT_WITH_CHILD,
                 Pattern.create(OperatorType.LOGICAL_PROJECT).
 
@@ -72,7 +73,6 @@ public class MergeProjectWithChildRule extends TransformationRule {
             return Lists.newArrayList(OptExpression.create(builder.build(), input.inputAt(0).getInputs()));
         }
 
-
         ColumnRefSet projectColumns = logicalProjectOperator.getOutputColumns(
                 new ExpressionContext(input));
         ColumnRefSet childOutputColumns = child.getOutputColumns(new ExpressionContext(input.inputAt(0)));
@@ -36,7 +36,9 @@ import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
 import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
 import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
 import com.starrocks.sql.optimizer.rule.RuleType;
+import com.starrocks.sql.optimizer.rule.tree.JsonPathRewriteRule;
 import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.collections4.MapUtils;
 
 import java.util.List;
 import java.util.Map;
 
@@ -59,9 +61,16 @@ public class PushDownAggToMetaScanRule extends TransformationRule {
             return false;
         }
         for (Map.Entry<ColumnRefOperator, ScalarOperator> entry : projectOperator.getColumnRefMap().entrySet()) {
-            if (!entry.getKey().equals(entry.getValue())) {
-                return false;
+            // select min(c1) from tbl [_META_]
+            if (entry.getKey().equals(entry.getValue())) {
+                continue;
             }
+            // select dict_merge(get_json_string(c1)) from tbl [_META_]
+            if (entry.getValue().getHints().contains(JsonPathRewriteRule.COLUMN_REF_HINT)) {
+                continue;
+            }
+
+            return false;
         }
 
         for (CallOperator aggCall : agg.getAggregations().values()) {
 
@@ -81,6 +90,7 @@ public class PushDownAggToMetaScanRule extends TransformationRule {
     @Override
     public List<OptExpression> transform(OptExpression input, OptimizerContext context) {
         LogicalAggregationOperator agg = (LogicalAggregationOperator) input.getOp();
+        LogicalProjectOperator project = (LogicalProjectOperator) input.inputAt(0).getOp();
         LogicalMetaScanOperator metaScan = (LogicalMetaScanOperator) input.inputAt(0).inputAt(0).getOp();
         ColumnRefFactory columnRefFactory = context.getColumnRefFactory();
 
@@ -100,6 +110,12 @@ public class PushDownAggToMetaScanRule extends TransformationRule {
             if (aggCall.getFnName().equals(FunctionSet.COUNT) && aggCall.getChildren().isEmpty()) {
                 usedColumn = metaScan.getOutputColumns().get(0);
                 metaColumnName = "rows_" + usedColumn.getName();
+            } else if (MapUtils.isNotEmpty(project.getColumnRefMap())) {
+                ColumnRefSet usedColumns = aggCall.getUsedColumns();
+                Preconditions.checkArgument(usedColumns.cardinality() == 1);
+                List<ColumnRefOperator> columnRefOperators = usedColumns.getColumnRefOperators(columnRefFactory);
+                usedColumn = (ColumnRefOperator) project.getColumnRefMap().get(columnRefOperators.get(0));
+                metaColumnName = aggCall.getFnName() + "_" + usedColumn.getName();
             } else {
                 ColumnRefSet usedColumns = aggCall.getUsedColumns();
                 Preconditions.checkArgument(usedColumns.cardinality() == 1);
@@ -79,7 +79,7 @@ public class EliminateOveruseColumnAccessPathRule implements TreeRewriteRule {
     }
 
     @Override
-    public Optional<ColumnRefSet> visitPhysicalScan(OptExpression optExpression,
+    public Optional<ColumnRefSet> visitPhysicalOlapScan(OptExpression optExpression,
                                                     ColumnRefSet parentUsedColumnRefs) {
         Preconditions.checkState(parentUsedColumnRefs != null);
         PhysicalScanOperator scan = optExpression.getOp().cast();
 
@@ -107,6 +107,7 @@ public class EliminateOveruseColumnAccessPathRule implements TreeRewriteRule {
                 .collect(Collectors.toMap(e -> e.getValue().getName(), Map.Entry::getKey));
 
         Predicate<ColumnAccessPath> isOveruseProjecting = accessPath ->
+                columnNameToIdMap.containsKey(accessPath.getPath()) &&
                 parentUsedColumnRefs.contains(Objects.requireNonNull(columnNameToIdMap.get(accessPath.getPath())));
 
         Map<Boolean, List<ColumnAccessPath>> subfieldPruningProjectingGroups = subfieldPruningProjectings
@@ -15,6 +15,7 @@
 package com.starrocks.sql.optimizer.rule.tree;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.starrocks.catalog.Column;
 
@@ -28,10 +29,17 @@ import com.starrocks.lake.LakeTable;
 import com.starrocks.qe.SessionVariable;
 import com.starrocks.sql.optimizer.OptExpression;
 import com.starrocks.sql.optimizer.OptExpressionVisitor;
+import com.starrocks.sql.optimizer.OptimizerContext;
 import com.starrocks.sql.optimizer.base.ColumnRefFactory;
+import com.starrocks.sql.optimizer.base.ColumnRefSet;
 import com.starrocks.sql.optimizer.operator.Operator;
 import com.starrocks.sql.optimizer.operator.OperatorBuilderFactory;
-import com.starrocks.sql.optimizer.operator.physical.PhysicalScanOperator;
+import com.starrocks.sql.optimizer.operator.OperatorType;
+import com.starrocks.sql.optimizer.operator.logical.LogicalMetaScanOperator;
+import com.starrocks.sql.optimizer.operator.logical.LogicalOlapScanOperator;
+import com.starrocks.sql.optimizer.operator.logical.LogicalProjectOperator;
+import com.starrocks.sql.optimizer.operator.logical.LogicalScanOperator;
+import com.starrocks.sql.optimizer.operator.pattern.Pattern;
 import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
 import com.starrocks.sql.optimizer.operator.scalar.ColumnRefOperator;
 import com.starrocks.sql.optimizer.operator.scalar.ConstantOperator;
 
@@ -39,8 +47,9 @@ import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
 import com.starrocks.sql.optimizer.rewrite.ScalarOperatorRewriteContext;
 import com.starrocks.sql.optimizer.rewrite.ScalarOperatorRewriter;
 import com.starrocks.sql.optimizer.rewrite.scalar.BottomUpScalarOperatorRewriteRule;
+import com.starrocks.sql.optimizer.rule.RuleType;
+import com.starrocks.sql.optimizer.rule.transformation.TransformationRule;
 import com.starrocks.sql.optimizer.rule.tree.prunesubfield.SubfieldAccessPathNormalizer;
-import com.starrocks.sql.optimizer.task.TaskContext;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections4.MapUtils;
 import org.apache.logging.log4j.LogManager;
 
@@ -51,7 +60,7 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.regex.Pattern;
+
 
 /**
  * JsonPathRewriteRule rewrites JSON function calls to column access paths for better performance.
 
@@ -67,11 +76,12 @@
  * - get_json_double
  * - get_json_bool
  */
-public class JsonPathRewriteRule implements TreeRewriteRule {
+public class JsonPathRewriteRule extends TransformationRule {
 
     private static final Logger LOG = LogManager.getLogger(JsonPathRewriteRule.class);
-    private static final int DEFAULT_JSON_FLATTEN_DEPTH = 20;
-    private static final Pattern JSON_PATH_VALID_PATTERN = Pattern.compile("^[a-zA-Z0-9_]+$");
+    private static final java.util.regex.Pattern JSON_PATH_VALID_PATTERN =
+            java.util.regex.Pattern.compile("^[a-zA-Z0-9_]+$");
+    public static final String COLUMN_REF_HINT = "JsonPathExtended";
 
     private static final Set<String> SUPPORTED_JSON_FUNCTIONS = Set.of(
             FunctionSet.GET_JSON_STRING,
 
@@ -80,20 +90,34 @@ public class JsonPathRewriteRule implements TreeRewriteRule {
             FunctionSet.GET_JSON_BOOL
     );
 
+    protected JsonPathRewriteRule(OperatorType operatorType) {
+        super(RuleType.TF_JSON_PATH_REWRITE, Pattern.create(operatorType));
+    }
+
+    public static JsonPathRewriteRule createForOlapScan() {
+        return new JsonPathRewriteRule(OperatorType.LOGICAL_OLAP_SCAN);
+    }
+
+    public static JsonPathRewriteRule createForMetaScan() {
+        return new JsonPathRewriteRule(OperatorType.LOGICAL_PROJECT);
+    }
+
     @Override
-    public OptExpression rewrite(OptExpression root, TaskContext taskContext) {
-        SessionVariable variables = taskContext.getOptimizerContext().getSessionVariable();
+    public List<OptExpression> transform(OptExpression root, OptimizerContext optimizerContext) {
+        SessionVariable variables = optimizerContext.getSessionVariable();
         if (!variables.isEnableJSONV2Rewrite() || variables.isCboUseDBLock()) {
-            return root;
+            return List.of(root);
        }
 
-        ColumnRefFactory columnRefFactory = taskContext.getOptimizerContext().getColumnRefFactory();
+        ColumnRefFactory columnRefFactory = optimizerContext.getColumnRefFactory();
         try {
             JsonPathRewriteVisitor visitor = new JsonPathRewriteVisitor(columnRefFactory);
-            return root.getOp().accept(visitor, root, null);
+            root = root.getOp().accept(visitor, root, null);
+            optimizerContext.getTaskContext().getRequiredColumns().union(root.getOutputColumns());
+            return List.of(root);
         } catch (Exception e) {
             LOG.warn("Failed to rewrite JSON paths in expression: {}", root, e);
-            return root;
+            return List.of(root);
         }
     }
 
@@ -150,6 +174,7 @@
 
             // Create a ref
             ColumnRefOperator newColumnRef = columnRefFactory.create(path, jsonPath.getValueType(), true);
+            newColumnRef.setHints(List.of(COLUMN_REF_HINT));
             columnRefFactory.updateColumnRefToColumns(newColumnRef, extendedColumn, tableAndColumn.first);
             pathMap.put(fullPath, newColumnRef);
 
@@ -168,6 +193,12 @@
             // this rule is only applied during query planning, thus the Table here is already copied for the
             // query. So this change would not affect the original table schema.
             Column extendedColumn = new Column(path, jsonPath.getValueType(), true);
+
+            // Allocate the unique id for extended column
+            OlapTable olapTable = (OlapTable) table;
+            int nextUniqueId = olapTable.incAndGetMaxColUniqueId();
+            extendedColumn.setUniqueId(nextUniqueId);
+
             table.addColumn(extendedColumn);
             return extendedColumn;
         } else {
 
@@ -203,37 +234,83 @@
             return OptExpression.builder().with(optExpr).setInputs(newInputs).build();
         }
 
-        private Map<ColumnRefOperator, ScalarOperator> rewriteProjections(
-                Map<ColumnRefOperator, ScalarOperator> originalProjections,
-                JsonPathRewriteContext context,
-                JsonPathExpressionRewriter rewriter) {
-            Map<ColumnRefOperator, ScalarOperator> rewritten = Maps.newHashMap();
+        @Override
+        public OptExpression visitLogicalProject(OptExpression optExpr, Void v) {
+            Operator child = optExpr.inputAt(0).getOp();
+            if (child instanceof LogicalMetaScanOperator) {
+                return rewriteMetaScan(optExpr, v);
+            }
+            return optExpr;
+        }
 
-            for (var entry : originalProjections.entrySet()) {
-                ScalarOperator rewrittenExpr = rewriteScalar(entry.getValue(), context, rewriter);
-                if (!rewrittenExpr.equals(entry.getValue())) {
-                    rewritten.put(entry.getKey(), rewrittenExpr);
-                } else {
-                    // Keep original expression if no change
-                    rewritten.put(entry.getKey(), entry.getValue());
-                }
+        // PROJECT(get_json_string(c1, 'f1')) -> META_SCAN(c1)
+        // =>
+        // PROJECT(c1.f1) -> META_SCAN(c1.f1)
+        private OptExpression rewriteMetaScan(OptExpression optExpr, Void v) {
+            LogicalProjectOperator project = (LogicalProjectOperator) optExpr.getOp();
+            LogicalMetaScanOperator metaScan = (LogicalMetaScanOperator) optExpr.inputAt(0).getOp();
+            LogicalMetaScanOperator.Builder scanBuilder =
+                    LogicalMetaScanOperator.builder().withOperator(metaScan);
+
+            JsonPathRewriteContext context = new JsonPathRewriteContext(columnRefFactory);
+            JsonPathExpressionRewriter rewriter = new JsonPathExpressionRewriter(context);
+
+            // rewrite project
+            Map<ColumnRefOperator, ScalarOperator> newProjection = Maps.newHashMap();
+            boolean hasChanges = false;
+            for (var entry : project.getColumnRefMap().entrySet()) {
+                ScalarOperator rewritten = rewriteScalar(entry.getValue(), context, rewriter);
+                newProjection.put(entry.getKey(), rewritten);
+                hasChanges = hasChanges || !rewritten.equals(entry.getValue());
             }
 
-            return rewritten;
+            // Check if any changes were made
+            if (!hasChanges) {
+                return optExpr;
+            }
+
+            LogicalProjectOperator newProject = new LogicalProjectOperator(newProjection);
+
+            Map<ColumnRefOperator, Column> metaScanColumnMap =
+                    ImmutableMap.<ColumnRefOperator, Column>builder()
+                            .putAll(metaScan.getColRefToColumnMetaMap())
+                            .putAll(rewriter.getExtendedColumns())
+                            .build();
+
+            scanBuilder.setColRefToColumnMetaMap(metaScanColumnMap);
+
+            // Record the access path into scan node
+            List<ColumnAccessPath> paths = Lists.newArrayList();
+            for (var entry : rewriter.getExtendedColumns().entrySet()) {
+                ColumnAccessPath path = JsonPathRewriteContext.pathFromColumn(entry.getValue());
+                paths.add(path);
+            }
+            scanBuilder.setColumnAccessPaths(paths);
+
+            LogicalMetaScanOperator newMetaScan = scanBuilder.build();
+            OptExpression newMetaScanExpr =
+                    OptExpression.builder()
+                            .with(optExpr.inputAt(0))
+                            .setOp(newMetaScan)
+                            .build();
+            return OptExpression.builder().with(optExpr)
+                    .setOp(newProject)
+                    .setInputs(Lists.newArrayList(newMetaScanExpr))
+                    .build();
         }
 
         @Override
-        public OptExpression visitPhysicalOlapScan(OptExpression optExpr, Void v) {
-            return rewritePhysicalScan(optExpr, v);
+        public OptExpression visitLogicalTableScan(OptExpression optExpr, Void v) {
+            if (!(optExpr.getOp() instanceof LogicalOlapScanOperator)) {
+                return optExpr;
+            }
+            return rewriteLogicalScan(optExpr, v);
        }
 
         /**
          * Rewrites PhysicalScanOperator to handle JSON path access.
          */
-        private OptExpression rewritePhysicalScan(OptExpression optExpr, Void v) {
-            PhysicalScanOperator scanOperator = (PhysicalScanOperator) optExpr.getOp();
-            PhysicalScanOperator.Builder builder =
-                    (PhysicalScanOperator.Builder) OperatorBuilderFactory.build(scanOperator)
+        private OptExpression rewriteLogicalScan(OptExpression optExpr, Void v) {
+            LogicalScanOperator scanOperator = (LogicalScanOperator) optExpr.getOp();
+            LogicalScanOperator.Builder builder =
+                    (LogicalScanOperator.Builder) OperatorBuilderFactory.build(scanOperator)
                             .withOperator(scanOperator);
 
             JsonPathRewriteContext context = new JsonPathRewriteContext(columnRefFactory);
 
@@ -243,20 +320,34 @@
             builder.setPredicate(rewriteScalar(scanOperator.getPredicate(), context, rewriter));
 
             // Rewrite projection if exists
+            ColumnRefSet requiredColumnSet = new ColumnRefSet();
+            if (builder.getPredicate() != null) {
+                requiredColumnSet.union(builder.getPredicate().getUsedColumns());
+            }
             if (scanOperator.getProjection() != null) {
                 Map<ColumnRefOperator, ScalarOperator> mapping = Maps.newHashMap();
                 for (var entry : scanOperator.getProjection().getColumnRefMap().entrySet()) {
                     mapping.put(entry.getKey(), rewriteScalar(entry.getValue(), context, rewriter));
                 }
                 builder.getProjection().getColumnRefMap().putAll(mapping);
+                mapping.values().forEach(x -> requiredColumnSet.union(x.getUsedColumns()));
-            }
+            } else {
+                scanOperator.getOutputColumns().forEach(requiredColumnSet::union);
+            }
 
             if (MapUtils.isNotEmpty(rewriter.getExtendedColumns())) {
                 // Add extended columns to scan operator
                 Map<ColumnRefOperator, Column> colRefToColumnMetaMap = Maps.newHashMap();
-                colRefToColumnMetaMap.putAll(scanOperator.getColRefToColumnMetaMap());
                 colRefToColumnMetaMap.putAll(rewriter.getExtendedColumns());
+                for (var entry : scanOperator.getColRefToColumnMetaMap().entrySet()) {
+                    if (requiredColumnSet.contains(entry.getKey())) {
+                        colRefToColumnMetaMap.put(entry.getKey(), entry.getValue());
+                    }
+                }
                 builder.setColRefToColumnMetaMap(colRefToColumnMetaMap);
+                for (ColumnRefOperator col : rewriter.getExtendedColumns().keySet()) {
+                    optExpr.getOutputColumns().union(col);
+                }
 
                 // Add access paths
                 List<ColumnAccessPath> paths = Lists.newArrayList();
 
@@ -286,6 +377,7 @@
      * Rewrites JSON function calls to column access expressions.
      */
     private static class JsonPathExpressionRewriter extends BottomUpScalarOperatorRewriteRule {
+
         private final JsonPathRewriteContext context;
 
         public JsonPathExpressionRewriter(JsonPathRewriteContext context) {
 
@@ -346,7 +438,8 @@
             ColumnAccessPath accessPath = ColumnAccessPath.createLinearPath(fullPath, resultType);
             Pair<Boolean, ColumnRefOperator> columnResult = context.getOrCreateColumn(jsonColumn, accessPath);
 
+            // Note: extendedColumns are now automatically recorded in context.getOrCreateColumn()
             // Return the new column reference for the JSON path
             // This allows the optimizer to work with the JSON field directly
             return columnResult.second;
         }
 
@@ -355,11 +448,9 @@
          * Returns null if the path was truncated due to exceeding depth limit.
          */
         private static List<String> parseJsonPath(String path) {
-            List<String> result = Lists.newArrayList();
-            boolean wasTruncated =
-                    SubfieldAccessPathNormalizer.parseSimpleJsonPath(DEFAULT_JSON_FLATTEN_DEPTH, path, result);
+            List<String> result = SubfieldAccessPathNormalizer.parseSimpleJsonPath(path);
             // If the path was truncated, return null to prevent incorrect rewriting
-            if (wasTruncated) {
+            if (result.isEmpty()) {
                 return null;
             }
             return result;
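Net effect of the rule, per the shapes in the comments above: for an OLAP scan, get_json_string(c2, 'f2') is rewritten into a reference to an extended column c2.f2 backed by a ColumnAccessPath, and for a META scan the PROJECT/META_SCAN pair is rewritten in tandem; the parameterized test cases at the end of this commit show the resulting plans (e.g. ExtendedColumnAccessPath: [/c2(varchar)/f2(varchar)]).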
@@ -65,6 +65,7 @@ import com.starrocks.sql.optimizer.operator.scalar.LikePredicateOperator;
 import com.starrocks.sql.optimizer.operator.scalar.MatchExprOperator;
 import com.starrocks.sql.optimizer.operator.scalar.ScalarOperator;
 import com.starrocks.sql.optimizer.operator.scalar.ScalarOperatorVisitor;
+import com.starrocks.sql.optimizer.rule.tree.JsonPathRewriteRule;
 import com.starrocks.sql.optimizer.statistics.CacheDictManager;
 import com.starrocks.sql.optimizer.statistics.CacheRelaxDictManager;
 import com.starrocks.sql.optimizer.statistics.ColumnDict;
 
@@ -264,7 +265,10 @@ public class DecodeCollector extends OptExpressionVisitor<DecodeInfo, DecodeInfo
         List<ScalarOperator> dictExprList = stringExpressions.getOrDefault(cid, Collections.emptyList());
         long allExprNum = dictExprList.size();
         // only query original string-column
-        long worthless = dictExprList.stream().filter(ScalarOperator::isColumnRef).count();
+        long worthless = dictExprList.stream()
+                .filter(ScalarOperator::isColumnRef)
+                .filter(x -> !((ColumnRefOperator) x).getHints().contains(JsonPathRewriteRule.COLUMN_REF_HINT))
+                .count();
         // we believe that the more complex expressions using the dict-column, and the preformance will be better
         if (worthless == 0 && allExprNum != 0) {
             context.allStringColumns.add(cid);
 
@@ -690,16 +694,21 @@ public class DecodeCollector extends OptExpressionVisitor<DecodeInfo, DecodeInfo
                 continue;
             }
 
-            ColumnStatistic columnStatistic = GlobalStateMgr.getCurrentState().getStatisticStorage()
-                    .getColumnStatistic(table, column.getName());
-            // Condition 2: the varchar column is low cardinality string column
+            // If it's not an extended column, we have to check the cardinality of the column.
+            // TODO(murphy) support collect cardinality of extended column
+            if (!checkExtendedColumn(scan, column)) {
+                ColumnStatistic columnStatistic = GlobalStateMgr.getCurrentState().getStatisticStorage()
+                        .getColumnStatistic(table, column.getName());
+                // Condition 2: the varchar column is low cardinality string column
 
-            boolean alwaysCollectDict = sessionVariable.isAlwaysCollectDict();
-            if (!alwaysCollectDict && !column.getType().isArrayType() && !FeConstants.USE_MOCK_DICT_MANAGER &&
-                    (columnStatistic.isUnknown() ||
-                            columnStatistic.getDistinctValuesCount() > CacheDictManager.LOW_CARDINALITY_THRESHOLD)) {
-                LOG.debug("{} isn't low cardinality string column", column.getName());
-                continue;
+                boolean alwaysCollectDict = sessionVariable.isAlwaysCollectDict();
+                if (!alwaysCollectDict && !column.getType().isArrayType() && !FeConstants.USE_MOCK_DICT_MANAGER &&
+                        (columnStatistic.isUnknown() ||
+                                columnStatistic.getDistinctValuesCount() >
+                                        CacheDictManager.LOW_CARDINALITY_THRESHOLD)) {
+                    LOG.debug("{} isn't low cardinality string column", column.getName());
+                    continue;
+                }
             }
 
             // Condition 3: the varchar column has collected global dict
 
@@ -874,6 +883,26 @@ public class DecodeCollector extends OptExpressionVisitor<DecodeInfo, DecodeInfo
         return true;
     }
 
+    /**
+     * Check if the column is an extended string column, if so, it can be used for global dict optimization.
+     */
+    private boolean checkExtendedColumn(PhysicalOlapScanOperator scan, ColumnRefOperator column) {
+        if (!sessionVariable.isEnableJSONV2DictOpt()) {
+            return false;
+        }
+        String colName = scan.getColRefToColumnMetaMap().get(column).getName();
+        for (ColumnAccessPath path : scan.getColumnAccessPaths()) {
+            if (path.isExtended() &&
+                    path.getLinearPath().equals(colName) &&
+                    path.getType() == TAccessPathType.ROOT &&
+                    path.getValueType().isStringType()) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
     private void collectPredicate(Operator operator, DecodeInfo info) {
         if (operator.getPredicate() == null) {
             return;
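The effect of checkExtendedColumn: an extended JSON subfield column (a ROOT access path with a string value type) is allowed to skip the cardinality check, since no column statistics exist for it yet, while it still has to satisfy the remaining conditions (such as a collected global dict) before low-cardinality rewriting applies; the whole gate sits behind cbo_json_v2_dict_opt.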
@@ -317,29 +317,27 @@ public class SubfieldAccessPathNormalizer {
     // $.a#b.c -> [a#b, c]
     // $.a.b.c.d.e.f -> [a, b] -- don't support overflown JSON_FLATTEN_DEPTH
     // a.b.c -> [a, b, c]
-    public static boolean parseSimpleJsonPath(int jsonFlattenDepth, String path, List<String> result) {
+    public static List<String> parseSimpleJsonPath(String path) {
+        List<String> result = Lists.newArrayList();
         path = StringUtils.trimToEmpty(path);
         if (StringUtils.isBlank(path) || StringUtils.contains(path, "..") || StringUtils.equals("$", path) ||
                 StringUtils.countMatches(path, "\"") % 2 != 0) {
             // .. is recursive search in json path, not supported
             // unpaired quota char
-            return false;
+            return result;
         }
 
         StrTokenizer tokenizer = new StrTokenizer(path, '.', '"');
         String[] tokens = tokenizer.getTokenArray();
 
         if (tokens.length < 1) {
-            return false;
+            return result;
         }
-        int size = jsonFlattenDepth;
         int i = 0;
         if (tokens[0].equals("$")) {
-            size++;
             i++;
         }
-        size = Math.min(tokens.length, size);
-        for (; i < size; i++) {
+        for (; i < tokens.length; i++) {
            if (tokens[i].contains(".")) {
                 result.add("\"" + tokens[i] + "\"");
                 continue;
 
@@ -347,7 +345,7 @@ public class SubfieldAccessPathNormalizer {
                 result.add(tokens[i]);
             }
         }
-        return size < tokens.length;
+        return result;
     }
 
     public void collect(List<ScalarOperator> scalarOperators) {
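Expected behavior of the new signature, restating the comment examples above plus the failure cases handled in the body (illustrative, not part of the diff):

    // parseSimpleJsonPath("$.a#b.c") -> [a#b, c]   leading "$" token is skipped
    // parseSimpleJsonPath("a.b.c")   -> [a, b, c]
    // parseSimpleJsonPath("$")       -> []          bare "$" is rejected
    // parseSimpleJsonPath("a..b")    -> []          ".." (recursive descent) is unsupported
    // parseSimpleJsonPath("a.\"b")   -> []          unpaired quote char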
@@ -20,6 +20,7 @@ import com.github.benmanes.caffeine.cache.Caffeine;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+import com.starrocks.catalog.Column;
 import com.starrocks.catalog.ColumnId;
 import com.starrocks.catalog.Database;
 import com.starrocks.common.Config;
 
@@ -29,7 +30,9 @@ import com.starrocks.common.ThreadPoolManager;
 import com.starrocks.memory.MemoryTrackable;
 import com.starrocks.qe.ConnectContext;
 import com.starrocks.server.GlobalStateMgr;
+import com.starrocks.sql.common.MetaUtils;
 import com.starrocks.sql.optimizer.base.ColumnIdentifier;
+import com.starrocks.sql.optimizer.rule.tree.prunesubfield.SubfieldAccessPathNormalizer;
 import com.starrocks.thrift.TGlobalDict;
 import com.starrocks.thrift.TStatisticData;
 import org.apache.logging.log4j.LogManager;
 
@@ -48,6 +51,34 @@ import java.util.concurrent.Executor;
 
 import static com.starrocks.statistic.StatisticExecutor.queryDictSync;
 
+/**
+ * CacheDictManager manages global dictionary caching for low cardinality string columns.
+ * <p>
+ * Global Dictionary Maintenance:
+ * <p>
+ * 1. Dictionary Collection:
+ * - Dictionaries are collected from BE (Backend) nodes via queryDictSync()
+ * - Only columns with cardinality <= LOW_CARDINALITY_THRESHOLD are considered
+ * - Dictionary data size must be <= 1MB to ensure BE can generate dictionary pages after compaction
+ * <p>
+ * 2. Cache Management:
+ * - Uses Caffeine AsyncLoadingCache with maximum size from Config.statistic_dict_columns
+ * - Cache entries are keyed by ColumnIdentifier (tableId + columnName)
+ * - Cache automatically loads dictionaries asynchronously when accessed
+ * <p>
+ * 3. Version Control:
+ * - Each dictionary has a version timestamp for consistency checking
+ * - Dictionaries are invalidated when versions become outdated
+ * - Version mismatches trigger dictionary removal and re-collection
+ * <p>
+ * 4. Data Update Handling:
+ * - When data is updated (INSERT/UPDATE/DELETE), the dictionary version becomes stale
+ * - hasGlobalDict() checks version timestamps and invalidates outdated dictionaries
+ * - updateGlobalDict() updates version timestamps when new data is collected
+ * - Version mismatches between collectVersion and dictCollectVersion trigger removal
+ * - During data updates, queries may fall back to non-dictionary optimization temporarily
+ * - New dictionaries are automatically collected when cache is accessed after invalidation
+ */
 public class CacheDictManager implements IDictManager, MemoryTrackable {
     private static final Logger LOG = LogManager.getLogger(CacheDictManager.class);
     private static final Set<ColumnIdentifier> NO_DICT_STRING_COLUMNS = Sets.newConcurrentHashSet();
 
@@ -60,7 +91,7 @@ public class CacheDictManager implements IDictManager, MemoryTrackable {
 
     private static final CacheDictManager INSTANCE = new CacheDictManager();
 
-    protected static CacheDictManager getInstance() {
+    public static CacheDictManager getInstance() {
         return INSTANCE;
     }
 
@@ -235,6 +266,28 @@ public class CacheDictManager implements IDictManager, MemoryTrackable {
 
         LOG.info("remove dict for table:{} column:{}", tableId, columnName);
         dictStatistics.synchronous().invalidate(columnIdentifier);
+
+        // Remove all subfields' dicts if the column is a JSON type
+        try {
+            List<String> parts = SubfieldAccessPathNormalizer.parseSimpleJsonPath(columnName.getId());
+            if (parts.size() > 1) {
+                String rootColumn = parts.get(0);
+                columnIdentifier = new ColumnIdentifier(tableId, ColumnId.create(rootColumn));
+
+                // Set dbId for the columnIdentifier to enable MetaUtils.getColumnByColumnId lookup
+                long dbId = MetaUtils.lookupDbIdByTableId(tableId);
+                if (dbId != -1) {
+                    columnIdentifier.setDbId(dbId);
+                    Column column = MetaUtils.getColumnByColumnId(columnIdentifier);
+                    if (column.getType().isJsonType()) {
+                        removeGlobalDictForJson(tableId, column.getColumnId());
+                    }
+                } else {
+                    LOG.debug("Could not find db id for table {}, skipping JSON subfield cleanup", tableId);
+                }
+            }
+        } catch (Exception ignored) {
+        }
     }
 
     @Override
 
@@ -298,6 +351,83 @@ public class CacheDictManager implements IDictManager, MemoryTrackable {
         return Optional.empty();
     }
 
+    public Optional<ColumnDict> getGlobalDictSync(long tableId, ColumnId columnName) {
+        ColumnIdentifier columnIdentifier = new ColumnIdentifier(tableId, columnName);
+
+        // NOTE: it's used to patch the dbId, because asyncLoad requires the dbId
+        long dbId = MetaUtils.lookupDbIdByTableId(tableId);
+        if (dbId == -1) {
+            throw new RuntimeException("table not found " + tableId);
+        }
+        columnIdentifier.setDbId(dbId);
+
+        CompletableFuture<Optional<ColumnDict>> columnFuture = dictStatistics.get(columnIdentifier);
+        try {
+            return columnFuture.get();
+        } catch (Exception e) {
+            LOG.warn(String.format("get dict cache for %d: %s failed", tableId, columnName), e);
+        }
+        return Optional.empty();
+    }
+
+    // TODO(murphy) support an efficient dict cache for JSON subfields
+    @Override
+    public boolean hasGlobalDictForJson(long tableId, ColumnId columnName) {
+        // TODO(murphy) optimizer performance
+        return dictStatistics.asMap().keySet().stream().anyMatch(column -> {
+            if (column.getTableId() != tableId) {
+                return false;
+            }
+            List<String> parts = SubfieldAccessPathNormalizer.parseSimpleJsonPath(column.getColumnName().getId());
+            if (parts.size() > 1 && parts.get(0).equalsIgnoreCase(columnName.getId())) {
+                return true;
+            }
+            return false;
+        });
+    }
+
+    // TODO(murphy) support an efficient dict cache for JSON subfields
+    @Override
+    public List<ColumnDict> getGlobalDictForJson(long tableId, ColumnId columnName) {
+        // TODO(murphy) optimizer performance
+        return dictStatistics.synchronous().asMap().entrySet().stream().filter(kv -> {
+            ColumnIdentifier column = kv.getKey();
+            if (column.getTableId() != tableId) {
+                return false;
+            }
+            List<String> parts = SubfieldAccessPathNormalizer.parseSimpleJsonPath(column.getColumnName().getId());
+            if (parts.size() > 1 && parts.get(0).equalsIgnoreCase(columnName.getId())) {
+                return true;
+            }
+            return false;
+        }).flatMap(x -> x.getValue().stream()).toList();
+    }
+
+    // TODO(murphy) support an efficient dict cache for JSON subfields
+    @Override
+    public void removeGlobalDictForJson(long tableId, ColumnId columnName) {
+        if (GlobalStateMgr.isCheckpointThread()) {
+            return;
+        }
+        // TODO(murphy) optimizer performance
+        // Remove all global dict entries for JSON subfields of the given column in the given table.
+        // This is used to invalidate all dicts for subfields of a JSON column (e.g., c2.f1, c2.f2, etc.)
+        List<ColumnIdentifier> toRemove = new ArrayList<>();
+        for (ColumnIdentifier column : dictStatistics.asMap().keySet()) {
+            if (column.getTableId() != tableId) {
+                continue;
+            }
+            List<String> parts = SubfieldAccessPathNormalizer.parseSimpleJsonPath(column.getColumnName().getId());
+            if (parts.size() > 1 && parts.get(0).equalsIgnoreCase(columnName.getId())) {
+                toRemove.add(column);
+            }
+        }
+        for (ColumnIdentifier column : toRemove) {
+            dictStatistics.synchronous().invalidate(column);
+        }
+    }
+
     @Override
     public Map<String, Long> estimateCount() {
         return ImmutableMap.of("ColumnDict", (long) dictStatistics.asMap().size());
@@ -16,7 +16,9 @@ package com.starrocks.sql.optimizer.statistics;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableMap;
+import com.google.gson.Gson;
 import com.starrocks.common.Config;
+import com.starrocks.persist.gson.GsonUtils;
 
 import java.nio.ByteBuffer;
 
@@ -46,4 +48,26 @@ public final class ColumnDict extends StatsVersion {
     public int getDictSize() {
         return dict.size();
     }
+
+    public String toJson() {
+        Gson gson = GsonUtils.GSON;
+        // Manually build a JSON object with all fields
+        // Convert ByteBuffer keys to base64 strings for JSON compatibility
+        java.util.Map<String, Integer> dictMap = new java.util.HashMap<>();
+        for (java.util.Map.Entry<ByteBuffer, Integer> entry : dict.entrySet()) {
+            ByteBuffer key = entry.getKey();
+            // Duplicate to avoid changing position
+            ByteBuffer dup = key.duplicate();
+            byte[] bytes = new byte[dup.remaining()];
+            dup.get(bytes);
+            // Convert bytes to string using UTF-8 encoding
+            String strKey = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
+            dictMap.put(strKey, entry.getValue());
+        }
+        java.util.Map<String, Object> jsonMap = new java.util.HashMap<>();
+        jsonMap.put("dict", dictMap);
+        jsonMap.put("collectedVersion", collectedVersion);
+        jsonMap.put("version", version);
+        return gson.toJson(jsonMap);
+    }
 }
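Assuming a two-entry dict {ab -> 1, cd -> 2} at version 10 collected at version 10, toJson() above would render something like {"dict":{"ab":1,"cd":2},"collectedVersion":10,"version":10}; note that despite the comment, keys are decoded as UTF-8 strings rather than base64, and key order follows HashMap iteration.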
@@ -18,6 +18,7 @@ package com.starrocks.sql.optimizer.statistics;
 import com.starrocks.catalog.ColumnId;
 import com.starrocks.common.FeConstants;
 
+import java.util.List;
 import java.util.Optional;
 
 public interface IDictManager {
 
@@ -36,6 +37,24 @@ public interface IDictManager {
     // You should call `hasGlobalDict` firstly to ensure the global dict exist
     Optional<ColumnDict> getGlobalDict(long tableId, ColumnId columnName);
 
+    /**
+     * For JSON type column, we use a different dict manager
+     * For example, '{"f1": "a", "f2": "b"}' will be stored as multiple entries in the cache.
+     * Cache Invalidation: All fields in the JSON will be invalidated when the JSON is updated
+     * Cache Population: the cache is populated when the JSON field is first accessed
+     * TODO(murphy) invalidate individual json field when the JSON is updated
+     */
+    default boolean hasGlobalDictForJson(long tableId, ColumnId columnName) {
+        return false;
+    }
+
+    default List<ColumnDict> getGlobalDictForJson(long tableId, ColumnId columnName) {
+        return List.of();
+    }
+
+    default void removeGlobalDictForJson(long tableId, ColumnId columnName) {
+    }
+
     static IDictManager getInstance() {
         if (FeConstants.USE_MOCK_DICT_MANAGER) {
             return MockDictManager.getInstance();
@@ -825,7 +825,8 @@ public class PlanFragmentBuilder {
                     .collect(Collectors.toSet());
             if (scan.getPredicate() == null) {
                 // remove path which has pruned
-                return scan.getColumnAccessPaths().stream().filter(p -> checkNames.contains(p.getPath()))
+                return scan.getColumnAccessPaths().stream()
+                        .filter(p -> checkNames.contains(p.getPath()) || p.isExtended())
                         .collect(Collectors.toList());
             }
 
@@ -1031,6 +1032,7 @@ public class PlanFragmentBuilder {
                     scan.getSelectPartitionNames(),
                     context.getConnectContext().getCurrentComputeResource());
 
+            scanNode.setColumnAccessPaths(scan.getColumnAccessPaths());
             scanNode.computeRangeLocations(computeResource);
             scanNode.computeStatistics(optExpression.getStatistics());
             currentExecGroup.add(scanNode, true);
@ -23,7 +23,6 @@ import com.starrocks.catalog.Column;
|
|||
import com.starrocks.catalog.ColumnId;
|
||||
import com.starrocks.catalog.Database;
|
||||
import com.starrocks.catalog.InternalCatalog;
|
||||
import com.starrocks.catalog.OlapTable;
|
||||
import com.starrocks.catalog.PhysicalPartition;
|
||||
import com.starrocks.catalog.Table;
|
||||
import com.starrocks.catalog.Type;
|
||||
|
|
@ -48,6 +47,7 @@ import com.starrocks.sql.ast.StatementBase;
|
|||
import com.starrocks.sql.common.ErrorType;
|
||||
import com.starrocks.sql.common.MetaUtils;
|
||||
import com.starrocks.sql.common.StarRocksPlannerException;
|
||||
import com.starrocks.sql.optimizer.rule.tree.prunesubfield.SubfieldAccessPathNormalizer;
|
||||
import com.starrocks.sql.optimizer.statistics.CacheDictManager;
|
||||
import com.starrocks.sql.optimizer.statistics.IRelaxDictManager;
|
||||
import com.starrocks.sql.parser.SqlParser;
|
||||
|
|
@@ -322,15 +322,46 @@ public class StatisticExecutor {
                    table.getName());
        }

        OlapTable olapTable = (OlapTable) table;
        long version = table.getPartitions().stream().flatMap(p -> p.getSubPartitions().stream()).map(
                PhysicalPartition::getVisibleVersionTime).max(Long::compareTo).orElse(0L);
        String columnName = MetaUtils.getColumnNameByColumnId(dbId, tableId, columnId);
        List<String> pieces = SubfieldAccessPathNormalizer.parseSimpleJsonPath(columnId.getId());
        if (pieces.size() == 1) {
            String columnName = MetaUtils.getColumnNameByColumnId(dbId, tableId, columnId);
            String catalogName = InternalCatalog.DEFAULT_INTERNAL_CATALOG_NAME;
            String sql = "select cast(" + StatsConstants.STATISTIC_DICT_VERSION + " as Int), " +
                    "cast(" + version + " as bigint), " +
                    "dict_merge(" + StatisticUtils.quoting(columnName) + ", " +
                    CacheDictManager.LOW_CARDINALITY_THRESHOLD + ") as _dict_merge_" + columnName +
                    " from " + StatisticUtils.quoting(catalogName, db.getOriginName(), table.getName()) + " [_META_]";
            return executeStatisticDQLWithoutContext(sql);
        } else {
            return queryDictSyncForJson(dbId, tableId, columnId, version, db, table);
        }
    }

    private static Pair<List<TStatisticData>, Status> queryDictSyncForJson(Long dbId, Long tableId,
                                                                           ColumnId columnId,
                                                                           long version,
                                                                           Database db, Table table) throws TException {
        List<String> pieces = SubfieldAccessPathNormalizer.parseSimpleJsonPath(columnId.getId());
        if (pieces.isEmpty()) {
            throw new RuntimeException("invalid json path: " + columnId);
        }
        ColumnId realColumnId = ColumnId.create(pieces.get(0));
        Column column = MetaUtils.getColumnByColumnId(dbId, tableId, realColumnId);
        if (!column.getType().equals(Type.JSON)) {
            throw new SemanticException("Column '%s' is not a JSON type", column.getName());
        }
        String fullPath = String.join(".", pieces);
        String path = pieces.stream().skip(1).collect(Collectors.joining("."));
        String columnRef = String.format("get_json_string(%s, '%s')", StatisticUtils.quoting(column.getName()), path);
        String catalogName = InternalCatalog.DEFAULT_INTERNAL_CATALOG_NAME;
        String columnAlias = fullPath.replace(".", "_");

        String sql = "select cast(" + StatsConstants.STATISTIC_DICT_VERSION + " as Int), " +
                "cast(" + version + " as bigint), " +
                "dict_merge(" + StatisticUtils.quoting(columnName) + ", " +
                CacheDictManager.LOW_CARDINALITY_THRESHOLD + ") as _dict_merge_" + columnName +
                "dict_merge(" + columnRef + ", " +
                CacheDictManager.LOW_CARDINALITY_THRESHOLD + ") as _dict_merge_" + columnAlias +
                " from " + StatisticUtils.quoting(catalogName, db.getOriginName(), table.getName()) + " [_META_]";

        return executeStatisticDQLWithoutContext(sql);
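When the path has more than one segment, the branch above hands off to queryDictSyncForJson, which wraps the sub-path in get_json_string and collects the dict through a meta scan. A minimal sketch of the statement it builds, assuming the column path c1.f2 on a hypothetical table test_db.js2 (the dict-version and table-version literals are placeholders; 255 is CacheDictManager.LOW_CARDINALITY_THRESHOLD):

-- hypothetical output of queryDictSyncForJson for the path c1.f2;
-- the two cast() literals and the database/table names are placeholders
select cast(1 as Int), cast(123456789 as bigint),
    dict_merge(get_json_string(`c1`, 'f2'), 255) as _dict_merge_c1_f2
from `default_catalog`.`test_db`.`js2` [_META_];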
@@ -15,6 +15,7 @@
package com.starrocks.transaction;

import com.google.common.collect.Lists;
import com.starrocks.catalog.Column;
import com.starrocks.catalog.ColumnId;
import com.starrocks.catalog.Database;
import com.starrocks.catalog.LocalTablet;

@@ -207,6 +208,15 @@ public class OlapTableTxnLogApplier implements TransactionLogApplier {
            maxPartitionVersionTime = Math.max(maxPartitionVersionTime, versionTime);
        }

        // TODO(murphy) don't invalidate all cache columns, only invalidate the columns that are changed
        // Invalidate the dict for JSON type
        List<Column> jsonColumns = table.getColumns().stream()
                .filter(x -> x.getType().isJsonType())
                .toList();
        for (Column column : jsonColumns) {
            IDictManager.getInstance().removeGlobalDictForJson(tableId, column.getColumnId());
        }

        if (!GlobalStateMgr.isCheckpointThread() && dictCollectedVersions.size() == validDictCacheColumns.size()) {
            for (int i = 0; i < validDictCacheColumns.size(); i++) {
                ColumnId columnName = validDictCacheColumns.get(i);
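The effect of this hook is observable from SQL: after a transaction commits into a table with JSON columns, the cached per-path dicts for those columns are dropped and must be re-collected. A minimal sketch, reusing the js2 table and the inspect_global_dict debug function from the regression test further down:

insert into js2 values (1, json_object('f1', 'a1'));
-- the commit hook above removed the cached dict for c1, so the next
-- inspection reports no dict until a new collection runs
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');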
@@ -14,6 +14,7 @@

package com.starrocks.sql.plan;

import com.starrocks.common.FeConstants;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
@@ -30,12 +31,20 @@ public class JsonPathRewriteTest extends PlanTestBase {
        starRocksAssert.withTable("create table extend_predicate( c1 int, c2 json ) properties('replication_num'='1')");
        starRocksAssert.withTable("create table extend_predicate2( c1 int, c2 json ) properties" +
                "('replication_num'='1')");

        FeConstants.USE_MOCK_DICT_MANAGER = true;
        connectContext.getSessionVariable().setEnableLowCardinalityOptimize(true);
        connectContext.getSessionVariable().setUseLowCardinalityOptimizeV2(true);
    }

    @ParameterizedTest
    @MethodSource("jsonPathRewriteTestCases")
    public void testExtendPredicateParameterized(String sql, String expectedPlanFragment, String expectedColumnPath)
            throws Exception {
        connectContext.getSessionVariable().setEnableJSONV2DictOpt(false);
        connectContext.getSessionVariable().setUseLowCardinalityOptimizeV2(false);
        connectContext.getSessionVariable().setEnableLowCardinalityOptimize(false);
        connectContext.getSessionVariable().setQueryTimeoutS(10000);
        String plan = getFragmentPlan(sql);
        assertContains(plan, expectedPlanFragment);

@@ -45,46 +54,50 @@ public class JsonPathRewriteTest extends PlanTestBase {

    private static Stream<Arguments> jsonPathRewriteTestCases() {
        return of(
                // Projection: JSON expression in SELECT list
                // [1] Projection: JSON expression in SELECT list
                Arguments.of(
                        "select get_json_string(c2, 'f2') as f2_str from extend_predicate",
                        " 1:Project\n | <slot 3> : 4: c2.f2\n",
                        "ExtendedColumnAccessPath: [/c2(varchar)/f2(varchar)]"
                ),
                // [2]
                Arguments.of(
                        "select get_json_string(c2, '$.f2.f3') as f2_str from extend_predicate",
                        " 1:Project\n | <slot 3> : 4: c2.f2.f3\n",
                        "ExtendedColumnAccessPath: [/c2(varchar)/f2(varchar)/f3(varchar)]"
                ),
                // [3]
                Arguments.of(
                        "select get_json_string(c2, 'f1.f2.f3') as f2_str from extend_predicate",
                        " 1:Project\n | <slot 3> : 4: c2.f1.f2.f3\n",
                        "ExtendedColumnAccessPath: [/c2(varchar)/f1(varchar)/f2(varchar)/f3(varchar)]"
                ),
                // [4]
                Arguments.of(
                        "select get_json_string(c2, 'f1[1]') as f2_str from extend_predicate",
                        " | <slot 3> : get_json_string(2: c2, 'f1[1]')\n",
                        ""
                ),
                // TODO: support this case
                // [5]
                Arguments.of(
                        "select get_json_string(c2, 'f1\".a\"') as f2_str from extend_predicate",
                        " | <slot 3> : get_json_string(2: c2, 'f1\".a\"')\n",
                        ""
                ),
                // Filter: JSON expression in WHERE clause with different function
                // [6] Filter: JSON expression in WHERE clause with different function
                Arguments.of(
                        "select * from extend_predicate where get_json_double(c2, 'f3') > 1.5",
                        "PREDICATES: 3: c2.f3 > 1.5",
                        ""
                ),
                // Order By: JSON expression in ORDER BY
                // [7] Order By: JSON expression in ORDER BY
                Arguments.of(
                        "select * from extend_predicate order by get_json_string(c2, 'f4')",
                        "order by: <slot 3> 3: get_json_string ASC",
                        ""
                ),
                // Aggregation: JSON expression in GROUP BY
                // [8] Aggregation: JSON expression in GROUP BY
                Arguments.of(
                        "select get_json_int(c2, 'f5'), count(*) from extend_predicate group by get_json_int(c2, 'f5')",
                        " 2:AGGREGATE (update finalize)\n" +
@@ -95,7 +108,7 @@ public class JsonPathRewriteTest extends PlanTestBase {
                                " | <slot 3> : 5: c2.f5\n",
                        "ExtendedColumnAccessPath: [/c2(bigint(20))/f5(bigint(20))]"
                ),
                // Aggregation: JSON expression in aggregation function
                // [9] Aggregation: JSON expression in aggregation function
                Arguments.of(
                        "select sum(get_json_int(c2, 'f6')) from extend_predicate",
                        " 2:AGGREGATE (update finalize)\n" +
@@ -106,23 +119,23 @@ public class JsonPathRewriteTest extends PlanTestBase {
                                " | <slot 3> : 5: c2.f6\n",
                        "ExtendedColumnAccessPath: [/c2(bigint(20))/f6(bigint(20))]"
                ),
                // Join: JSON expression in join condition
                // [10] Join: JSON expression in join condition
                Arguments.of(
                        "select * from extend_predicate t1 join extend_predicate2 t2 on get_json_int(t1.c2, 'f7') = " +
                                "get_json_int(t2.c2, 'f7')",
                        " | <slot 5> : 9: c2.f7\n",
                        " | <slot 5> : 10: c2.f7\n",
                        ""
                ),
                // Join: JSON expression in projection after join
                // [11] Join: JSON expression in projection after join
                Arguments.of(
                        "select get_json_string(t1.c2, 'f8'), get_json_string(t2.c2, 'f8') from extend_predicate t1 " +
                                "join extend_predicate2 t2 on t1.c1 = t2.c1",
                        " 3:Project\n" +
                                " | <slot 3> : 3: c1\n" +
                                " | <slot 8> : 10: c2.f8\n",
                                " | <slot 8> : 9: c2.f8\n",
                        "ExtendedColumnAccessPath: [/c2(varchar)/f8(varchar)]"
                ),
                // Join: self-join with JSON predicate pushdown
                // [12] Join: self-join with JSON predicate pushdown
                Arguments.of(
                        """
                        select t1.c2
@@ -134,7 +147,7 @@ public class JsonPathRewriteTest extends PlanTestBase {
                        "c2.f2",
                        "ExtendedColumnAccessPath: [/c2(bigint(20))/f2(bigint(20))]"
                ),
                // Join: self-join with JSON projection pushdown
                // [13] Join: self-join with JSON projection pushdown
                Arguments.of(
                        """
                        select get_json_int(t1.c2 , 'f2'), get_json_int(t2.c2, 'f2')
@@ -145,7 +158,7 @@ public class JsonPathRewriteTest extends PlanTestBase {
                        "c2.f2",
                        "ExtendedColumnAccessPath: [/c2(bigint(20))/f2(bigint(20))]"
                ),
                // Join: self-join without JSON expression pushdown
                // [14] Join: self-join without JSON expression pushdown
                Arguments.of(
                        """
                        select t1.c2
@@ -156,40 +169,145 @@ public class JsonPathRewriteTest extends PlanTestBase {
                        "get_json_int",
                        "equal join conjunct: [5: get_json_int, BIGINT, true] = [6: get_json_int, BIGINT, true]"
                ),
                // JSON expression in HAVING clause
                // [15] JSON expression in HAVING clause
                Arguments.of(
                        "select get_json_int(c2, 'f9'), count(*) from extend_predicate group by get_json_int(c2, " +
                                "'f9') having get_json_int(c2, 'f9') > 10",
                        " 1:Project\n" +
                                " | <slot 3> : 5: c2.f9\n",
                        """
                        select get_json_int(c2, 'f9'), count(*) from extend_predicate
                        group by get_json_int(c2, 'f9')
                        having get_json_int(c2, 'f9') > 10
                        """,
                        " 1:Project\n | <slot 3> : 5: c2.f9\n",
                        "ExtendedColumnAccessPath: [/c2(bigint(20))/f9(bigint(20))]"
                ),
                // JSON expression in complex filter (AND/OR)
                // [16] JSON expression in complex filter (AND/OR)
                Arguments.of(
                        "select * from extend_predicate where get_json_int(c2, 'f10') = 1 or get_json_string(c2, " +
                                "'f11') = 'abc'",
                        "PREDICATES: (3: c2.f10 = 1) OR (4: c2.f11 = 'abc')",
                        " ExtendedColumnAccessPath: [/c2(bigint(20))/f10(bigint(20)), /c2(varchar)/f11(varchar)]"
                ),
                // JSON expression in nested function
                // [17] JSON expression in nested function
                Arguments.of(
                        "select abs(get_json_int(c2, 'f12')) from extend_predicate",
                        " <slot 3> : abs(4: c2.f12)",
                        "ExtendedColumnAccessPath: [/c2(bigint(20))/f12(bigint(20))]"
                ),
                // JSON path exceeding depth limit should not be rewritten
                // [18] JSON path exceeding depth limit should not be rewritten
                Arguments.of(
                        "select get_json_string(c2, 'f1.f2.f3.f4.f5.f6.f7.f8.f9.f10.f11.f12.f13.f14.f15.f16.f17.f18" +
                                ".f19.f20.f21') from extend_predicate",
                        " | <slot 3> : get_json_string(2: c2, 'f1.f2.f3.f4.f5.f6.f7.f8.f9.f10.f11.f12.f13.f14.f1.." +
                                ".')",
                        "<slot 3> : 4: c2.f1.f2.f3.f4.f5.f6.f7.f8.f9.f10.f11.f12.f13.f14.f15.f16.f17.f18.f19.f20.f21\n",
                        ""
                ),
                // JSON path with mixed data types should not be rewritten
                // [19] JSON path with mixed data types should not be rewritten
                Arguments.of(
                        "select get_json_string(c2, 'f2'), get_json_int(c2, 'f2') from extend_predicate",
                        "get_json_string(2: c2, 'f2')",
                        ""
                ),
                // [20] MetaScan
                Arguments.of(
                        "select dict_merge(get_json_string(c2, 'f1'), 255) from extend_predicate [_META_]",
                        "0:MetaScan\n" +
                                " Table: extend_predicate\n" +
                                " <id 6> : dict_merge_c2.f1\n",
                        " ExtendedColumnAccessPath: [/c2(varchar)/f1(varchar)]\n"
                )
        );
    }

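Each case can be reproduced interactively: with the rewrite enabled, the JSON call collapses into a subfield slot, and the scan node reports the pushed-down path. A minimal sketch against the extend_predicate table created in the setup above:

set cbo_json_v2_rewrite = true;
explain verbose select get_json_string(c2, 'f2') from extend_predicate;
-- expected: a projection slot of the form `c2.f2` and, on the scan node,
-- ExtendedColumnAccessPath: [/c2(varchar)/f2(varchar)]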
    @ParameterizedTest
    @MethodSource("globalDictRewriteCases")
    public void testGlobalDictRewrite(String sql, String expectedPlanFragment) throws Exception {
        connectContext.getSessionVariable().setEnableJSONV2DictOpt(true);
        connectContext.getSessionVariable().setUseLowCardinalityOptimizeV2(true);
        connectContext.getSessionVariable().setEnableLowCardinalityOptimize(true);
        String verbosePlan = getVerboseExplain(sql);
        assertContains(verbosePlan, expectedPlanFragment);
    }

    private static Stream<Arguments> globalDictRewriteCases() {
        return of(
                // 1. Predicate
                Arguments.of(
                        "select count(*) from extend_predicate2 where get_json_string(c2, 'f1') = 'value'",
                        "Predicates: DictDecode(7: c2.f1, [<place-holder> = 'value'])"
                ),
                // 2. Predicate with count on get_json_string
                Arguments.of(
                        "select count(get_json_string(c2, 'f1')) from extend_predicate2 where get_json_string(c2, " +
                                "'f1') = 'value'",
                        "Predicates: DictDecode(6: c2.f1, [<place-holder> = 'value'])"
                ),

                // 3. Aggregation with group by JSON path
                Arguments.of(
                        "select get_json_string(c2, 'f1') k1, count(*) from extend_predicate2 group by k1",
                        "7 <-> DictDefine(6: c2.f1, [<place-holder>])"
                ),
                // 4. Aggregation with min on JSON path
                Arguments.of(
                        "select min(get_json_string(c2, 'f1')) from extend_predicate2",
                        " 7 <-> DictDefine(6: c2.f1, [<place-holder>])"
                ),

                // 5. Multiple JSON path accesses in select and where clause
                Arguments.of(
                        "select get_json_string(c2, 'f1'), get_json_string(c2, 'f2') from extend_predicate2 where " +
                                "get_json_string(c2, 'f1') = 'value' and get_json_string(c2, 'f2') = 'other'",
                        "Predicates: DictDecode(7: c2.f1, [<place-holder> = 'value']), DictDecode(8: c2.f2, " +
                                "[<place-holder> = 'other'])"
                ),
                // 6. Aggregation on JSON path
                Arguments.of(
                        "select get_json_string(c2, 'f3'), sum(c1) from extend_predicate2 group by get_json_string" +
                                "(c2, 'f3')",
                        " 7 <-> DictDefine(6: c2.f3, [<place-holder>])"
                ),
                // 7. Nested function using JSON path
                Arguments.of(
                        "select upper(get_json_string(c2, 'f4')) from extend_predicate2",
                        " 3 <-> DictDecode(5: c2.f4, [upper(<place-holder>)])"
                ),
                // 8. JSON path in having clause
                Arguments.of(
                        "select get_json_string(c2, 'f5'), count(*) from extend_predicate2 group by get_json_string" +
                                "(c2, 'f5') having get_json_string(c2, 'f5') = 'foo'",
                        "7 <-> DictDefine(6: c2.f5, [<place-holder>])"
                ),
                // 9. JSON path in order by
                Arguments.of(
                        "select get_json_string(c2, 'f6') from extend_predicate2 order by get_json_string(c2, 'f6')",
                        " 6: DictDefine(5: c2.f6, [<place-holder>])"
                ),
                // 10. JSON path in both select and where, different fields
                Arguments.of(
                        "select get_json_string(c2, 'f7') from extend_predicate2 where get_json_string(c2, 'f8') = " +
                                "'bar'",
                        "3 <-> DictDecode(6: c2.f7, [<place-holder>])"
                ),
                // 11. JSON path in a function argument
                Arguments.of(
                        "select length(get_json_string(c2, 'f9')) from extend_predicate2",
                        "3 <-> DictDecode(5: c2.f9, [length(<place-holder>)])"
                ),
                // 12. JSON path in a filter with OR
                Arguments.of(
                        "select count(*) from extend_predicate2 where get_json_string(c2, 'f10') = 'a' or " +
                                "get_json_string(c2, 'f11') = 'b'",
                        " Predicates: (DictDecode(8: c2.f10, [<place-holder> = 'a'])) OR (DictDecode(9: c2.f11, " +
                                "[<place-holder> = 'b']))"
                ),
                // 13. JSON path in a subquery
                Arguments.of(
                        "select * from extend_predicate2 where c1 in (select c1 from extend_predicate2 where " +
                                "get_json_string(c2, 'f12') = 'baz')",
                        "Predicates: 3: c1 IS NOT NULL, DictDecode(7: c2.f12, [<place-holder> = 'baz'])"
                ),
                // 14. JSON path in a case expression
                Arguments.of(
                        "select case when get_json_string(c2, 'f13') = 'x' then 1 else 0 end from extend_predicate2",
                        "3 <-> DictDecode(5: c2.f13, [if(<place-holder> = 'x', 1, 0)])"
                )

        );
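Outside the unit test, the same rewrite needs both JSON-v2 switches and the low-cardinality optimization enabled together, which is how the regression test below configures its session. A minimal sketch for the first case:

set cbo_json_v2_rewrite = true;
set cbo_json_v2_dict_opt = true;
explain verbose select count(*) from extend_predicate2 where get_json_string(c2, 'f1') = 'value';
-- once a dict has been collected for c2.f1, the predicate is expected
-- to appear as DictDecode(<slot>: c2.f1, [<place-holder> = 'value'])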
@@ -125,19 +125,6 @@ class ColumnUsageTest extends PlanTestBase {
                "|analyze table t0 predicate columns|v1",
                "select v1, count(v2) from (select * from t0 order by v1 limit 100) r group by v1" +
                        "|analyze table t0 predicate columns|v1",
                "select case when get_json_string(v_json, 'a') > 1 " +
                        " then get_json_string(v_json, 'b') else 'small' end as v1_case, " +
                        "count(*) from tjson " +
                        "group by 1" +
                        "|analyze table tjson predicate columns|v_json",
                "select case when get_json_string(vvv, 'a') > 1 " +
                        " then get_json_string(vvv, 'b') else 'small' end as v1_case, " +
                        "count(*) from (" +
                        " select json_object('a', get_json_string(v_json, 'a'), " +
                        " 'b', get_json_int(v_json, 'b')) as vvv " +
                        " from tjson) r " +
                        "group by 1" +
                        "|analyze table tjson predicate columns|v_json",

                // with join
                "select * from t0 join t1 on t0.v1 = t1.v4" +
@@ -0,0 +1,480 @@
-- name: test_normal_flat_json_dict @sequential
update information_schema.be_configs set value = 'true' where name = 'enable_json_flat';
-- result:
-- !result
set enable_profile = true;
-- result:
-- !result
set enable_async_profile = false;
-- result:
-- !result
set cbo_json_v2_rewrite = true;
-- result:
-- !result
set cbo_json_v2_dict_opt = true;
-- result:
-- !result
create view profile_decode as
select trim(unnest)
from table(unnest(split(get_query_profile(last_query_id()), '\n')))
where unnest like '%DICT_DECODE%'
order by unnest;
-- result:
-- !result
CREATE TABLE js2 (
    v1 BIGINT NULL,
    c1 JSON NULL
)
DUPLICATE KEY (v1)
DISTRIBUTED BY HASH(`v1`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );
-- result:
-- !result
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 10),
        'f2', concat('a', generate_series % 100),
        'f3', concat('a', generate_series % 200),
        'f4', concat('a', generate_series % 500)
    )
from (table(generate_series(1, 1000)));
-- result:
-- !result
select dict_merge(get_json_string(c1, 'f1'), 255) from js2 [_META_];
-- result:
{"2":{"lst":["str",10,"YTA","YTE","YTI","YTM","YTQ","YTU","YTY","YTc","YTg","YTk"]},"3":{"lst":["i32",10,1,2,3,4,5,6,7,8,9,10]}}
-- !result
select dict_merge(get_json_string(c1, 'f2'), 255) from js2 [_META_];
-- result:
{"2":{"lst":["str",100,"YTA","YTE","YTEw","YTEx","YTEy","YTEz","YTE0","YTE1","YTE2","YTE3","YTE4","YTE5","YTI","YTIw","YTIx","YTIy","YTIz","YTI0","YTI1","YTI2","YTI3","YTI4","YTI5","YTM","YTMw","YTMx","YTMy","YTMz","YTM0","YTM1","YTM2","YTM3","YTM4","YTM5","YTQ","YTQw","YTQx","YTQy","YTQz","YTQ0","YTQ1","YTQ2","YTQ3","YTQ4","YTQ5","YTU","YTUw","YTUx","YTUy","YTUz","YTU0","YTU1","YTU2","YTU3","YTU4","YTU5","YTY","YTYw","YTYx","YTYy","YTYz","YTY0","YTY1","YTY2","YTY3","YTY4","YTY5","YTc","YTcw","YTcx","YTcy","YTcz","YTc0","YTc1","YTc2","YTc3","YTc4","YTc5","YTg","YTgw","YTgx","YTgy","YTgz","YTg0","YTg1","YTg2","YTg3","YTg4","YTg5","YTk","YTkw","YTkx","YTky","YTkz","YTk0","YTk1","YTk2","YTk3","YTk4","YTk5"]},"3":{"lst":["i32",100,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100]}}
-- !result
select dict_merge(get_json_string(c1, 'f3'), 255) from js2 [_META_];
-- result:
{"2":{"lst":["str",200,"YTA","YTE","YTEw","YTEwMA","YTEwMQ","YTEwMg","YTEwMw","YTEwNA","YTEwNQ","YTEwNg","YTEwNw","YTEwOA","YTEwOQ","YTEx","YTExMA","YTExMQ","YTExMg","YTExMw","YTExNA","YTExNQ","YTExNg","YTExNw","YTExOA","YTExOQ","YTEy","YTEyMA","YTEyMQ","YTEyMg","YTEyMw","YTEyNA","YTEyNQ","YTEyNg","YTEyNw","YTEyOA","YTEyOQ","YTEz","YTEzMA","YTEzMQ","YTEzMg","YTEzMw","YTEzNA","YTEzNQ","YTEzNg","YTEzNw","YTEzOA","YTEzOQ","YTE0","YTE0MA","YTE0MQ","YTE0Mg","YTE0Mw","YTE0NA","YTE0NQ","YTE0Ng","YTE0Nw","YTE0OA","YTE0OQ","YTE1","YTE1MA","YTE1MQ","YTE1Mg","YTE1Mw","YTE1NA","YTE1NQ","YTE1Ng","YTE1Nw","YTE1OA","YTE1OQ","YTE2","YTE2MA","YTE2MQ","YTE2Mg","YTE2Mw","YTE2NA","YTE2NQ","YTE2Ng","YTE2Nw","YTE2OA","YTE2OQ","YTE3","YTE3MA","YTE3MQ","YTE3Mg","YTE3Mw","YTE3NA","YTE3NQ","YTE3Ng","YTE3Nw","YTE3OA","YTE3OQ","YTE4","YTE4MA","YTE4MQ","YTE4Mg","YTE4Mw","YTE4NA","YTE4NQ","YTE4Ng","YTE4Nw","YTE4OA","YTE4OQ","YTE5","YTE5MA","YTE5MQ","YTE5Mg","YTE5Mw","YTE5NA","YTE5NQ","YTE5Ng","YTE5Nw","YTE5OA","YTE5OQ","YTI","YTIw","YTIx","YTIy","YTIz","YTI0","YTI1","YTI2","YTI3","YTI4","YTI5","YTM","YTMw","YTMx","YTMy","YTMz","YTM0","YTM1","YTM2","YTM3","YTM4","YTM5","YTQ","YTQw","YTQx","YTQy","YTQz","YTQ0","YTQ1","YTQ2","YTQ3","YTQ4","YTQ5","YTU","YTUw","YTUx","YTUy","YTUz","YTU0","YTU1","YTU2","YTU3","YTU4","YTU5","YTY","YTYw","YTYx","YTYy","YTYz","YTY0","YTY1","YTY2","YTY3","YTY4","YTY5","YTc","YTcw","YTcx","YTcy","YTcz","YTc0","YTc1","YTc2","YTc3","YTc4","YTc5","YTg","YTgw","YTgx","YTgy","YTgz","YTg0","YTg1","YTg2","YTg3","YTg4","YTg5","YTk","YTkw","YTkx","YTky","YTkz","YTk0","YTk1","YTk2","YTk3","YTk4","YTk5"]},"3":{"lst":["i32",200,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200]}}
-- !result
select dict_merge(get_json_string(c1, 'f4'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'global dict size:500 greater than low_cardinality_threshold:255: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f5'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
insert into js2
select
    generate_series,
    json_object(
        'f1', generate_series % 10, -- integer
        'f2', cast((generate_series % 20) * 1.5 as double), -- float
        'f3', if(generate_series % 2 = 0, true, false), -- boolean
        'f4', null, -- null
        'f5', json_array(generate_series % 5, 'arr', 1.23) -- array
    )
from (table(generate_series(1, 100)));
-- result:
-- !result
select dict_merge(get_json_string(c1, 'f1'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f2'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f3'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f4'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'global dict size:500 greater than low_cardinality_threshold:255: BE:.*'\)
-- !result
select dict_merge(get_json_string(c1, 'f5'), 255) from js2 [_META_];
-- result:
[REGEX]E: \(1064, 'no global dict: BE:.*'\)
-- !result
truncate table js2;
-- result:
-- !result
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 10),
        'f2', concat('a', generate_series % 100),
        'f3', concat('a', generate_series % 200),
        'f4', concat('a', generate_series % 500)
    )
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a100": 4, "a101": 5, "a102": 6, "a103": 7, "a104": 8, "a105": 9, "a106": 10, "a107": 11, "a108": 12, "a109": 13, "a11": 14, "a110": 15, "a111": 16, "a112": 17, "a113": 18, "a114": 19, "a115": 20, "a116": 21, "a117": 22, "a118": 23, "a119": 24, "a12": 25, "a120": 26, "a121": 27, "a122": 28, "a123": 29, "a124": 30, "a125": 31, "a126": 32, "a127": 33, "a128": 34, "a129": 35, "a13": 36, "a130": 37, "a131": 38, "a132": 39, "a133": 40, "a134": 41, "a135": 42, "a136": 43, "a137": 44, "a138": 45, "a139": 46, "a14": 47, "a140": 48, "a141": 49, "a142": 50, "a143": 51, "a144": 52, "a145": 53, "a146": 54, "a147": 55, "a148": 56, "a149": 57, "a15": 58, "a150": 59, "a151": 60, "a152": 61, "a153": 62, "a154": 63, "a155": 64, "a156": 65, "a157": 66, "a158": 67, "a159": 68, "a16": 69, "a160": 70, "a161": 71, "a162": 72, "a163": 73, "a164": 74, "a165": 75, "a166": 76, "a167": 77, "a168": 78, "a169": 79, "a17": 80, "a170": 81, "a171": 82, "a172": 83, "a173": 84, "a174": 85, "a175": 86, "a176": 87, "a177": 88, "a178": 89, "a179": 90, "a18": 91, "a180": 92, "a181": 93, "a182": 94, "a183": 95, "a184": 96, "a185": 97, "a186": 98, "a187": 99, "a188": 100, "a189": 101, "a19": 102, "a190": 103, "a191": 104, "a192": 105, "a193": 106, "a194": 107, "a195": 108, "a196": 109, "a197": 110, "a198": 111, "a199": 112, "a2": 113, "a20": 114, "a21": 115, "a22": 116, "a23": 117, "a24": 118, "a25": 119, "a26": 120, "a27": 121, "a28": 122, "a29": 123, "a3": 124, "a30": 125, "a31": 126, "a32": 127, "a33": 128, "a34": 129, "a35": 130, "a36": 131, "a37": 132, "a38": 133, "a39": 134, "a4": 135, "a40": 136, "a41": 137, "a42": 138, "a43": 139, "a44": 140, "a45": 141, "a46": 142, "a47": 143, "a48": 144, "a49": 145, "a5": 146, "a50": 147, "a51": 148, "a52": 149, "a53": 150, "a54": 151, "a55": 152, "a56": 153, "a57": 154, "a58": 155, "a59": 156, "a6": 157, "a60": 158, "a61": 159, "a62": 160, "a63": 161, "a64": 162, "a65": 163, "a66": 164, "a67": 165, "a68": 166, "a69": 167, "a7": 168, "a70": 169, "a71": 170, "a72": 171, "a73": 172, "a74": 173, "a75": 174, "a76": 175, "a77": 176, "a78": 177, "a79": 178, "a8": 179, "a80": 180, "a81": 181, "a82": 182, "a83": 183, "a84": 184, "a85": 185, "a86": 186, "a87": 187, "a88": 188, "a89": 189, "a9": 190, "a90": 191, "a91": 192, "a92": 193, "a93": 194, "a94": 195, "a95": 196, "a96": 197, "a97": 198, "a98": 199, "a99": 200}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a2": 3, "a3": 4, "a4": 5, "a5": 6, "a6": 7, "a7": 8, "a8": 9, "a9": 10}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a20": 14, "a21": 15, "a22": 16, "a23": 17, "a24": 18, "a25": 19, "a26": 20, "a27": 21, "a28": 22, "a29": 23, "a3": 24, "a30": 25, "a31": 26, "a32": 27, "a33": 28, "a34": 29, "a35": 30, "a36": 31, "a37": 32, "a38": 33, "a39": 34, "a4": 35, "a40": 36, "a41": 37, "a42": 38, "a43": 39, "a44": 40, "a45": 41, "a46": 42, "a47": 43, "a48": 44, "a49": 45, "a5": 46, "a50": 47, "a51": 48, "a52": 49, "a53": 50, "a54": 51, "a55": 52, "a56": 53, "a57": 54, "a58": 55, "a59": 56, "a6": 57, "a60": 58, "a61": 59, "a62": 60, "a63": 61, "a64": 62, "a65": 63, "a66": 64, "a67": 65, "a68": 66, "a69": 67, "a7": 68, "a70": 69, "a71": 70, "a72": 71, "a73": 72, "a74": 73, "a75": 74, "a76": 75, "a77": 76, "a78": 77, "a79": 78, "a8": 79, "a80": 80, "a81": 81, "a82": 82, "a83": 83, "a84": 84, "a85": 85, "a86": 86, "a87": 87, "a88": 88, "a89": 89, "a9": 90, "a90": 91, "a91": 92, "a92": 93, "a93": 94, "a94": 95, "a95": 96, "a96": 97, "a97": 98, "a98": 99, "a99": 100}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a100": 4, "a101": 5, "a102": 6, "a103": 7, "a104": 8, "a105": 9, "a106": 10, "a107": 11, "a108": 12, "a109": 13, "a11": 14, "a110": 15, "a111": 16, "a112": 17, "a113": 18, "a114": 19, "a115": 20, "a116": 21, "a117": 22, "a118": 23, "a119": 24, "a12": 25, "a120": 26, "a121": 27, "a122": 28, "a123": 29, "a124": 30, "a125": 31, "a126": 32, "a127": 33, "a128": 34, "a129": 35, "a13": 36, "a130": 37, "a131": 38, "a132": 39, "a133": 40, "a134": 41, "a135": 42, "a136": 43, "a137": 44, "a138": 45, "a139": 46, "a14": 47, "a140": 48, "a141": 49, "a142": 50, "a143": 51, "a144": 52, "a145": 53, "a146": 54, "a147": 55, "a148": 56, "a149": 57, "a15": 58, "a150": 59, "a151": 60, "a152": 61, "a153": 62, "a154": 63, "a155": 64, "a156": 65, "a157": 66, "a158": 67, "a159": 68, "a16": 69, "a160": 70, "a161": 71, "a162": 72, "a163": 73, "a164": 74, "a165": 75, "a166": 76, "a167": 77, "a168": 78, "a169": 79, "a17": 80, "a170": 81, "a171": 82, "a172": 83, "a173": 84, "a174": 85, "a175": 86, "a176": 87, "a177": 88, "a178": 89, "a179": 90, "a18": 91, "a180": 92, "a181": 93, "a182": 94, "a183": 95, "a184": 96, "a185": 97, "a186": 98, "a187": 99, "a188": 100, "a189": 101, "a19": 102, "a190": 103, "a191": 104, "a192": 105, "a193": 106, "a194": 107, "a195": 108, "a196": 109, "a197": 110, "a198": 111, "a199": 112, "a2": 113, "a20": 114, "a21": 115, "a22": 116, "a23": 117, "a24": 118, "a25": 119, "a26": 120, "a27": 121, "a28": 122, "a29": 123, "a3": 124, "a30": 125, "a31": 126, "a32": 127, "a33": 128, "a34": 129, "a35": 130, "a36": 131, "a37": 132, "a38": 133, "a39": 134, "a4": 135, "a40": 136, "a41": 137, "a42": 138, "a43": 139, "a44": 140, "a45": 141, "a46": 142, "a47": 143, "a48": 144, "a49": 145, "a5": 146, "a50": 147, "a51": 148, "a52": 149, "a53": 150, "a54": 151, "a55": 152, "a56": 153, "a57": 154, "a58": 155, "a59": 156, "a6": 157, "a60": 158, "a61": 159, "a62": 160, "a63": 161, "a64": 162, "a65": 163, "a66": 164, "a67": 165, "a68": 166, "a69": 167, "a7": 168, "a70": 169, "a71": 170, "a72": 171, "a73": 172, "a74": 173, "a75": 174, "a76": 175, "a77": 176, "a78": 177, "a79": 178, "a8": 179, "a80": 180, "a81": 181, "a82": 182, "a83": 183, "a84": 184, "a85": 185, "a86": 186, "a87": 187, "a88": 188, "a89": 189, "a9": 190, "a90": 191, "a91": 192, "a92": 193, "a93": 194, "a94": 195, "a95": 196, "a96": 197, "a97": 198, "a98": 199, "a99": 200}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
select min(get_json_string(c1, 'f1')) from js2;
-- result:
a0
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=3):
-- !result
select min(get_json_string(c1, 'f2')) from js2;
-- result:
a0
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=3):
-- !result
select min(get_json_string(c1, 'f3')) from js2;
-- result:
a0
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=3):
-- !result
select min(get_json_string(c1, 'f4')) from js2;
-- result:
a0
-- !result
select * from profile_decode;
-- result:
-- !result
select min(get_json_string(c1, 'f5')) from js2;
-- result:
None
-- !result
select * from profile_decode;
-- result:
-- !result
select get_json_string(c1, 'f1') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
a0 100
a1 100
a2 100
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=5):
-- !result
select get_json_string(c1, 'f2') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
a0 10
a1 10
a10 10
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=5):
-- !result
select get_json_string(c1, 'f3') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
a0 5
a1 5
a10 5
-- !result
select * from profile_decode;
-- result:
DICT_DECODE (plan_node_id=5):
-- !result
select get_json_string(c1, 'f4') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
a0 2
a1 2
a10 2
-- !result
select * from profile_decode;
-- result:
-- !result
select get_json_string(c1, 'f5') k1, count(*) from js2 group by k1 order by k1 limit 3;
-- result:
None 1000
-- !result
select * from profile_decode;
-- result:
-- !result
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 10),
        'f2', concat('a', generate_series % 300),
        'f3', concat('a', generate_series % 500),
        'f4', concat('a', generate_series % 500)
    )
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string((inspect_global_dict('js2', 'c1.f1')), 'dict');
-- result:
{"a0": 1, "a1": 2, "a2": 3, "a3": 4, "a4": 5, "a5": 6, "a6": 7, "a7": 8, "a8": 9, "a9": 10}
-- !result
select get_json_string((inspect_global_dict('js2', 'c1.f2')), 'dict');
-- result:
None
-- !result
select get_json_string((inspect_global_dict('js2', 'c1.f3')), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 1000),
        'f2', concat('a', generate_series % 1000),
        'f3', concat('a', generate_series % 1000),
        'f4', concat('a', generate_series % 1000)
    )
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
truncate table js2;
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 10),
        'f2', concat('a', generate_series % 20),
        'f3', concat('a', generate_series % 30),
        'f4', concat('a', generate_series % 40),
        'f5', concat('a', generate_series % 50)
    )
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a2": 3, "a3": 4, "a4": 5, "a5": 6, "a6": 7, "a7": 8, "a8": 9, "a9": 10}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a3": 14, "a4": 15, "a5": 16, "a6": 17, "a7": 18, "a8": 19, "a9": 20}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a20": 14, "a21": 15, "a22": 16, "a23": 17, "a24": 18, "a25": 19, "a26": 20, "a27": 21, "a28": 22, "a29": 23, "a3": 24, "a4": 25, "a5": 26, "a6": 27, "a7": 28, "a8": 29, "a9": 30}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a20": 14, "a21": 15, "a22": 16, "a23": 17, "a24": 18, "a25": 19, "a26": 20, "a27": 21, "a28": 22, "a29": 23, "a3": 24, "a30": 25, "a31": 26, "a32": 27, "a33": 28, "a34": 29, "a35": 30, "a36": 31, "a37": 32, "a38": 33, "a39": 34, "a4": 35, "a5": 36, "a6": 37, "a7": 38, "a8": 39, "a9": 40}
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
{"a0": 1, "a1": 2, "a10": 3, "a11": 4, "a12": 5, "a13": 6, "a14": 7, "a15": 8, "a16": 9, "a17": 10, "a18": 11, "a19": 12, "a2": 13, "a20": 14, "a21": 15, "a22": 16, "a23": 17, "a24": 18, "a25": 19, "a26": 20, "a27": 21, "a28": 22, "a29": 23, "a3": 24, "a30": 25, "a31": 26, "a32": 27, "a33": 28, "a34": 29, "a35": 30, "a36": 31, "a37": 32, "a38": 33, "a39": 34, "a4": 35, "a40": 36, "a41": 37, "a42": 38, "a43": 39, "a44": 40, "a45": 41, "a46": 42, "a47": 43, "a48": 44, "a49": 45, "a5": 46, "a6": 47, "a7": 48, "a8": 49, "a9": 50}
-- !result
insert into js2
select
    generate_series,
    json_object(
        'f1', generate_series % 10, -- integer
        'f2', cast((generate_series % 20) * 1.5 as double), -- float
        'f3', if(generate_series % 2 = 0, true, false), -- boolean
        'f4', null, -- null
        'f5', json_array(generate_series % 5, 'arr', 1.23) -- array
    )
from (table(generate_series(1, 1000)));
-- result:
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');
-- result:
None
-- !result
CREATE TABLE js3 (
    v1 BIGINT NULL,
    c1 JSON NULL
)
DUPLICATE KEY (v1)
DISTRIBUTED BY HASH(`v1`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );
-- result:
-- !result
insert into js3
select
    generate_series,
    json_object(
        'str_field', concat('str_', generate_series % 10),
        'int_field', generate_series % 100,
        'float_field', (generate_series % 100) * 1.23,
        'bool_field', if(generate_series % 2 = 0, true, false),
        'null_field', null,
        'array_field', json_array(
            generate_series % 5,
            concat('arr_', generate_series % 3),
            (generate_series % 10) * 0.5
        ),
        'object_field', json_object(
            'nested_str', concat('nested_', generate_series % 7),
            'nested_int', generate_series % 7,
            'nested_bool', if(generate_series % 3 = 0, true, false)
        ),
        'deep_nested', json_object(
            'level1', json_object(
                'level2', json_array(
                    json_object(
                        'leaf_str', concat('leaf_', generate_series % 2),
                        'leaf_num', generate_series % 2
                    ),
                    generate_series % 2
                )
            )
        )
    )
from (table(generate_series(1, 1000)));
-- result:
-- !result
select dict_merge(get_json_string(c1, 'str_field'), 255) from js3 [_META_];
-- result:
{"2":{"lst":["str",10,"c3RyXzA","c3RyXzE","c3RyXzI","c3RyXzM","c3RyXzQ","c3RyXzU","c3RyXzY","c3RyXzc","c3RyXzg","c3RyXzk"]},"3":{"lst":["i32",10,1,2,3,4,5,6,7,8,9,10]}}
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.str_field'), 'dict');
-- result:
{"str_0": 1, "str_1": 2, "str_2": 3, "str_3": 4, "str_4": 5, "str_5": 6, "str_6": 7, "str_7": 8, "str_8": 9, "str_9": 10}
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.int_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.float_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.bool_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.null_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.array_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.object_field'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_str'), 'dict');
-- result:
{"nested_0": 1, "nested_1": 2, "nested_2": 3, "nested_3": 4, "nested_4": 5, "nested_5": 6, "nested_6": 7}
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_int'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_bool'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2.leaf_str'), 'dict');
-- result:
None
-- !result
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2.leaf_num'), 'dict');
-- result:
None
-- !result
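Taken together, the sequence above pins down the dict lifecycle for flat-JSON paths: a dict is built only for all-string paths whose cardinality stays at or below the 255 threshold, it is invalidated when a later load pushes the cardinality past the limit or mixes in non-string values, and truncate drops it until a subsequent load re-activates it. Condensed to the two essential steps:

-- a low-cardinality string path gets a dict ...
insert into js2 select generate_series, json_object('f1', concat('a', generate_series % 10))
from (table(generate_series(1, 1000)));
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
-- ... and a high-cardinality load invalidates it (the result becomes None)
insert into js2 select generate_series, json_object('f1', concat('a', generate_series % 1000))
from (table(generate_series(1, 1000)));
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');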
@@ -168,8 +168,8 @@ select get_json_int(j1, '$.f1') from js2 where get_json_int(j1, '$.f1') = 1;
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select get_json_int(j1, '$.f2') from js2 where get_json_int(j1, '$.f1') = 1;
-- result:

@@ -178,8 +178,8 @@ None
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 2
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
select get_json_int(j1, '$.f1') from js2 where get_json_int(j1, '$.f2') = 1;
-- result:

@@ -188,8 +188,8 @@ None
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 2
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
select get_json_int(j1, '$.f2'), j1 from js2 where get_json_int(j1, '$.f1') = 1;
-- result:

@@ -217,8 +217,8 @@ None None
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 2
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
select count(get_json_int(j1, '$.f2')) from js2 ;
-- result:

@@ -227,8 +227,8 @@ select count(get_json_int(j1, '$.f2')) from js2 ;
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select count(get_json_int(j1, '$.f3')) from js2 ;
-- result:

@@ -237,8 +237,8 @@ select count(get_json_int(j1, '$.f3')) from js2 ;
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 1
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
select * from js2 where get_json_int(j1, '$.f1') = -1;
-- result:

@@ -508,8 +508,8 @@ select count(*) from js3 where get_json_double(j1, 'f_bool') = 1.0;
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select count(*) from js3 where get_json_int(j1, 'f_int') < 500;
-- result:

@@ -530,8 +530,8 @@ select count(*) from js3 where get_json_double(j1, 'f_int') < 500;
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select count(*) from js3 where get_json_double(j1, 'f_double') < 500.0;
-- result:

@@ -552,8 +552,8 @@ select count(*) from js3 where get_json_string(j1, 'f_double') < '500';
select * from profile_access_path;
-- result:
- AccessPathExtract: 1
- AccessPathHits: 3
- PushdownAccessPaths: 1
- AccessPathHits: 1
- PushdownAccessPaths: 0
-- !result
select count(*) from js3 where get_json_int(j1, 'f_none') IS NULL;
-- result:

@@ -582,6 +582,6 @@ select count(*) from js3 where get_json_double(j1, 'f_none') < 500;
select * from profile_access_path;
-- result:
- AccessPathExtract: 2
- AccessPathHits: 4
- PushdownAccessPaths: 1
- AccessPathHits: 2
- PushdownAccessPaths: 0
-- !result
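These expectations read counters out of the query profile through a profile_access_path helper view whose definition sits outside this hunk. Assuming it mirrors the profile_decode view used in the dict test, it would look roughly like:

-- assumed definition, patterned after profile_decode above
create view profile_access_path as
select trim(unnest)
from table(unnest(split(get_query_profile(last_query_id()), '\n')))
where unnest like '%AccessPath%'
order by unnest;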
@@ -0,0 +1,254 @@
-- name: test_normal_flat_json_dict @sequential

update information_schema.be_configs set value = 'true' where name = 'enable_json_flat';
set enable_profile = true;
set enable_async_profile = false;
set cbo_json_v2_rewrite = true;
set cbo_json_v2_dict_opt = true;

create view profile_decode as
select trim(unnest)
from table(unnest(split(get_query_profile(last_query_id()), '\n')))
where unnest like '%DICT_DECODE%'
order by unnest;

CREATE TABLE js2 (
    v1 BIGINT NULL,
    c1 JSON NULL
)
DUPLICATE KEY (v1)
DISTRIBUTED BY HASH(`v1`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );


insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 10),
        'f2', concat('a', generate_series % 100),
        'f3', concat('a', generate_series % 200),
        'f4', concat('a', generate_series % 500)
    )
from (table(generate_series(1, 1000)));

-- Test dict_merge function
select dict_merge(get_json_string(c1, 'f1'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f2'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f3'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f4'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f5'), 255) from js2 [_META_];

insert into js2
select
    generate_series,
    json_object(
        'f1', generate_series % 10, -- integer
        'f2', cast((generate_series % 20) * 1.5 as double), -- float
        'f3', if(generate_series % 2 = 0, true, false), -- boolean
        'f4', null, -- null
        'f5', json_array(generate_series % 5, 'arr', 1.23) -- array
    )
from (table(generate_series(1, 100)));
select dict_merge(get_json_string(c1, 'f1'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f2'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f3'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f4'), 255) from js2 [_META_];
select dict_merge(get_json_string(c1, 'f5'), 255) from js2 [_META_];


-- Test the dict

truncate table js2;
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 10),
        'f2', concat('a', generate_series % 100),
        'f3', concat('a', generate_series % 200),
        'f4', concat('a', generate_series % 500)
    )
from (table(generate_series(1, 1000)));


-- inspect the dict itself
select get_json_string(inspect_global_dict('js2', 'c1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');

-- test some simple queries
select min(get_json_string(c1, 'f1')) from js2;
select * from profile_decode;
select min(get_json_string(c1, 'f2')) from js2;
select * from profile_decode;
select min(get_json_string(c1, 'f3')) from js2;
select * from profile_decode;
select min(get_json_string(c1, 'f4')) from js2;
select * from profile_decode;
select min(get_json_string(c1, 'f5')) from js2;
select * from profile_decode;
select get_json_string(c1, 'f1') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
select get_json_string(c1, 'f2') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
select get_json_string(c1, 'f3') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
select get_json_string(c1, 'f4') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;
select get_json_string(c1, 'f5') k1, count(*) from js2 group by k1 order by k1 limit 3;
select * from profile_decode;

-- dict update, invalidate some of dicts
-- f1: valid, cardinality: 10
-- f2: invalid, cardinality: 300
-- f3: invalid, cardinality: 500
-- f4: invalid, cardinality: 500
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 10),
        'f2', concat('a', generate_series % 300),
        'f3', concat('a', generate_series % 500),
        'f4', concat('a', generate_series % 500)
    )
from (table(generate_series(1, 1000)));

select get_json_string((inspect_global_dict('js2', 'c1.f1')), 'dict');
select get_json_string((inspect_global_dict('js2', 'c1.f2')), 'dict');
select get_json_string((inspect_global_dict('js2', 'c1.f3')), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');


-- dict update: invalidate all dicts
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 1000),
        'f2', concat('a', generate_series % 1000),
        'f3', concat('a', generate_series % 1000),
        'f4', concat('a', generate_series % 1000)
    )
from (table(generate_series(1, 1000)));

select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');


-- dict update: truncate table
truncate table js2;
select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');

-- dict update: activate all dicts
insert into js2
select
    generate_series,
    json_object(
        'f1', concat('a', generate_series % 10),
        'f2', concat('a', generate_series % 20),
        'f3', concat('a', generate_series % 30),
        'f4', concat('a', generate_series % 40),
        'f5', concat('a', generate_series % 50)
    )
from (table(generate_series(1, 1000)));

select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');

-- Test mixed data types with string
insert into js2
select
    generate_series,
    json_object(
        'f1', generate_series % 10, -- integer
        'f2', cast((generate_series % 20) * 1.5 as double), -- float
        'f3', if(generate_series % 2 = 0, true, false), -- boolean
        'f4', null, -- null
        'f5', json_array(generate_series % 5, 'arr', 1.23) -- array
    )
from (table(generate_series(1, 1000)));

select get_json_string(inspect_global_dict('js2', 'c1.f1'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f2'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f3'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f4'), 'dict');
select get_json_string(inspect_global_dict('js2', 'c1.f5'), 'dict');


-- Test with various JSON data types (not just string):
-- int, float, bool, null, array, object, and nested structures
CREATE TABLE js3 (
    v1 BIGINT NULL,
    c1 JSON NULL
)
DUPLICATE KEY (v1)
DISTRIBUTED BY HASH(`v1`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );

insert into js3
select
    generate_series,
    json_object(
        'str_field', concat('str_', generate_series % 10),
        'int_field', generate_series % 100,
        'float_field', (generate_series % 100) * 1.23,
        'bool_field', if(generate_series % 2 = 0, true, false),
        'null_field', null,
        'array_field', json_array(
            generate_series % 5,
            concat('arr_', generate_series % 3),
            (generate_series % 10) * 0.5
        ),
        'object_field', json_object(
            'nested_str', concat('nested_', generate_series % 7),
            'nested_int', generate_series % 7,
            'nested_bool', if(generate_series % 3 = 0, true, false)
        ),
        'deep_nested', json_object(
            'level1', json_object(
                'level2', json_array(
                    json_object(
                        'leaf_str', concat('leaf_', generate_series % 2),
                        'leaf_num', generate_series % 2
                    ),
                    generate_series % 2
                )
            )
        )
    )
from (table(generate_series(1, 1000)));

select dict_merge(get_json_string(c1, 'str_field'), 255) from js3 [_META_];

select get_json_string(inspect_global_dict('js3', 'c1.str_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.int_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.float_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.bool_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.null_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.array_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.object_field'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_str'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_int'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.object_field.nested_bool'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2.leaf_str'), 'dict');
select get_json_string(inspect_global_dict('js3', 'c1.deep_nested.level1.level2.leaf_num'), 'dict');