[BugFix] Fix mv rewriter binder bugs (#62919)
Signed-off-by: shuming.li <ming.moriarty@gmail.com>
This commit is contained in:
parent
3688bc4bd4
commit
54317498bc
|
|
@ -47,7 +47,8 @@ public class Binder {
|
|||
// `nextIdx` marks the current index advanced by each `next()` call; it is used by the MULTI_JOIN pattern
|
||||
// to optimize iteration expansions.
|
||||
private int nextIdx = 0;
|
||||
|
||||
// if the binder is exhausted, no need to check again
|
||||
private boolean isExhausted = false;
|
||||
/**
|
||||
* Extract an expression from a GroupExpression that matches the given pattern
|
||||
*
|
||||
|
|
@ -101,7 +102,7 @@ public class Binder {
|
|||
this.groupExpressionIndex.set(lastNode, lastNodeIndex + 1);
|
||||
|
||||
expression = match(pattern, groupExpression);
|
||||
} while (expression == null && this.groupExpressionIndex.size() != 1);
|
||||
} while (!isExhausted && expression == null && this.groupExpressionIndex.size() != 1);
|
||||
|
||||
nextIdx++;
|
||||
return expression;
|
||||
|
|
@ -206,7 +207,7 @@ public class Binder {
|
|||
public OptExpression match(GroupExpression ge) {
|
||||
// 1. Check if the entire tree is MULTI_JOIN
|
||||
// 2. Enumerate GE
|
||||
if (ge == null || !isMultiJoin(ge)) {
|
||||
if (ge == null || isExhausted || !isMultiJoin(ge)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
@ -217,14 +218,20 @@ public class Binder {
|
|||
* Check whether the binder is exhausted.
|
||||
*/
|
||||
private boolean exhausted() {
|
||||
if (loopCount++ % CHECK_EXHAUSTED_INTERVAL == 0) {
|
||||
final long elapsed = watch.elapsed(TimeUnit.MILLISECONDS);
|
||||
final boolean exhausted = elapsed > timeLimit;
|
||||
if (exhausted) {
|
||||
if (isExhausted) {
|
||||
return true;
|
||||
}
|
||||
// Only check elapsed time every CHECK_EXHAUSTED_INTERVAL iterations
|
||||
if ((++loopCount % CHECK_EXHAUSTED_INTERVAL) == 0) {
|
||||
long elapsed = watch.elapsed(TimeUnit.MILLISECONDS);
|
||||
if (elapsed > timeLimit) {
|
||||
isExhausted = true;
|
||||
// Log only once to avoid log flooding
|
||||
Tracers.log(Tracers.Module.MV, args ->
|
||||
String.format("[MV TRACE] MultiJoinBinder %s exhausted(loop:%s)\n", this, loopCount));
|
||||
String.format("[MV TRACE] MultiJoinBinder exhausted after %d loops (elapsed: %d ms, limit: %d ms)%n",
|
||||
loopCount, elapsed, timeLimit));
|
||||
return true;
|
||||
}
|
||||
return exhausted;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -279,6 +286,7 @@ public class Binder {
|
|||
}
|
||||
// return directly if next has already been rewritten by an MV
|
||||
if (next.hasAppliedMVRules()) {
|
||||
groupExpressionIndex.remove(groupTraceKey);
|
||||
return next;
|
||||
}
|
||||
|
||||
|
|
@ -294,7 +302,7 @@ public class Binder {
|
|||
|
||||
next = group.getLogicalExpressions().get(valueIndex);
|
||||
if (next.hasAppliedMVRules()) {
|
||||
groupExpressionIndex.set(groupTraceKey, valueIndex);
|
||||
groupExpressionIndex.remove(groupTraceKey);
|
||||
return next;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import com.starrocks.sql.ast.expression.Expr;
|
|||
import com.starrocks.sql.ast.expression.FunctionName;
|
||||
import com.starrocks.sql.optimizer.OptExpression;
|
||||
import com.starrocks.sql.optimizer.OptimizerContext;
|
||||
import com.starrocks.sql.optimizer.OptimizerTraceUtil;
|
||||
import com.starrocks.sql.optimizer.Utils;
|
||||
import com.starrocks.sql.optimizer.operator.AggType;
|
||||
import com.starrocks.sql.optimizer.operator.ColumnOutputInfo;
|
||||
|
|
@ -213,6 +214,7 @@ public class FineGrainedRangePredicateRule extends TransformationRule {
|
|||
|
||||
LogicalAggregationOperator newAggOp = rewriteAggOperator(aggColInfoList, unionOutputCols);
|
||||
|
||||
OptimizerTraceUtil.logMVRewrite(context, this, "FineGrainedRangePredicateRule applied");
|
||||
return Lists.newArrayList(OptExpression.create(newAggOp, unionOpt));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,14 +15,15 @@
|
|||
|
||||
package com.starrocks.sql.optimizer.rule.transformation.materialization.rule;
|
||||
|
||||
import com.starrocks.sql.optimizer.OptExpression;
|
||||
import com.starrocks.sql.optimizer.OptimizerContext;
|
||||
import com.starrocks.sql.optimizer.operator.OperatorType;
|
||||
import com.starrocks.sql.optimizer.operator.pattern.Pattern;
|
||||
import com.starrocks.sql.optimizer.rule.RuleType;
|
||||
import com.starrocks.sql.optimizer.rule.transformation.materialization.MvUtils;
|
||||
|
||||
/*
|
||||
*
|
||||
* Here is the rule for pattern Join
|
||||
*
|
||||
/**
|
||||
* OnlyJoinRule matches an SPJ query pattern and rewrites it using a materialized view.
|
||||
*/
|
||||
public class OnlyJoinRule extends BaseMaterializedViewRewriteRule {
|
||||
private static final OnlyJoinRule INSTANCE = new OnlyJoinRule();
|
||||
|
|
@ -35,4 +36,16 @@ public class OnlyJoinRule extends BaseMaterializedViewRewriteRule {
|
|||
return INSTANCE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean check(OptExpression input, OptimizerContext context) {
|
||||
// NOTE:
|
||||
// 1. For only-join rule, only SPJ is supported
|
||||
// 2. Don't limit the input must contain a join because it may be a single table query but we still can rewrite it
|
||||
// in this rule, because of a multi table plan after some rules(eg: FineGrainedRangePredicateRule) may
|
||||
// become a single table plan.
|
||||
if (!MvUtils.isLogicalSPJ(input)) {
|
||||
return false;
|
||||
}
|
||||
return super.check(input, context);
|
||||
}
|
||||
}
|
||||
|
|
@ -32,11 +32,11 @@ public class MvRewriteNestedMVTest extends MVTestBase {
|
|||
|
||||
starRocksAssert.withTable(cluster, "depts");
|
||||
starRocksAssert.withTable(cluster, "emps");
|
||||
starRocksAssert.withTable(cluster, "t1");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNestedMv() throws Exception {
|
||||
starRocksAssert.withTable(cluster, "t1");
|
||||
starRocksAssert.withTable("CREATE TABLE nest_base_table_1 (\n" +
|
||||
" k1 INT,\n" +
|
||||
" v1 INT,\n" +
|
||||
|
|
@ -114,4 +114,77 @@ public class MvRewriteNestedMVTest extends MVTestBase {
|
|||
dropMv("test", "hive_nested_mv_2");
|
||||
dropMv("test", "hive_nested_mv_3");
|
||||
}
|
||||
|
||||
/**
 * Builds a chain of nested materialized views over {@code t0 join t1}
 * (flat_mv -> join_filter_mv -> date_mv -> month_mv / year_mv), then plans an aggregate query
 * with an open date range and asserts that the fragment plan mentions date_mv, month_mv and
 * year_mv.
 */
@Test
|
||||
public void testRangePredicateRewrite() throws Exception {
|
||||
// Base tables t0 and t1.
starRocksAssert.withTable("CREATE TABLE `t0` (\n" +
|
||||
" `date_col` date,\n" +
|
||||
" `id` int(11),\n" +
|
||||
" `int_col` int(11),\n" +
|
||||
" `float_col_1` float,\n" +
|
||||
" `float_col_2` float,\n" +
|
||||
" `varchar_col` varchar(255),\n" +
|
||||
" `tinyint_col` tinyint(4)\n" +
|
||||
") ENGINE=OLAP\n" +
|
||||
"DUPLICATE KEY(`date_col`, `id`)\n" +
|
||||
"DISTRIBUTED BY HASH(`id`)\n" +
|
||||
"PROPERTIES (\n" +
|
||||
"\"replication_num\" = \"1\"\n" +
|
||||
");\n");
|
||||
starRocksAssert.withTable("CREATE TABLE `t1` (\n" +
|
||||
" `id_1` int(11),\n" +
|
||||
" `varchar_col_1` varchar(255),\n" +
|
||||
" `varchar_col_2` varchar(255),\n" +
|
||||
" `int_col_1` int(11),\n" +
|
||||
" `tinyint_col_1` tinyint(4)\n" +
|
||||
") ENGINE=OLAP\n" +
|
||||
"DUPLICATE KEY(`id_1`, `varchar_col_1`)\n" +
|
||||
"DISTRIBUTED BY HASH(`id_1`)\n" +
|
||||
"PROPERTIES (\n" +
|
||||
"\"replication_num\" = \"1\"\n" +
|
||||
");");
|
||||
// One row per base table.
executeInsertSql("INSERT INTO `t0` VALUES ('2024-02-01', 1, 100, 10.5, 20.5, 'varchar_value_1', 1);");
|
||||
executeInsertSql("INSERT INTO `t1` VALUES (1, 'varchar_value_1', 'varchar_value_21', 100, 1);");
|
||||
// flat_mv: the t0/t1 join on tinyint_col, with no filter.
starRocksAssert.withRefreshedMaterializedView("create MATERIALIZED VIEW flat_mv\n" +
|
||||
"REFRESH DEFERRED MANUAL\n" +
|
||||
"PROPERTIES (\n" +
|
||||
"\"replication_num\" = \"1\"\n" +
|
||||
") as select t0.id, t0.date_col, t0.float_col_1, t0.float_col_2, t0.varchar_col, " +
|
||||
"t0.tinyint_col, t1.varchar_col_1, t1.varchar_col_2, t1.int_col_1, t1.tinyint_col_1 " +
|
||||
"from t0 join t1 on t0.tinyint_col = t1.tinyint_col_1;\n");
|
||||
// join_filter_mv: flat_mv restricted to a fixed list of ids.
starRocksAssert.withRefreshedMaterializedView("create MATERIALIZED VIEW join_filter_mv\n" +
|
||||
"REFRESH DEFERRED MANUAL\n" +
|
||||
"PROPERTIES (\n" +
|
||||
"\"replication_num\" = \"1\"\n" +
|
||||
") as select id, date_col, float_col_1, int_col_1, tinyint_col, " +
|
||||
"tinyint_col_1 from flat_mv where id in (1, 2, 3, 4, 5, 6, 6, 7, 9, 10);\n");
|
||||
// date_mv: sum per (tinyint_col, date_col) over join_filter_mv.
starRocksAssert.withRefreshedMaterializedView("create MATERIALIZED VIEW date_mv\n" +
|
||||
"REFRESH DEFERRED MANUAL\n" +
|
||||
"PROPERTIES (\n" +
|
||||
"\"replication_num\" = \"1\"\n" +
|
||||
") as select tinyint_col, date_col , sum(float_col_1 * int_col_1) as sum_value " +
|
||||
"from join_filter_mv group by tinyint_col, date_col;\n");
|
||||
// month_mv and year_mv: roll-ups of date_mv truncated to month and to year.
starRocksAssert.withRefreshedMaterializedView("create MATERIALIZED VIEW month_mv\n" +
|
||||
"REFRESH DEFERRED MANUAL\n" +
|
||||
"PROPERTIES (\n" +
|
||||
"\"replication_num\" = \"1\"\n" +
|
||||
") as select tinyint_col, date_trunc('month', date_col) as date_col, sum(sum_value) as sum_value " +
|
||||
"from date_mv group by tinyint_col, date_trunc('month', date_col);\n");
|
||||
starRocksAssert.withRefreshedMaterializedView("create MATERIALIZED VIEW year_mv\n" +
|
||||
"REFRESH DEFERRED MANUAL\n" +
|
||||
"PROPERTIES (\n" +
|
||||
"\"replication_num\" = \"1\"\n" +
|
||||
") as select tinyint_col, date_trunc('year', date_col) as date_col, sum(sum_value) " +
|
||||
"as sum_value from date_mv group by tinyint_col, date_trunc('year', date_col);\n");
|
||||
// Query under test: same join and id filter as the MV chain, plus an open date range.
String sql = "select sum(t0.float_col_1 * t1.int_col_1), t0.tinyint_col " +
|
||||
"from t0 join t1 on t0.tinyint_col = t1.tinyint_col_1 where t0.id in (1, 2, 3, 4, 5, 6, 6, 7, 9, 10)\n" +
|
||||
"and date_col > '2024-02-11' and date_col < '2028-05-14' group by tinyint_col order by 1;";
|
||||
// Session setup: nested rewrite level 10, forced rewrite mode, fine-grained range predicate on,
// and both push-down rewrites off.
connectContext.getSessionVariable().setNestedMvRewriteMaxLevel(10);
|
||||
connectContext.getSessionVariable().setMaterializedViewRewriteMode("force");
|
||||
connectContext.getSessionVariable().setEnableFineGrainedRangePredicate(true);
|
||||
connectContext.getSessionVariable().setEnableMaterializedViewTimeSeriesPushDownRewrite(false);
|
||||
connectContext.getSessionVariable().setEnableMaterializedViewPushDownRewrite(false);
|
||||
String plan = getFragmentPlan(sql);
|
||||
// The plan is expected to reference all three aggregate MVs.
PlanTestBase.assertContains(plan, "date_mv", "month_mv", "year_mv");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ public class MvTransparentUnionRewriteOlapTest extends MVTestBase {
|
|||
)
|
||||
);
|
||||
connectContext.getSessionVariable().setEnableMaterializedViewTransparentUnionRewrite(true);
|
||||
connectContext.getSessionVariable().setMaterializedViewRewriteMode("force");
|
||||
}
|
||||
|
||||
private void withPartialScanMv(StarRocksAssert.ExceptionRunnable runner) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,239 @@
|
|||
-- name: test_mv_rewrite_bugs1
|
||||
create database db_${uuid0};
|
||||
-- result:
|
||||
-- !result
|
||||
use db_${uuid0};
|
||||
-- result:
|
||||
-- !result
|
||||
CREATE TABLE `sales_data` (
|
||||
`customer_id` bigint(20) NOT NULL,
|
||||
`order_id` bigint(20) NOT NULL,
|
||||
`line_item` smallint(6) NOT NULL,
|
||||
`order_date` datetime NOT NULL,
|
||||
`original_customer_id` varchar(1048576) NOT NULL,
|
||||
`original_order_id` varchar(1048576) NOT NULL,
|
||||
`month_order` datetime NULL,
|
||||
`week_order` datetime NULL,
|
||||
`quarter_order` datetime NULL,
|
||||
`year_order` datetime NULL,
|
||||
`store_name` varchar(1048576) NULL,
|
||||
`retailer_brand` varchar(1048576) NULL,
|
||||
`sales_channel` varchar(1048576) NULL,
|
||||
`parent_company` varchar(1048576) NULL,
|
||||
`store_postal_code` varchar(1048576) NULL,
|
||||
`state` varchar(1048576) NULL,
|
||||
`region` varchar(18) NULL,
|
||||
`age` decimal(16, 0) NULL,
|
||||
`generation` varchar(17) NULL,
|
||||
`gender` varchar(11) NULL,
|
||||
`is_adult` boolean NULL,
|
||||
`is_hispanic` boolean NULL,
|
||||
`recent_login` boolean NULL,
|
||||
`customer_postal_code` varchar(1048576) NULL,
|
||||
`loyalty_id` varchar(1048576) NULL,
|
||||
`product_code` varchar(1048576) NULL,
|
||||
`brand` varchar(1048576) NULL,
|
||||
`manufacturer` varchar(1048576) NULL,
|
||||
`category_l1` varchar(1048576) NULL,
|
||||
`category_l2` varchar(1048576) NULL,
|
||||
`category_l3` varchar(1048576) NULL,
|
||||
`category_l4` varchar(1048576) NULL,
|
||||
`product_description` varchar(1048576) NULL,
|
||||
`private_label` boolean NULL,
|
||||
`digital_receipt` boolean NULL,
|
||||
`quantity` decimal(10, 2) NULL,
|
||||
`sales_amount` decimal(10, 2) NULL,
|
||||
`loyalty_program_a` boolean NULL,
|
||||
`loyalty_program_b` boolean NULL,
|
||||
`loyalty_program_c` boolean NULL,
|
||||
`category_partition_key` varchar(1048576) NULL,
|
||||
`load_timestamp` datetime NULL
|
||||
) ENGINE=OLAP
|
||||
PRIMARY KEY(`customer_id`, `order_id`, `line_item`, `order_date`)
|
||||
PARTITION BY date_trunc('day', order_date)
|
||||
DISTRIBUTED BY HASH(`customer_id`) BUCKETS 12
|
||||
ORDER BY(`brand`, `category_l1`)
|
||||
PROPERTIES (
|
||||
"colocate_with" = "sales_data_orders",
|
||||
"compression" = "LZ4",
|
||||
"enable_persistent_index" = "true",
|
||||
"replication_num" = "1"
|
||||
);
|
||||
-- result:
|
||||
-- !result
|
||||
INSERT INTO `sales_data` (
|
||||
`customer_id`, `order_id`, `line_item`, `order_date`,
|
||||
`original_customer_id`, `original_order_id`,
|
||||
`month_order`, `week_order`, `quarter_order`, `year_order`,
|
||||
`store_name`, `retailer_brand`, `sales_channel`, `parent_company`,
|
||||
`store_postal_code`, `state`, `region`, `age`, `generation`, `gender`,
|
||||
`is_adult`, `is_hispanic`, `recent_login`,
|
||||
`customer_postal_code`, `loyalty_id`, `product_code`, `brand`, `manufacturer`,
|
||||
`category_l1`, `category_l2`, `category_l3`, `category_l4`, `product_description`,
|
||||
`private_label`, `digital_receipt`, `quantity`, `sales_amount`,
|
||||
`loyalty_program_a`, `loyalty_program_b`, `loyalty_program_c`,
|
||||
`category_partition_key`, `load_timestamp`
|
||||
) VALUES
|
||||
(10001, 500001, 1, '2025-09-01 10:15:00',
|
||||
'10001', '500001',
|
||||
'2025-09-01', '2025-09-01', '2025-07-01', '2025-01-01',
|
||||
'SuperMart Downtown', 'SuperMart', 'Offline', 'SuperMart Inc.',
|
||||
'10001', 'NY', 'Northeast', 32, 'Millennial', 'F',
|
||||
TRUE, FALSE, TRUE,
|
||||
'10001', 'LOYAL123', '1234567890123', 'Store Brand', 'SuperMart',
|
||||
'Dairy', 'Milk', 'Whole Milk', NULL, '1 Gallon Whole Milk',
|
||||
FALSE, TRUE, 1.00, 3.49,
|
||||
FALSE, TRUE, FALSE,
|
||||
'Dairy', '2025-09-01 12:00:00'),
|
||||
(10002, 500002, 1, '2025-09-02 15:30:00',
|
||||
'10002', '500002',
|
||||
'2025-09-01', '2025-09-01', '2025-07-01', '2025-01-01',
|
||||
'MegaStore', 'MegaStore', 'Offline', 'MegaStore Corp.',
|
||||
'90001', 'CA', 'West', 25, 'Gen Z', 'M',
|
||||
TRUE, TRUE, FALSE,
|
||||
'90001', 'LOYAL456', '9876543210987', 'CrunchyChips', 'SnackCorp',
|
||||
'Snacks', 'Chips', NULL, NULL, 'Classic Potato Chips',
|
||||
FALSE, FALSE, 2.00, 5.98,
|
||||
FALSE, TRUE, TRUE,
|
||||
'Snacks', '2025-09-02 16:00:00'),
|
||||
(10003, 500003, 1, '2025-09-03 09:45:00',
|
||||
'10003', '500003',
|
||||
'2025-09-01', '2025-09-01', '2025-07-01', '2025-01-01',
|
||||
'BulkMart', 'BulkMart', 'Offline', 'BulkMart Wholesale',
|
||||
'77001', 'TX', 'South', 29, 'Millennial', 'F',
|
||||
TRUE, FALSE, TRUE,
|
||||
'77001', 'LOYAL789', '5555555555555', 'BabyComfort', 'BabyCorp',
|
||||
'Baby', 'Diapers', NULL, NULL, 'Comfort Diapers Size 2',
|
||||
FALSE, TRUE, 1.00, 39.99,
|
||||
TRUE, FALSE, FALSE,
|
||||
'Baby', '2025-09-03 10:00:00');
|
||||
-- result:
|
||||
-- !result
|
||||
CREATE MATERIALIZED VIEW `sales_data_mv2`
|
||||
PARTITION BY (date_trunc('day', `order_date`))
|
||||
DISTRIBUTED BY HASH(`customer_id`) BUCKETS 12
|
||||
ORDER BY (category_l1, category_l2, category_l3)
|
||||
REFRESH MANUAL
|
||||
PROPERTIES (
|
||||
"replicated_storage" = "true",
|
||||
"replication_num" = "1",
|
||||
"partition_refresh_number" = "60",
|
||||
"bloom_filter_columns" = "category_l1, category_l2, category_l3, product_description, manufacturer, retailer_brand, store_name",
|
||||
"colocate_with" = "sales_data_orders"
|
||||
)
|
||||
AS SELECT
|
||||
`sales_data`.`customer_id`,
|
||||
`sales_data`.`order_id`,
|
||||
`sales_data`.`line_item`,
|
||||
`sales_data`.`order_date`,
|
||||
`sales_data`.`month_order`,
|
||||
`sales_data`.`week_order`,
|
||||
`sales_data`.`quarter_order`,
|
||||
`sales_data`.`year_order`,
|
||||
`sales_data`.`store_name`,
|
||||
`sales_data`.`retailer_brand`,
|
||||
`sales_data`.`sales_channel`,
|
||||
`sales_data`.`parent_company`,
|
||||
`sales_data`.`store_postal_code`,
|
||||
`sales_data`.`state`,
|
||||
`sales_data`.`region`,
|
||||
`sales_data`.`age`,
|
||||
`sales_data`.`generation`,
|
||||
`sales_data`.`gender`,
|
||||
`sales_data`.`is_adult`,
|
||||
`sales_data`.`is_hispanic`,
|
||||
`sales_data`.`recent_login`,
|
||||
`sales_data`.`customer_postal_code`,
|
||||
`sales_data`.`loyalty_id`,
|
||||
`sales_data`.`product_code`,
|
||||
`sales_data`.`brand`,
|
||||
`sales_data`.`manufacturer`,
|
||||
`sales_data`.`category_l1`,
|
||||
`sales_data`.`category_l2`,
|
||||
`sales_data`.`category_l3`,
|
||||
`sales_data`.`category_l4`,
|
||||
`sales_data`.`product_description`,
|
||||
`sales_data`.`private_label`,
|
||||
`sales_data`.`digital_receipt`,
|
||||
`sales_data`.`quantity`,
|
||||
`sales_data`.`sales_amount`,
|
||||
`sales_data`.`loyalty_program_a`,
|
||||
`sales_data`.`loyalty_program_b`,
|
||||
`sales_data`.`loyalty_program_c`,
|
||||
`sales_data`.`category_partition_key`,
|
||||
`sales_data`.`load_timestamp`
|
||||
FROM `sales_data`;
|
||||
-- result:
|
||||
-- !result
|
||||
CREATE MATERIALIZED VIEW `sales_data_mv1`
|
||||
PARTITION BY (date_trunc('day', `order_date`))
|
||||
DISTRIBUTED BY HASH(`order_id`) BUCKETS 12
|
||||
ORDER BY (brand, category_l1)
|
||||
REFRESH MANUAL
|
||||
PROPERTIES (
|
||||
"replicated_storage" = "true",
|
||||
"replication_num" = "1",
|
||||
"partition_refresh_number" = "60",
|
||||
"bloom_filter_columns" = "brand, category_l1, category_l2, category_l3, product_description, manufacturer, retailer_brand, store_name",
|
||||
"colocate_with" = "sales_data_mv1_orders_mv1"
|
||||
)
|
||||
AS SELECT
|
||||
`sales_data`.`customer_id`,
|
||||
`sales_data`.`order_id`,
|
||||
`sales_data`.`line_item`,
|
||||
`sales_data`.`order_date`,
|
||||
`sales_data`.`month_order`,
|
||||
`sales_data`.`week_order`,
|
||||
`sales_data`.`quarter_order`,
|
||||
`sales_data`.`year_order`,
|
||||
`sales_data`.`store_name`,
|
||||
`sales_data`.`retailer_brand`,
|
||||
`sales_data`.`sales_channel`,
|
||||
`sales_data`.`parent_company`,
|
||||
`sales_data`.`store_postal_code`,
|
||||
`sales_data`.`state`,
|
||||
`sales_data`.`region`,
|
||||
`sales_data`.`age`,
|
||||
`sales_data`.`generation`,
|
||||
`sales_data`.`gender`,
|
||||
`sales_data`.`is_adult`,
|
||||
`sales_data`.`is_hispanic`,
|
||||
`sales_data`.`recent_login`,
|
||||
`sales_data`.`customer_postal_code`,
|
||||
`sales_data`.`loyalty_id`,
|
||||
`sales_data`.`product_code`,
|
||||
`sales_data`.`brand`,
|
||||
`sales_data`.`manufacturer`,
|
||||
`sales_data`.`category_l1`,
|
||||
`sales_data`.`category_l2`,
|
||||
`sales_data`.`category_l3`,
|
||||
`sales_data`.`category_l4`,
|
||||
`sales_data`.`product_description`,
|
||||
`sales_data`.`private_label`,
|
||||
`sales_data`.`digital_receipt`,
|
||||
`sales_data`.`quantity`,
|
||||
`sales_data`.`sales_amount`,
|
||||
`sales_data`.`loyalty_program_a`,
|
||||
`sales_data`.`loyalty_program_b`,
|
||||
`sales_data`.`loyalty_program_c`,
|
||||
`sales_data`.`category_partition_key`,
|
||||
`sales_data`.`load_timestamp`
|
||||
FROM `sales_data`;
|
||||
-- result:
|
||||
-- !result
|
||||
WITH snack_buyers AS (
|
||||
SELECT DISTINCT customer_id
|
||||
FROM sales_data
|
||||
WHERE category_l1 = 'Snacks'
|
||||
),
|
||||
dairy_buyers AS (
|
||||
SELECT DISTINCT customer_id
|
||||
FROM sales_data
|
||||
WHERE category_l1 = 'Dairy'
|
||||
)
|
||||
SELECT
|
||||
(SELECT APPROX_COUNT_DISTINCT(customer_id) FROM dairy_buyers) AS dairy_customers,
|
||||
(SELECT APPROX_COUNT_DISTINCT(customer_id) FROM snack_buyers) AS snack_customers;
|
||||
-- result:
|
||||
1 1
|
||||
-- !result
|
||||
|
|
@ -0,0 +1,229 @@
|
|||
-- name: test_mv_rewrite_bugs1
|
||||
create database db_${uuid0};
|
||||
use db_${uuid0};
|
||||
|
||||
CREATE TABLE `sales_data` (
|
||||
`customer_id` bigint(20) NOT NULL,
|
||||
`order_id` bigint(20) NOT NULL,
|
||||
`line_item` smallint(6) NOT NULL,
|
||||
`order_date` datetime NOT NULL,
|
||||
`original_customer_id` varchar(1048576) NOT NULL,
|
||||
`original_order_id` varchar(1048576) NOT NULL,
|
||||
`month_order` datetime NULL,
|
||||
`week_order` datetime NULL,
|
||||
`quarter_order` datetime NULL,
|
||||
`year_order` datetime NULL,
|
||||
`store_name` varchar(1048576) NULL,
|
||||
`retailer_brand` varchar(1048576) NULL,
|
||||
`sales_channel` varchar(1048576) NULL,
|
||||
`parent_company` varchar(1048576) NULL,
|
||||
`store_postal_code` varchar(1048576) NULL,
|
||||
`state` varchar(1048576) NULL,
|
||||
`region` varchar(18) NULL,
|
||||
`age` decimal(16, 0) NULL,
|
||||
`generation` varchar(17) NULL,
|
||||
`gender` varchar(11) NULL,
|
||||
`is_adult` boolean NULL,
|
||||
`is_hispanic` boolean NULL,
|
||||
`recent_login` boolean NULL,
|
||||
`customer_postal_code` varchar(1048576) NULL,
|
||||
`loyalty_id` varchar(1048576) NULL,
|
||||
`product_code` varchar(1048576) NULL,
|
||||
`brand` varchar(1048576) NULL,
|
||||
`manufacturer` varchar(1048576) NULL,
|
||||
`category_l1` varchar(1048576) NULL,
|
||||
`category_l2` varchar(1048576) NULL,
|
||||
`category_l3` varchar(1048576) NULL,
|
||||
`category_l4` varchar(1048576) NULL,
|
||||
`product_description` varchar(1048576) NULL,
|
||||
`private_label` boolean NULL,
|
||||
`digital_receipt` boolean NULL,
|
||||
`quantity` decimal(10, 2) NULL,
|
||||
`sales_amount` decimal(10, 2) NULL,
|
||||
`loyalty_program_a` boolean NULL,
|
||||
`loyalty_program_b` boolean NULL,
|
||||
`loyalty_program_c` boolean NULL,
|
||||
`category_partition_key` varchar(1048576) NULL,
|
||||
`load_timestamp` datetime NULL
|
||||
) ENGINE=OLAP
|
||||
PRIMARY KEY(`customer_id`, `order_id`, `line_item`, `order_date`)
|
||||
PARTITION BY date_trunc('day', order_date)
|
||||
DISTRIBUTED BY HASH(`customer_id`) BUCKETS 12
|
||||
ORDER BY(`brand`, `category_l1`)
|
||||
PROPERTIES (
|
||||
"colocate_with" = "sales_data_orders",
|
||||
"compression" = "LZ4",
|
||||
"enable_persistent_index" = "true",
|
||||
"replication_num" = "1"
|
||||
);
|
||||
|
||||
INSERT INTO `sales_data` (
|
||||
`customer_id`, `order_id`, `line_item`, `order_date`,
|
||||
`original_customer_id`, `original_order_id`,
|
||||
`month_order`, `week_order`, `quarter_order`, `year_order`,
|
||||
`store_name`, `retailer_brand`, `sales_channel`, `parent_company`,
|
||||
`store_postal_code`, `state`, `region`, `age`, `generation`, `gender`,
|
||||
`is_adult`, `is_hispanic`, `recent_login`,
|
||||
`customer_postal_code`, `loyalty_id`, `product_code`, `brand`, `manufacturer`,
|
||||
`category_l1`, `category_l2`, `category_l3`, `category_l4`, `product_description`,
|
||||
`private_label`, `digital_receipt`, `quantity`, `sales_amount`,
|
||||
`loyalty_program_a`, `loyalty_program_b`, `loyalty_program_c`,
|
||||
`category_partition_key`, `load_timestamp`
|
||||
) VALUES
|
||||
(10001, 500001, 1, '2025-09-01 10:15:00',
|
||||
'10001', '500001',
|
||||
'2025-09-01', '2025-09-01', '2025-07-01', '2025-01-01',
|
||||
'SuperMart Downtown', 'SuperMart', 'Offline', 'SuperMart Inc.',
|
||||
'10001', 'NY', 'Northeast', 32, 'Millennial', 'F',
|
||||
TRUE, FALSE, TRUE,
|
||||
'10001', 'LOYAL123', '1234567890123', 'Store Brand', 'SuperMart',
|
||||
'Dairy', 'Milk', 'Whole Milk', NULL, '1 Gallon Whole Milk',
|
||||
FALSE, TRUE, 1.00, 3.49,
|
||||
FALSE, TRUE, FALSE,
|
||||
'Dairy', '2025-09-01 12:00:00'),
|
||||
(10002, 500002, 1, '2025-09-02 15:30:00',
|
||||
'10002', '500002',
|
||||
'2025-09-01', '2025-09-01', '2025-07-01', '2025-01-01',
|
||||
'MegaStore', 'MegaStore', 'Offline', 'MegaStore Corp.',
|
||||
'90001', 'CA', 'West', 25, 'Gen Z', 'M',
|
||||
TRUE, TRUE, FALSE,
|
||||
'90001', 'LOYAL456', '9876543210987', 'CrunchyChips', 'SnackCorp',
|
||||
'Snacks', 'Chips', NULL, NULL, 'Classic Potato Chips',
|
||||
FALSE, FALSE, 2.00, 5.98,
|
||||
FALSE, TRUE, TRUE,
|
||||
'Snacks', '2025-09-02 16:00:00'),
|
||||
(10003, 500003, 1, '2025-09-03 09:45:00',
|
||||
'10003', '500003',
|
||||
'2025-09-01', '2025-09-01', '2025-07-01', '2025-01-01',
|
||||
'BulkMart', 'BulkMart', 'Offline', 'BulkMart Wholesale',
|
||||
'77001', 'TX', 'South', 29, 'Millennial', 'F',
|
||||
TRUE, FALSE, TRUE,
|
||||
'77001', 'LOYAL789', '5555555555555', 'BabyComfort', 'BabyCorp',
|
||||
'Baby', 'Diapers', NULL, NULL, 'Comfort Diapers Size 2',
|
||||
FALSE, TRUE, 1.00, 39.99,
|
||||
TRUE, FALSE, FALSE,
|
||||
'Baby', '2025-09-03 10:00:00');
|
||||
|
||||
CREATE MATERIALIZED VIEW `sales_data_mv2`
|
||||
PARTITION BY (date_trunc('day', `order_date`))
|
||||
DISTRIBUTED BY HASH(`customer_id`) BUCKETS 12
|
||||
ORDER BY (category_l1, category_l2, category_l3)
|
||||
REFRESH MANUAL
|
||||
PROPERTIES (
|
||||
"replicated_storage" = "true",
|
||||
"replication_num" = "1",
|
||||
"partition_refresh_number" = "60",
|
||||
"bloom_filter_columns" = "category_l1, category_l2, category_l3, product_description, manufacturer, retailer_brand, store_name",
|
||||
"colocate_with" = "sales_data_orders"
|
||||
)
|
||||
AS SELECT
|
||||
`sales_data`.`customer_id`,
|
||||
`sales_data`.`order_id`,
|
||||
`sales_data`.`line_item`,
|
||||
`sales_data`.`order_date`,
|
||||
`sales_data`.`month_order`,
|
||||
`sales_data`.`week_order`,
|
||||
`sales_data`.`quarter_order`,
|
||||
`sales_data`.`year_order`,
|
||||
`sales_data`.`store_name`,
|
||||
`sales_data`.`retailer_brand`,
|
||||
`sales_data`.`sales_channel`,
|
||||
`sales_data`.`parent_company`,
|
||||
`sales_data`.`store_postal_code`,
|
||||
`sales_data`.`state`,
|
||||
`sales_data`.`region`,
|
||||
`sales_data`.`age`,
|
||||
`sales_data`.`generation`,
|
||||
`sales_data`.`gender`,
|
||||
`sales_data`.`is_adult`,
|
||||
`sales_data`.`is_hispanic`,
|
||||
`sales_data`.`recent_login`,
|
||||
`sales_data`.`customer_postal_code`,
|
||||
`sales_data`.`loyalty_id`,
|
||||
`sales_data`.`product_code`,
|
||||
`sales_data`.`brand`,
|
||||
`sales_data`.`manufacturer`,
|
||||
`sales_data`.`category_l1`,
|
||||
`sales_data`.`category_l2`,
|
||||
`sales_data`.`category_l3`,
|
||||
`sales_data`.`category_l4`,
|
||||
`sales_data`.`product_description`,
|
||||
`sales_data`.`private_label`,
|
||||
`sales_data`.`digital_receipt`,
|
||||
`sales_data`.`quantity`,
|
||||
`sales_data`.`sales_amount`,
|
||||
`sales_data`.`loyalty_program_a`,
|
||||
`sales_data`.`loyalty_program_b`,
|
||||
`sales_data`.`loyalty_program_c`,
|
||||
`sales_data`.`category_partition_key`,
|
||||
`sales_data`.`load_timestamp`
|
||||
FROM `sales_data`;
|
||||
|
||||
CREATE MATERIALIZED VIEW `sales_data_mv1`
|
||||
PARTITION BY (date_trunc('day', `order_date`))
|
||||
DISTRIBUTED BY HASH(`order_id`) BUCKETS 12
|
||||
ORDER BY (brand, category_l1)
|
||||
REFRESH MANUAL
|
||||
PROPERTIES (
|
||||
"replicated_storage" = "true",
|
||||
"replication_num" = "1",
|
||||
"partition_refresh_number" = "60",
|
||||
"bloom_filter_columns" = "brand, category_l1, category_l2, category_l3, product_description, manufacturer, retailer_brand, store_name",
|
||||
"colocate_with" = "sales_data_mv1_orders_mv1"
|
||||
)
|
||||
AS SELECT
|
||||
`sales_data`.`customer_id`,
|
||||
`sales_data`.`order_id`,
|
||||
`sales_data`.`line_item`,
|
||||
`sales_data`.`order_date`,
|
||||
`sales_data`.`month_order`,
|
||||
`sales_data`.`week_order`,
|
||||
`sales_data`.`quarter_order`,
|
||||
`sales_data`.`year_order`,
|
||||
`sales_data`.`store_name`,
|
||||
`sales_data`.`retailer_brand`,
|
||||
`sales_data`.`sales_channel`,
|
||||
`sales_data`.`parent_company`,
|
||||
`sales_data`.`store_postal_code`,
|
||||
`sales_data`.`state`,
|
||||
`sales_data`.`region`,
|
||||
`sales_data`.`age`,
|
||||
`sales_data`.`generation`,
|
||||
`sales_data`.`gender`,
|
||||
`sales_data`.`is_adult`,
|
||||
`sales_data`.`is_hispanic`,
|
||||
`sales_data`.`recent_login`,
|
||||
`sales_data`.`customer_postal_code`,
|
||||
`sales_data`.`loyalty_id`,
|
||||
`sales_data`.`product_code`,
|
||||
`sales_data`.`brand`,
|
||||
`sales_data`.`manufacturer`,
|
||||
`sales_data`.`category_l1`,
|
||||
`sales_data`.`category_l2`,
|
||||
`sales_data`.`category_l3`,
|
||||
`sales_data`.`category_l4`,
|
||||
`sales_data`.`product_description`,
|
||||
`sales_data`.`private_label`,
|
||||
`sales_data`.`digital_receipt`,
|
||||
`sales_data`.`quantity`,
|
||||
`sales_data`.`sales_amount`,
|
||||
`sales_data`.`loyalty_program_a`,
|
||||
`sales_data`.`loyalty_program_b`,
|
||||
`sales_data`.`loyalty_program_c`,
|
||||
`sales_data`.`category_partition_key`,
|
||||
`sales_data`.`load_timestamp`
|
||||
FROM `sales_data`;
|
||||
|
||||
WITH snack_buyers AS (
|
||||
SELECT DISTINCT customer_id
|
||||
FROM sales_data
|
||||
WHERE category_l1 = 'Snacks'
|
||||
),
|
||||
dairy_buyers AS (
|
||||
SELECT DISTINCT customer_id
|
||||
FROM sales_data
|
||||
WHERE category_l1 = 'Dairy'
|
||||
)
|
||||
SELECT
|
||||
(SELECT APPROX_COUNT_DISTINCT(customer_id) FROM dairy_buyers) AS dairy_customers,
|
||||
(SELECT APPROX_COUNT_DISTINCT(customer_id) FROM snack_buyers) AS snack_customers;
|
||||
Loading…
Reference in New Issue