[BugFix] Fix mv refresh bug with case-insensitive partition names (backport #62389) (#62444)

Signed-off-by: shuming.li <ming.moriarty@gmail.com>
Co-authored-by: shuming.li <ming.moriarty@gmail.com>
This commit is contained in:
mergify[bot] 2025-08-28 16:24:19 +08:00 committed by GitHub
parent c03f27bca3
commit b045d6efbd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 383 additions and 41 deletions

View File

@ -88,6 +88,10 @@ public class CatalogUtils {
if (partitionDesc.isSetIfNotExists()) {
existPartitionNameSet.add(partitionName);
} else {
// add more information for user
Partition existedPartition = olapTable.getPartition(partitionName);
LOG.warn("Duplicate partition name {}, existed partition:{}, current partition:{}", partitionName,
existedPartition, partitionDesc);
ErrorReport.reportDdlException(ErrorCode.ERR_SAME_NAME_PARTITION, partitionName);
}
}

View File

@ -115,6 +115,7 @@ import com.starrocks.sql.ast.UpdateStmt;
import com.starrocks.sql.ast.ValuesRelation;
import com.starrocks.sql.ast.ViewRelation;
import com.starrocks.sql.common.ErrorType;
import com.starrocks.sql.common.PCell;
import com.starrocks.sql.common.PListCell;
import com.starrocks.sql.common.StarRocksPlannerException;
import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
@ -1426,7 +1427,7 @@ public class AnalyzerUtils {
ImmutableMap.of("replication_num", String.valueOf(replicationNum));
// table partitions for check
TreeMap<String, PListCell> tablePartitions = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
TreeMap<String, PCell> tablePartitions = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
tablePartitions.putAll(olapTable.getListPartitionItems());
List<String> partitionColNames = Lists.newArrayList();
List<PartitionDesc> partitionDescs = Lists.newArrayList();
@ -1474,8 +1475,8 @@ public class AnalyzerUtils {
/**
* Calculate the unique partition name for list partition.
*/
private static String calculateUniquePartitionName(String partitionName, PListCell cell,
Map<String, PListCell> tablePartitions) throws AnalysisException {
public static String calculateUniquePartitionName(String partitionName, PCell cell,
Map<String, PCell> tablePartitions) throws AnalysisException {
String orignialPartitionName = partitionName;
int i = 0;
// If the partition name already exists and their partition values are different, change the partition name.

View File

@ -25,6 +25,8 @@ import com.starrocks.catalog.PartitionKey;
import com.starrocks.catalog.Table;
import com.starrocks.common.util.DebugUtil;
import com.starrocks.connector.PartitionUtil;
import com.starrocks.sql.analyzer.AnalyzerUtils;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -52,49 +54,72 @@ public final class ListPartitionDiffer extends PartitionDiffer {
* @return the list partition diff between the base table and the mv
*/
public static PartitionDiff getListPartitionDiff(Map<String, PCell> baseItems,
Map<String, PCell> mvItems) {
Map<String, PCell> mvItems,
Set<String> uniqueResultNames) {
// This synchronization method has a one-to-one correspondence
// between the base table and the partition of the mv.
Map<String, PCell> adds = diffList(baseItems, mvItems);
Map<String, PCell> deletes = diffList(mvItems, baseItems);
// for addition, we need to ensure the partition name is unique in case-insensitive
Map<String, PCell> adds = diffList(baseItems, mvItems, uniqueResultNames);
// for deletion, we don't need to ensure the partition name is unique since mvItems is used as the reference
Map<String, PCell> deletes = diffList(mvItems, baseItems, null);
return new PartitionDiff(adds, deletes);
}
/**
* Iterate srcListMap, if the partition name is not in dstListMap or the partition value is different, add into result.
* @param srcListMap src partition list map
* @param dstListMap dst partition list map
* @return the different partition list map
* When `uniqueResultNames` is set, use it to ensure the output partition name is unique in case-insensitive.
* NOTE: Ensure output map keys are distinct in case-insensitive which is because the key is used for partition name,
* and StarRocks partition name is case-insensitive.
*/
public static Map<String, PCell> diffList(Map<String, PCell> srcListMap,
Map<String, PCell> dstListMap) {
Map<String, PCell> result = Maps.newTreeMap();
Map<String, PCell> dstListMap,
Set<String> uniqueResultNames) {
if (CollectionUtils.sizeIsEmpty(srcListMap)) {
return Maps.newHashMap();
}
// PListCell may contain multi values, we need to ensure they are not duplicated from each other
// NOTE: dstListMap's partition items may be duplicated, we need to collect them first
Map<PListAtom, PListCell> dstAtomMaps = Maps.newHashMap();
dstListMap.values().stream()
.forEach(l -> {
Preconditions.checkArgument(l instanceof PListCell, "PListCell expected");
PListCell cell = (PListCell) l;
cell.toAtoms().stream().forEach(x -> dstAtomMaps.put(x, cell));
});
for (PCell l : dstListMap.values()) {
Preconditions.checkArgument(l instanceof PListCell, "PListCell expected");
PListCell pListCell = (PListCell) l;
pListCell.toAtoms().stream().forEach(x -> dstAtomMaps.put(x, pListCell));
}
Map<String, PCell> result = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
for (Map.Entry<String, PCell> srcEntry : srcListMap.entrySet()) {
String key = srcEntry.getKey();
String pName = srcEntry.getKey();
PListCell srcItem = (PListCell) srcEntry.getValue();
if (srcItem.equals(dstListMap.get(key))) {
if (srcItem.equals(dstListMap.get(pName))) {
continue;
}
// distinct atoms
Set<PListAtom> srcAtoms = srcItem.toAtoms();
List<PListAtom> srcDistinctAtoms = srcAtoms.stream()
.filter(x -> !dstAtomMaps.containsKey(x))
.collect(Collectors.toList());
if (srcDistinctAtoms.isEmpty()) {
continue;
List<PListAtom> srcDistinctAtoms = srcItem.toAtoms().stream()
.filter(atom -> !dstAtomMaps.containsKey(atom))
.collect(Collectors.toList());
if (!srcDistinctAtoms.isEmpty()) {
srcDistinctAtoms.forEach(atom -> dstAtomMaps.put(atom, srcItem));
PListCell newValue = new PListCell(
srcDistinctAtoms.stream().map(PListAtom::getPartitionItem).collect(Collectors.toList()));
// ensure the partition name is unique
if (uniqueResultNames != null) {
if (uniqueResultNames.contains(pName)) {
try {
// it's fine to use result to keep it unique here, since we always
pName = AnalyzerUtils.calculateUniquePartitionName(pName, newValue, result);
} catch (Exception e) {
throw new RuntimeException("Fail to calculate unique partition name: " + e.getMessage());
}
}
uniqueResultNames.add(pName);
}
result.put(pName, newValue);
}
dstAtomMaps.putAll(srcDistinctAtoms.stream().collect(Collectors.toMap(x -> x, x -> srcItem)));
PListCell newValue =
new PListCell(srcDistinctAtoms.stream().map(x -> x.getPartitionItem()).collect(Collectors.toList()));
result.put(key, newValue);
}
return result;
}
@ -202,16 +227,17 @@ public final class ListPartitionDiffer extends PartitionDiffer {
public static Map<String, PCell> collectBasePartitionCells(Map<Table, Map<String, PCell>> basePartitionMaps) {
Map<String, PCell> allBasePartitionItems = Maps.newHashMap();
for (Map<String, PCell> e : basePartitionMaps.values()) {
// merge into a total map to compute the difference
e.entrySet()
.stream()
.forEach(x -> {
PListCell cell = (PListCell) allBasePartitionItems.computeIfAbsent(x.getKey(),
k -> new PListCell(Lists.newArrayList()));
cell.addItems(((PListCell) x.getValue()).getPartitionItems());
});
}
// NOTE: how to handle the partition name conflict between different base tables?
// case1: partition name not equal but partition value equal, it's ok
// case2: partition name is equal, but partition value is different,
// merge into a total map to compute the difference
basePartitionMaps.values().forEach(partitionMap ->
partitionMap.forEach((key, value) -> {
PListCell cell = (PListCell) allBasePartitionItems
.computeIfAbsent(key, k -> new PListCell(Lists.newArrayList()));
cell.addItems(((PListCell) value).getPartitionItems());
})
);
return allBasePartitionItems;
}
@ -234,8 +260,15 @@ public final class ListPartitionDiffer extends PartitionDiffer {
// TODO: prune the partitions based on ttl
Map<String, PCell> mvPartitionNameToListMap = mv.getPartitionCells(Optional.empty());
// collect all base table partition cells
Map<String, PCell> allBasePartitionItems = collectBasePartitionCells(refBaseTablePartitionMap);
PartitionDiff diff = ListPartitionDiffer.getListPartitionDiff(allBasePartitionItems, mvPartitionNameToListMap);
// ensure the result partition name is unique in case-insensitive
Set<String> uniqueResultNames = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER);
uniqueResultNames.addAll(mvPartitionNameToListMap.keySet());
PartitionDiff diff = ListPartitionDiffer.getListPartitionDiff(allBasePartitionItems,
mvPartitionNameToListMap, uniqueResultNames);
// collect external partition column mapping
Map<Table, Map<String, Set<String>>> externalPartitionMaps = Maps.newHashMap();

View File

@ -1551,4 +1551,61 @@ public class PCTRefreshListPartitionOlapTest extends MVTestBase {
" PREDICATES: 5: dt2 = '2025-05-17'\n" +
" partitions=1/1");
}
private void testMVWithDuplicatedPartitionNames(String sql1,
String sql2,
String mvName) throws Exception {
starRocksAssert.withTable("CREATE TABLE `s1` (\n" +
" `col1` varchar(100),\n" +
" `col2` varchar(100),\n" +
" `col3` bigint(20) \n" +
") PARTITION BY (`col1`)");
starRocksAssert.withTable("CREATE TABLE `s2` (\n" +
" `col1` varchar(100),\n" +
" `col2` varchar(100),\n" +
" `col3` bigint(20) \n" +
") PARTITION BY (`col1`)");
executeInsertSql(sql1);
executeInsertSql(sql2);
List<String> s1PartitionNames = extractColumnValues(sql1, 0);
System.out.println(s1PartitionNames);
addListPartition("s1", s1PartitionNames);
List<String> s2PartitionNames = extractColumnValues(sql2, 0);
System.out.println(s2PartitionNames);
addListPartition("s2", s2PartitionNames);
starRocksAssert.withRefreshedMaterializedView(String.format("CREATE MATERIALIZED VIEW %s\n" +
"PARTITION BY col1\n" +
"REFRESH DEFERRED MANUAL \n" +
"PROPERTIES (\n" +
" \"partition_refresh_number\" = \"-1\"\n" +
")\n" +
"AS\n" +
"select t1.col1, t1.col2 from s1 as t1 left join s2 as t2 on t1.col1 = t2.col1;\n",
mvName));
starRocksAssert.dropTable("s1");
starRocksAssert.dropTable("s2");
starRocksAssert.dropMaterializedView(mvName);
}
@Test
public void testCreateMVWithDuplicatedPartitionNames1() throws Exception {
String query1 = "insert into s1 values('demo-diu.com', 'b', 2), " +
"('demo-dIu.com', 'a', 1) , ('demo-Diu.com', 'b', 2), ('demo-DIU.com', 'a', 1), " +
"('demo-diu.com', 'b', 2);";
String query2 = "insert into s2 values('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2), " +
"('demo-dIU.com', 'a', 1) , ('demo-Diu.com', 'b', 2);";
testMVWithDuplicatedPartitionNames(query1, query2, "test_mv1");
}
@Test
public void testCreateMVWithDuplicatedPartitionNames2() throws Exception {
String query1 = "insert into s1 values('demo-diu.com', 'b', 2), " +
"('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2);";
String query2 = "insert into s2 values('demo-diu.com', 'b', 2), " +
"('demo-dIu.com', 'a', 1) , ('demo-Diu.com', 'b', 2), ('demo-DIU.com', 'a', 1), " +
"('demo-diu.com', 'b', 2);";
testMVWithDuplicatedPartitionNames(query1, query2, "test_mv2");
}
}

View File

@ -188,7 +188,8 @@ public class SyncPartitionUtilsTest {
private Map<String, PListCell> diffList(Map<String, PCell> baseListMap,
Map<String, PCell> mvListMap) {
Map<String, PCell> result = ListPartitionDiffer.diffList(baseListMap, mvListMap);
Map<String, PCell> result = ListPartitionDiffer.diffList(baseListMap, mvListMap,
mvListMap.keySet().stream().collect(Collectors.toSet()));
return result.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, entry -> (PListCell) entry.getValue()));
}

View File

@ -17,6 +17,7 @@ package com.starrocks.sql.optimizer.rule.transformation.materialization;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.starrocks.analysis.StringLiteral;
import com.starrocks.analysis.TableName;
@ -47,11 +48,14 @@ import com.starrocks.scheduler.TaskRun;
import com.starrocks.scheduler.TaskRunBuilder;
import com.starrocks.server.GlobalStateMgr;
import com.starrocks.sql.analyzer.Analyzer;
import com.starrocks.sql.analyzer.AnalyzerUtils;
import com.starrocks.sql.ast.CreateMaterializedViewStatement;
import com.starrocks.sql.ast.QueryRelation;
import com.starrocks.sql.ast.QueryStatement;
import com.starrocks.sql.ast.StatementBase;
import com.starrocks.sql.ast.SystemVariable;
import com.starrocks.sql.common.PCell;
import com.starrocks.sql.common.PListCell;
import com.starrocks.sql.optimizer.CachingMvPlanContextBuilder;
import com.starrocks.sql.optimizer.MaterializedViewOptimizer;
import com.starrocks.sql.optimizer.OptExpression;
@ -85,10 +89,13 @@ import org.junit.jupiter.api.io.TempDir;
import java.io.File;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
@ -601,4 +608,73 @@ public class MVTestBase extends StarRocksTestBase {
}
});
}
public static List<String> extractColumnValues(String sql, int columnIndex) {
List<String> result = new ArrayList<>();
// Regex pattern to match the VALUES clause and all parenthesized value groups
Pattern pattern = Pattern.compile("(?i)values\\s*([^)]+)(?:\\s*,\\s*([^)]+))*\\s*;?");
Matcher matcher = pattern.matcher(sql);
if (matcher.find()) {
// Process first set of values
processValueGroup(matcher.group(1), columnIndex, result);
// Process subsequent value groups
for (int i = 2; i <= matcher.groupCount(); i++) {
if (matcher.group(i) != null) {
processValueGroup(matcher.group(i), columnIndex, result);
}
}
}
return result;
}
private static void processValueGroup(String valueGroup, int columnIndex, List<String> result) {
// Split by commas but ignore commas inside quotes
String[] values = valueGroup.split(",(?=(?:[^']*'[^']*')*[^']*$)");
if (columnIndex < values.length) {
String value = values[columnIndex].trim();
// Remove surrounding quotes if present
value = cleanSqlValue(value);
result.add(value);
}
}
public static String cleanSqlValue(String input) {
if (input == null) {
return null;
}
String result = input.trim();
int len = result.length();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < len; i++) {
if (result.charAt(i) == '\'' || result.charAt(i) == '\"' || result.charAt(i) == '`' ||
result.charAt(i) == '(' || result.charAt(i) == ')') {
continue;
} else {
sb.append(result.charAt(i));
}
}
return sb.toString();
}
protected void addListPartition(String tbl, List<String> values) {
Map<String, PCell> partitions = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
for (String val : values) {
PListCell pListCell = new PListCell(val);
String pName = AnalyzerUtils.getFormatPartitionValue(val);
if (partitions.containsKey(pName)) {
try {
pName = AnalyzerUtils.calculateUniquePartitionName(pName, pListCell, partitions);
} catch (Exception e) {
Assertions.fail("add partition failed:" + e);
}
}
partitions.put(pName, pListCell);
addListPartition(tbl, pName, val);
}
}
}

View File

@ -0,0 +1,103 @@
-- name: test_mv_refresh_list_partitions_with_duplicated_partition_names
create database db_${uuid0};
-- result:
-- !result
use db_${uuid0};
-- result:
-- !result
CREATE TABLE `t1` (
`col1` varchar(100) NOT NULL COMMENT "",
`col2` varchar(100) NULL COMMENT "",
`col3` bigint(20) NULL COMMENT ""
) ENGINE=OLAP
PRIMARY KEY(`col1`)
PARTITION BY (`col1`)
DISTRIBUTED BY HASH(`col1`) BUCKETS 5
ORDER BY(`col2`)
PROPERTIES (
"compression" = "LZ4",
"enable_persistent_index" = "true",
"fast_schema_evolution" = "true",
"replicated_storage" = "true",
"replication_num" = "1"
);
-- result:
-- !result
insert into t1 values('demo-diu.com', 'b', 2), ('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2);
-- result:
-- !result
insert into t1 values('demo-dIU.com', 'a', 1) , ('demo-Diu.com', 'b', 2);
-- result:
-- !result
CREATE TABLE `t2` (
`col1` varchar(100) NOT NULL COMMENT "",
`col2` varchar(100) NULL COMMENT "",
`col3` bigint(20) NULL COMMENT ""
) ENGINE=OLAP
PRIMARY KEY(`col1`)
PARTITION BY (`col1`)
DISTRIBUTED BY HASH(`col1`) BUCKETS 5
ORDER BY(`col2`)
PROPERTIES (
"compression" = "LZ4",
"enable_persistent_index" = "true",
"fast_schema_evolution" = "true",
"replicated_storage" = "true",
"replication_num" = "1"
);
-- result:
-- !result
insert into t2 values('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2), ('demo-dIU.com', 'a', 1) , ('demo-Diu.com', 'b', 2);
-- result:
-- !result
CREATE MATERIALIZED VIEW test_mv1
PARTITION BY col1
DISTRIBUTED BY HASH(col1) BUCKETS 5
REFRESH DEFERRED MANUAL
PROPERTIES (
"partition_refresh_number" = "-1"
)
AS
select t1.col1, t1.col2 from t1 left join t2 on t1.col1 = t2.col1;
-- result:
-- !result
REFRESH MATERIALIZED VIEW test_mv1 WITH SYNC MODE;
SELECT * FROM test_mv1 ORDER BY col1, col2;
-- result:
demo-DIU.com a
demo-Diu.com b
demo-dIU.com a
demo-diu.com b
-- !result
insert into t1 values('demo-diu.com', 'b', 2), ('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2);
-- result:
-- !result
REFRESH MATERIALIZED VIEW test_mv1 WITH SYNC MODE;
SELECT * FROM test_mv1 ORDER BY col1, col2;
-- result:
demo-DIU.com a
demo-Diu.com b
demo-dIU.com a
demo-diu.com b
-- !result
insert into t1 values('demo-DIU.com', 'a', 1) , ('demo-DiU.com', 'b', 2);
-- result:
-- !result
insert into t2 values('demo-DIU.com', 'a', 1) , ('demo-dIu.com', 'b', 2), ('demo-dIU.com', 'a', 1) , ('demo-DIu.com', 'b', 2);
-- result:
-- !result
insert into t1 values('demo-diu.com', 'b', 2), ('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2);
-- result:
-- !result
REFRESH MATERIALIZED VIEW test_mv1 WITH SYNC MODE;
SELECT * FROM test_mv1 ORDER BY col1, col2;
-- result:
demo-DIU.com a
demo-DiU.com b
demo-Diu.com b
demo-dIU.com a
demo-diu.com b
-- !result
DROP database db_${uuid0};
-- result:
-- !result

View File

@ -0,0 +1,67 @@
-- name: test_mv_refresh_list_partitions_with_duplicated_partition_names
create database db_${uuid0};
use db_${uuid0};
CREATE TABLE `t1` (
`col1` varchar(100) NOT NULL COMMENT "",
`col2` varchar(100) NULL COMMENT "",
`col3` bigint(20) NULL COMMENT ""
) ENGINE=OLAP
PRIMARY KEY(`col1`)
PARTITION BY (`col1`)
DISTRIBUTED BY HASH(`col1`) BUCKETS 5
ORDER BY(`col2`)
PROPERTIES (
"compression" = "LZ4",
"enable_persistent_index" = "true",
"fast_schema_evolution" = "true",
"replicated_storage" = "true",
"replication_num" = "1"
);
insert into t1 values('demo-diu.com', 'b', 2), ('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2);
insert into t1 values('demo-dIU.com', 'a', 1) , ('demo-Diu.com', 'b', 2);
CREATE TABLE `t2` (
`col1` varchar(100) NOT NULL COMMENT "",
`col2` varchar(100) NULL COMMENT "",
`col3` bigint(20) NULL COMMENT ""
) ENGINE=OLAP
PRIMARY KEY(`col1`)
PARTITION BY (`col1`)
DISTRIBUTED BY HASH(`col1`) BUCKETS 5
ORDER BY(`col2`)
PROPERTIES (
"compression" = "LZ4",
"enable_persistent_index" = "true",
"fast_schema_evolution" = "true",
"replicated_storage" = "true",
"replication_num" = "1"
);
insert into t2 values('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2), ('demo-dIU.com', 'a', 1) , ('demo-Diu.com', 'b', 2);
CREATE MATERIALIZED VIEW test_mv1
PARTITION BY col1
DISTRIBUTED BY HASH(col1) BUCKETS 5
REFRESH DEFERRED MANUAL
PROPERTIES (
"partition_refresh_number" = "-1"
)
AS
select t1.col1, t1.col2 from t1 left join t2 on t1.col1 = t2.col1;
REFRESH MATERIALIZED VIEW test_mv1 WITH SYNC MODE;
SELECT * FROM test_mv1 ORDER BY col1, col2;
insert into t1 values('demo-diu.com', 'b', 2), ('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2);
REFRESH MATERIALIZED VIEW test_mv1 WITH SYNC MODE;
SELECT * FROM test_mv1 ORDER BY col1, col2;
insert into t1 values('demo-DIU.com', 'a', 1) , ('demo-DiU.com', 'b', 2);
insert into t2 values('demo-DIU.com', 'a', 1) , ('demo-dIu.com', 'b', 2), ('demo-dIU.com', 'a', 1) , ('demo-DIu.com', 'b', 2);
insert into t1 values('demo-diu.com', 'b', 2), ('demo-DIU.com', 'a', 1) , ('demo-diu.com', 'b', 2);
REFRESH MATERIALIZED VIEW test_mv1 WITH SYNC MODE;
SELECT * FROM test_mv1 ORDER BY col1, col2;
DROP database db_${uuid0};