[BugFix] Fix incompatible zonemap reuse for fast schema evolution in shared-data (backport #63143) (#63318)

Signed-off-by: PengFei Li <lpengfei2016@gmail.com>
This commit is contained in:
PengFei Li 2025-09-20 21:53:43 +08:00 committed by GitHub
parent f338bc6ad8
commit 060b5f60a4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 277 additions and 11 deletions

View File

@ -64,6 +64,7 @@ import com.starrocks.catalog.OlapTable.OlapTableState;
import com.starrocks.catalog.Partition;
import com.starrocks.catalog.PhysicalPartition;
import com.starrocks.catalog.Replica;
import com.starrocks.catalog.SchemaChangeTypeCompatibility;
import com.starrocks.catalog.SchemaInfo;
import com.starrocks.catalog.StructField;
import com.starrocks.catalog.StructType;
@ -912,6 +913,10 @@ public class SchemaChangeHandler extends AlterHandler {
}
}
if (!SchemaChangeTypeCompatibility.canReuseZonemapIndex(oriColumn.getType(), modColumn.getType())) {
fastSchemaEvolution = false;
}
return fastSchemaEvolution;
}

View File

@ -471,7 +471,7 @@ public class Column implements Writable, GsonPreProcessable, GsonPostProcessable
throw new DdlException("Dest column name is empty");
}
if (!ColumnType.isSchemaChangeAllowed(type, other.type)) {
if (!SchemaChangeTypeCompatibility.isSchemaChangeAllowed(type, other.type)) {
throw new DdlException("Can not change " + getType() + " to " + other.getType());
}

View File

@ -34,7 +34,7 @@
package com.starrocks.catalog;
public abstract class ColumnType {
public class SchemaChangeTypeCompatibility {
private static Boolean[][] schemaChangeMatrix;
static {
@ -150,11 +150,98 @@ public abstract class ColumnType {
return false;
}
static boolean isSchemaChangeAllowed(Type lhs, Type rhs) {
/**
 * Reports whether a column of type {@code lhs} may be changed to type {@code rhs}.
 * <p>
 * If either side is a DecimalV3 type, the decision is delegated to
 * {@code isSchemaChangeAllowedInvolvingDecimalV3}. Every other pair is answered by
 * {@code schemaChangeMatrix}, indexed by the ordinals of the two primitive types.
 *
 * @param lhs the current column type
 * @param rhs the requested column type
 * @return {@code true} if the type change is permitted
 */
public static boolean isSchemaChangeAllowed(Type lhs, Type rhs) {
    final boolean involvesDecimalV3 = lhs.isDecimalV3() || rhs.isDecimalV3();
    if (!involvesDecimalV3) {
        final int fromOrdinal = lhs.getPrimitiveType().ordinal();
        final int toOrdinal = rhs.getPrimitiveType().ordinal();
        return schemaChangeMatrix[fromOrdinal][toOrdinal];
    }
    return isSchemaChangeAllowedInvolvingDecimalV3(lhs, rhs);
}
/**
 * Lookup table of type conversions after which an existing ZoneMap index can still be used.
 * The entry at {@code [from.ordinal()][to.ordinal()]} is {@code true} when the index survives the conversion.
 * <p>
 * A ZoneMap index records the min/max values of a data block together with nullability flags
 * (has_null, has_not_null). It stays valid across a type conversion only if both of the following hold:
 * <ol>
 *   <li>The conversion is monotonically non-decreasing, so the stored min/max are still correct bounds
 *       for the converted values.</li>
 *   <li>The conversion never turns a non-null value into null, so has_null/has_not_null stay accurate.</li>
 * </ol>
 * <p>
 * Counter-example: a `STRING` column converted to `INT` cannot reuse its ZoneMap index.
 * <ul>
 *   <li>Min/max change: the strings "16", "423", "5", "97" have min "16" and max "97". As integers
 *       (5, 16, 97, 423) the min is 5 and the max is 423, so the old zonemap (min="16", max="97") would
 *       wrongly prune matching data, e.g. for `WHERE col = 5`.</li>
 *   <li>Nullability change: a string such as "abc" converts to `NULL`. If the original column had
 *       `has_null=false`, that flag would be wrong after the conversion.</li>
 * </ul>
 */
private static final Boolean[][] ZONEMAP_REUSE_COMPATIBILITY_MATRIX;

static {
    final int typeCount = PrimitiveType.values().length;
    final Boolean[][] reusable = new Boolean[typeCount][typeCount];

    // Baseline: a type is compatible only with itself. Ordinal 0 is PrimitiveType.INVALID_TYPE,
    // which is never compatible, not even with itself.
    for (int from = 0; from < typeCount; from++) {
        for (int to = 0; to < typeCount; to++) {
            reusable[from][to] = from > 0 && from == to;
        }
    }

    // Each row reads: source type, followed by every other target type that keeps the zonemap valid.
    final PrimitiveType[][] widenings = {
            // Integer family
            {PrimitiveType.TINYINT,
                    PrimitiveType.SMALLINT, PrimitiveType.INT, PrimitiveType.BIGINT,
                    PrimitiveType.LARGEINT, PrimitiveType.DOUBLE},
            {PrimitiveType.SMALLINT,
                    PrimitiveType.INT, PrimitiveType.BIGINT, PrimitiveType.LARGEINT, PrimitiveType.DOUBLE},
            {PrimitiveType.INT,
                    PrimitiveType.BIGINT, PrimitiveType.LARGEINT, PrimitiveType.DOUBLE},
            {PrimitiveType.BIGINT,
                    PrimitiveType.LARGEINT, PrimitiveType.DOUBLE},
            // Floating-point family
            {PrimitiveType.FLOAT, PrimitiveType.DOUBLE},
            // Decimal family
            {PrimitiveType.DECIMALV2, PrimitiveType.DECIMAL128},
            {PrimitiveType.DECIMAL32,
                    PrimitiveType.DECIMAL64, PrimitiveType.DECIMAL128, PrimitiveType.DECIMAL256},
            {PrimitiveType.DECIMAL64,
                    PrimitiveType.DECIMAL128, PrimitiveType.DECIMAL256},
            {PrimitiveType.DECIMAL128, PrimitiveType.DECIMAL256},
            // Date/Datetime family
            {PrimitiveType.DATE, PrimitiveType.DATETIME},
            // String family
            {PrimitiveType.CHAR, PrimitiveType.VARCHAR},
    };
    for (PrimitiveType[] widening : widenings) {
        final int sourceOrdinal = widening[0].ordinal();
        for (int k = 1; k < widening.length; k++) {
            reusable[sourceOrdinal][widening[k].ordinal()] = true;
        }
    }

    ZONEMAP_REUSE_COMPATIBILITY_MATRIX = reusable;
}
/**
 * Tells whether an existing ZoneMap index remains usable after changing a column's type.
 * <p>
 * The decision is made in two steps:
 * 1. A source type without ZoneMap support has no index that could be reused, so the answer is true.
 * 2. Otherwise the (source, target) pair is looked up in the predefined compatibility matrix.
 * <p>
 * Callers are expected to have validated the conversion with {@link #isSchemaChangeAllowed(Type, Type)} first.
 *
 * @param fromType The original column type.
 * @param toType The new column type.
 * @return True if the ZoneMap index can be reused, or if ZoneMap does not apply to the source type;
 *         otherwise, false.
 */
public static boolean canReuseZonemapIndex(Type fromType, Type toType) {
    if (fromType.supportZoneMap()) {
        final int fromOrdinal = fromType.getPrimitiveType().ordinal();
        final int toOrdinal = toType.getPrimitiveType().ordinal();
        return ZONEMAP_REUSE_COMPATIBILITY_MATRIX[fromOrdinal][toOrdinal];
    }
    // No zonemap on the source column, so there is nothing the conversion could invalidate.
    return true;
}
}

View File

@ -904,6 +904,10 @@ public abstract class Type implements Cloneable {
!isJsonType() && !isOnlyMetricType() && !isFunctionType() && !isBinaryType();
}
/**
 * Reports whether this type supports a ZoneMap index.
 *
 * @return {@code true} only for scalar types that are numeric, date or string types
 */
public boolean supportZoneMap() {
    if (!isScalarType()) {
        return false;
    }
    return isNumericType() || isDateType() || isStringType();
}
public static final String NOT_SUPPORT_JOIN_ERROR_MSG =
"Type (nested) percentile/hll/bitmap/json not support join";

View File

@ -266,15 +266,23 @@ public class LakeTableAsyncFastSchemaChangeJobTest extends StarRocksTestBase {
@Test
public void testModifyColumnType() throws Exception {
LakeTable table = createTable(connectContext, "CREATE TABLE t_modify_type" +
"(c0 INT, c1 INT, c2 VARCHAR(5), c3 DATE)" +
"DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) " +
"BUCKETS 1 PROPERTIES('fast_schema_evolution'='true')");
"(c0 INT, c1 INT, c2 FLOAT, c3 DATE, c4 VARCHAR(10))" +
"DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) " +
"BUCKETS 1 PROPERTIES('fast_schema_evolution'='true')");
long oldSchemaId = table.getIndexIdToMeta().get(table.getBaseIndexId()).getSchemaId();
// zonemap index can be reused
{
executeAlterAndWaitFinish(table, "ALTER TABLE t_modify_type MODIFY COLUMN c1 BIGINT, MODIFY COLUMN c2 VARCHAR(10)," +
"MODIFY COLUMN c3 DATETIME", true);
String alterSql = """
ALTER TABLE t_modify_type
MODIFY COLUMN c1 BIGINT,
MODIFY COLUMN c2 DOUBLE,
MODIFY COLUMN c3 DATETIME,
MODIFY COLUMN c4 VARCHAR(20)
""";
executeAlterAndWaitFinish(table, alterSql, true);
List<Column> columns = table.getBaseSchema();
Assertions.assertEquals(4, columns.size());
Assertions.assertEquals(5, columns.size());
Assertions.assertEquals("c0", columns.get(0).getName());
Assertions.assertEquals(0, columns.get(0).getUniqueId());
@ -286,16 +294,29 @@ public class LakeTableAsyncFastSchemaChangeJobTest extends StarRocksTestBase {
Assertions.assertEquals("c2", columns.get(2).getName());
Assertions.assertEquals(2, columns.get(2).getUniqueId());
Assertions.assertEquals(PrimitiveType.VARCHAR, columns.get(2).getType().getPrimitiveType());
Assertions.assertEquals(10, ((ScalarType) columns.get(2).getType()).getLength());
Assertions.assertEquals(ScalarType.DOUBLE, columns.get(2).getType());
Assertions.assertEquals("c3", columns.get(3).getName());
Assertions.assertEquals(3, columns.get(3).getUniqueId());
Assertions.assertEquals(ScalarType.DATETIME, columns.get(3).getType());
Assertions.assertEquals("c4", columns.get(4).getName());
Assertions.assertEquals(4, columns.get(4).getUniqueId());
Assertions.assertEquals(PrimitiveType.VARCHAR, columns.get(4).getType().getPrimitiveType());
Assertions.assertEquals(20, ((ScalarType) columns.get(4).getType()).getLength());
Assertions.assertTrue(table.getIndexIdToMeta().get(table.getBaseIndexId()).getSchemaId() > oldSchemaId);
Assertions.assertEquals(OlapTable.OlapTableState.NORMAL, table.getState());
}
// zonemap index can not be reused
{
executeAlterAndWaitFinish(table, "ALTER TABLE t_modify_type MODIFY COLUMN c3 DATE", false);
Assertions.assertEquals(ScalarType.DATE, table.getBaseSchema().get(3).getType());
executeAlterAndWaitFinish(table, "ALTER TABLE t_modify_type MODIFY COLUMN c4 INT", false);
Assertions.assertEquals(ScalarType.INT, table.getBaseSchema().get(4).getType());
}
}
private List<Column> getShortKeyColumns(Database db, OlapTable table) {

View File

@ -0,0 +1,113 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.starrocks.catalog;
import org.junit.jupiter.api.Test;
import static com.starrocks.catalog.SchemaChangeTypeCompatibility.canReuseZonemapIndex;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Unit tests for {@link SchemaChangeTypeCompatibility#canReuseZonemapIndex(Type, Type)}.
 * <p>
 * Covers the three outcomes of the method: conversions listed in the reuse matrix, conversions absent
 * from it, and source types that have no zonemap at all.
 */
public class SchemaChangeTypeCompatibilityTest {

    /** Identity conversions and every widening listed in the reuse matrix must be reusable. */
    @Test
    public void testZoneMapIndexReuse() {
        // Integer family: identity, wider integers, and DOUBLE
        assertTrue(canReuseZonemapIndex(Type.TINYINT, Type.TINYINT));
        assertTrue(canReuseZonemapIndex(Type.TINYINT, Type.SMALLINT));
        assertTrue(canReuseZonemapIndex(Type.TINYINT, Type.INT));
        assertTrue(canReuseZonemapIndex(Type.TINYINT, Type.BIGINT));
        assertTrue(canReuseZonemapIndex(Type.TINYINT, Type.LARGEINT));
        assertTrue(canReuseZonemapIndex(Type.TINYINT, Type.DOUBLE));

        assertTrue(canReuseZonemapIndex(Type.SMALLINT, Type.SMALLINT));
        assertTrue(canReuseZonemapIndex(Type.SMALLINT, Type.INT));
        assertTrue(canReuseZonemapIndex(Type.SMALLINT, Type.BIGINT));
        assertTrue(canReuseZonemapIndex(Type.SMALLINT, Type.LARGEINT));
        assertTrue(canReuseZonemapIndex(Type.SMALLINT, Type.DOUBLE));

        assertTrue(canReuseZonemapIndex(Type.INT, Type.INT));
        assertTrue(canReuseZonemapIndex(Type.INT, Type.BIGINT));
        assertTrue(canReuseZonemapIndex(Type.INT, Type.LARGEINT));
        assertTrue(canReuseZonemapIndex(Type.INT, Type.DOUBLE));

        assertTrue(canReuseZonemapIndex(Type.BIGINT, Type.BIGINT));
        assertTrue(canReuseZonemapIndex(Type.BIGINT, Type.LARGEINT));
        assertTrue(canReuseZonemapIndex(Type.BIGINT, Type.DOUBLE));

        assertTrue(canReuseZonemapIndex(Type.LARGEINT, Type.LARGEINT));

        // Floating-point family
        assertTrue(canReuseZonemapIndex(Type.FLOAT, Type.FLOAT));
        assertTrue(canReuseZonemapIndex(Type.FLOAT, Type.DOUBLE));
        assertTrue(canReuseZonemapIndex(Type.DOUBLE, Type.DOUBLE));

        // Decimal family
        assertTrue(canReuseZonemapIndex(Type.DECIMALV2, Type.DECIMALV2));
        assertTrue(canReuseZonemapIndex(Type.DECIMALV2, Type.DECIMAL128));

        assertTrue(canReuseZonemapIndex(Type.DECIMAL32, Type.DECIMAL32));
        assertTrue(canReuseZonemapIndex(Type.DECIMAL32, Type.DECIMAL64));
        assertTrue(canReuseZonemapIndex(Type.DECIMAL32, Type.DECIMAL128));
        assertTrue(canReuseZonemapIndex(Type.DECIMAL32, Type.DECIMAL256));

        assertTrue(canReuseZonemapIndex(Type.DECIMAL64, Type.DECIMAL64));
        assertTrue(canReuseZonemapIndex(Type.DECIMAL64, Type.DECIMAL128));
        assertTrue(canReuseZonemapIndex(Type.DECIMAL64, Type.DECIMAL256));

        assertTrue(canReuseZonemapIndex(Type.DECIMAL128, Type.DECIMAL128));
        assertTrue(canReuseZonemapIndex(Type.DECIMAL128, Type.DECIMAL256));

        assertTrue(canReuseZonemapIndex(Type.DECIMAL256, Type.DECIMAL256));

        // Date/Datetime family
        assertTrue(canReuseZonemapIndex(Type.DATE, Type.DATE));
        assertTrue(canReuseZonemapIndex(Type.DATE, Type.DATETIME));
        assertTrue(canReuseZonemapIndex(Type.DATETIME, Type.DATETIME));

        // String family
        ScalarType char10 = ScalarType.createCharType(10);
        ScalarType varchar20 = ScalarType.createVarcharType(20);
        assertTrue(canReuseZonemapIndex(char10, char10));
        assertTrue(canReuseZonemapIndex(char10, varchar20));
        ScalarType varchar30 = ScalarType.createVarcharType(30);
        assertTrue(canReuseZonemapIndex(varchar20, varchar30));
    }

    /** Conversions that are absent from the reuse matrix must force the zonemap to be rebuilt. */
    @Test
    public void testZoneMapIndexNotReuse() {
        // decreasing width
        assertFalse(canReuseZonemapIndex(Type.INT, Type.SMALLINT));
        assertFalse(canReuseZonemapIndex(Type.BIGINT, Type.INT));

        // integer to FLOAT is not in the reuse matrix (DOUBLE is the only floating-point target for integers)
        assertFalse(canReuseZonemapIndex(Type.INT, Type.FLOAT));

        // double to float narrowing not allowed
        assertFalse(canReuseZonemapIndex(Type.DOUBLE, Type.FLOAT));

        // decimal narrowing
        assertFalse(canReuseZonemapIndex(Type.DECIMAL128, Type.DECIMAL64));

        // datetime to date narrowing
        assertFalse(canReuseZonemapIndex(Type.DATETIME, Type.DATE));

        // varchar to char not allowed by reuse matrix
        ScalarType varchar20 = ScalarType.createVarcharType(20);
        ScalarType char10 = ScalarType.createCharType(10);
        assertFalse(canReuseZonemapIndex(varchar20, char10));

        // string <-> int not allowed
        assertFalse(canReuseZonemapIndex(varchar20, Type.INT));
        assertFalse(canReuseZonemapIndex(Type.INT, varchar20));
    }

    /**
     * A source type without zonemap support has no index to invalidate, so the method must answer
     * true without consulting the matrix.
     */
    @Test
    public void testSourceWithoutZoneMapIsAlwaysReusable() {
        // Guard the precondition so a future change to supportZoneMap() fails here rather than silently
        // turning the assertions below into matrix lookups.
        assertFalse(Type.BOOLEAN.supportZoneMap());
        assertTrue(canReuseZonemapIndex(Type.BOOLEAN, Type.INT));

        assertFalse(Type.JSON.supportZoneMap());
        assertTrue(canReuseZonemapIndex(Type.JSON, ScalarType.createVarcharType(20)));
    }
}

View File

@ -416,4 +416,40 @@ public class TypeTest {
Assertions.assertTrue(Type.canCastTo(jsonType, mapType));
}
}
/**
 * Checks {@code Type.supportZoneMap()} against two tables: types expected to report zonemap support
 * and types expected not to.
 */
@Test
public void testSupportZonemap() {
    // Positive cases: Scalar types that are numeric, date, or string
    final Type[] supported = {
            Type.TINYINT,
            Type.SMALLINT,
            Type.INT,
            Type.BIGINT,
            Type.LARGEINT,
            Type.FLOAT,
            Type.DOUBLE,
            Type.DATE,
            Type.DATETIME,
            Type.VARCHAR,
            Type.CHAR,
            Type.DEFAULT_DECIMALV2,
            Type.DECIMAL32,
            Type.DECIMAL64,
            Type.DECIMAL128,
            Type.DECIMAL256,
            ScalarType.createVarcharType(10),
            ScalarType.createCharType(5),
    };
    for (Type type : supported) {
        Assertions.assertTrue(type.supportZoneMap(), () -> type + " should support zone map");
    }

    // Negative cases: Non-scalar types or scalar types that are not numeric, date, or string
    final Type[] unsupported = {
            Type.NULL,
            Type.BOOLEAN, // Boolean is not numeric, date or string
            Type.HLL,
            Type.BITMAP,
            Type.PERCENTILE,
            Type.JSON,
            Type.FUNCTION,
            Type.VARBINARY,
            Type.ARRAY_INT,
            Type.MAP_VARCHAR_VARCHAR,
            new StructType(Lists.newArrayList(Type.INT)),
    };
    for (Type type : unsupported) {
        Assertions.assertFalse(type.supportZoneMap(), () -> type + " should not support zone map");
    }
}
}