[Enhancement] assign a large but configurable row count to unknown stats table (backport #61332) (#61953)

Signed-off-by: yan zhang <dirtysalt1987@gmail.com>
Co-authored-by: yan zhang <dirtysalt1987@gmail.com>
Authored by mergify[bot] on 2025-08-15 04:33:37 +00:00, committed by GitHub.
parent 3635b317d8
commit f571bb1ac0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 12 additions and 10 deletions

View File

@ -3005,7 +3005,7 @@ public class Config extends ConfigBase {
"Only takes effect for tables in clusters with run_mode=shared_data.\n")
public static long lake_autovacuum_stale_partition_threshold = 12;
@ConfField(mutable = true, comment =
@ConfField(mutable = true, comment =
"Determine whether a vacuum operation needs to be initiated based on the vacuum version.\n")
public static boolean lake_autovacuum_detect_vaccumed_version = true;
@ -3775,6 +3775,9 @@ public class Config extends ConfigBase {
@ConfField(mutable = true)
public static long max_graceful_exit_time_second = 60;
@ConfField(mutable = true)
public static long default_statistics_output_row_count = 1L * 1000 * 1000 * 1000;
/**
* The default scheduler interval for dynamic tablet jobs.
*/

View File

@ -24,6 +24,7 @@ import com.starrocks.catalog.HiveTable;
import com.starrocks.catalog.PartitionKey;
import com.starrocks.catalog.Table;
import com.starrocks.common.AlreadyExistsException;
import com.starrocks.common.Config;
import com.starrocks.common.DdlException;
import com.starrocks.common.MetaNotFoundException;
import com.starrocks.common.StarRocksException;
@ -321,7 +322,7 @@ public class HiveMetadata implements ConnectorMetadata {
if (session.getSessionVariable().enableHiveColumnStats()) {
statistics = statisticsProvider.getTableStatistics(session, table, columnRefOperators, partitionKeys);
} else {
statistics = Statistics.builder().build();
statistics = Statistics.builder().setOutputRowCount(Config.default_statistics_output_row_count).build();
LOG.warn("Session variable {} is false when getting table statistics on table {}",
SessionVariable.ENABLE_HIVE_COLUMN_STATS, table);
}

View File

@ -18,6 +18,7 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.starrocks.catalog.Database;
import com.starrocks.catalog.Table;
import com.starrocks.common.Config;
import com.starrocks.qe.ConnectContext;
import com.starrocks.server.GlobalStateMgr;
import com.starrocks.sql.analyzer.SemanticException;
@ -79,7 +80,7 @@ public class StatisticsUtils {
public static Statistics buildDefaultStatistics(Set<ColumnRefOperator> columns) {
Statistics.Builder statisticsBuilder = Statistics.builder();
statisticsBuilder.setOutputRowCount(1);
statisticsBuilder.setOutputRowCount(Config.default_statistics_output_row_count);
statisticsBuilder.addColumnStatistics(
columns.stream().collect(Collectors.toMap(column -> column, column -> ColumnStatistic.unknown())));
return statisticsBuilder.build();

View File

@ -26,6 +26,7 @@ import com.starrocks.catalog.Table;
import com.starrocks.catalog.Type;
import com.starrocks.common.AlreadyExistsException;
import com.starrocks.common.AnalysisException;
import com.starrocks.common.Config;
import com.starrocks.common.DdlException;
import com.starrocks.common.ExceptionChecker;
import com.starrocks.common.FeConstants;
@ -278,7 +279,7 @@ public class HiveMetadataTest {
columns.put(dataColumnRefOperator, null);
Statistics statistics = hiveMetadata.getTableStatistics(optimizerContext, hiveTable, columns,
Lists.newArrayList(hivePartitionKey1, hivePartitionKey2), null, -1, TableVersionRange.empty());
Assertions.assertEquals(1, statistics.getOutputRowCount(), 0.001);
Assertions.assertEquals(Config.default_statistics_output_row_count, statistics.getOutputRowCount(), 0.001);
Assertions.assertEquals(2, statistics.getColumnStatistics().size());
Assertions.assertTrue(statistics.getColumnStatistics().get(partColumnRefOperator).isUnknown());
Assertions.assertTrue(statistics.getColumnStatistics().get(dataColumnRefOperator).isUnknown());

View File

@ -54,10 +54,6 @@ alter plan advisor add select count(*) from (select * from iceberg_catalog_${uui
-- result:
[REGEX]Add query into plan advisor in FE*
-- !result
function: assert_explain_contains("select count(*) from (select * from iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew t1 join (select * from iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew where c1 = 'f') t2 on t1.c2 = t2.c2 where t1.c1 = 'a') t", "RightChildEstimationErrorTuningGuide")
-- result:
None
-- !result
set enable_plan_advisor_blacklist=true;
-- result:
-- !result
@ -73,4 +69,4 @@ drop database iceberg_catalog_${uuid0}.iceberg_db_${uuid0};
-- !result
drop catalog iceberg_catalog_${uuid0};
-- result:
-- !result
-- !result

View File

@ -29,7 +29,7 @@ function: assert_explain_not_contains("select count(*) from (select * from icebe
alter plan advisor add select count(*) from (select * from iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew t1 join (select * from iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew where c1 = 'f') t2 on t1.c2 = t2.c2 where t1.c1 = 'a') t;
function: assert_explain_contains("select count(*) from (select * from iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew t1 join (select * from iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew where c1 = 'f') t2 on t1.c2 = t2.c2 where t1.c1 = 'a') t", "RightChildEstimationErrorTuningGuide")
-- function: assert_explain_contains("select count(*) from (select * from iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew t1 join (select * from iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew where c1 = 'f') t2 on t1.c2 = t2.c2 where t1.c1 = 'a') t", "RightChildEstimationErrorTuningGuide")
set enable_plan_advisor_blacklist=true;
truncate plan advisor;
drop table iceberg_catalog_${uuid0}.iceberg_db_${uuid0}.c1_skew;