[Enhancement] Add statistic for xx_hash3_64 function (#63791)
Why I'm doing: The function XX_HASH3_64 does not have any statistics, so its statistics are set to UNKNOWN. Even though we cannot derive good statistics for this function, having trivial statistics is better than having none, because they can be helpful for follow-up functions like MOD. For example, for the expression ABS(MOD(XX_HASH3_64(a),100)), without statistics for XX_HASH3_64 the statistics of the whole expression will be unknown, but with trivial XX_HASH3_64 statistics StarRocks can set the MIN to 0 and the MAX to 100 for the whole expression. This could be the difference between a correct and a wrong join order. (There are probably other cases like this one.) What I'm doing: Adding statistics for the function XX_HASH3_64. The MIN and MAX are set to the smallest and largest values of the int64 result range (Long.MIN_VALUE and Long.MAX_VALUE), i.e. effectively negative infinity and positive infinity. The DISTINCT COUNT is set to the number of rows. Signed-off-by: m-selmi <m.selmi@celonis.com>
This commit is contained in:
parent
af76406358
commit
f754c243b4
|
|
@ -393,6 +393,8 @@ public class FunctionSet {
|
|||
// Hash functions:
|
||||
public static final String MURMUR_HASH3_32 = "murmur_hash3_32";
|
||||
public static final String CRC32_HASH = "crc32_hash";
|
||||
public static final String XX_HASH3_64 = "xx_hash3_64";
|
||||
public static final String XX_HASH3_128 = "xx_hash3_128";
|
||||
|
||||
// Percentile functions:
|
||||
public static final String PERCENTILE_APPROX_RAW = "percentile_approx_raw";
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ package com.starrocks.sql.optimizer.statistics;
|
|||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.starrocks.catalog.FunctionSet;
|
||||
import com.starrocks.sql.ast.expression.LargeIntLiteral;
|
||||
import com.starrocks.sql.optimizer.ConstantOperatorUtils;
|
||||
import com.starrocks.sql.optimizer.Utils;
|
||||
import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
|
||||
|
|
@ -470,6 +471,18 @@ public class ExpressionStatisticCalculator {
|
|||
maxValue = 4294967295.0;
|
||||
distinctValue = rowCount;
|
||||
break;
|
||||
case FunctionSet.XX_HASH3_64:
|
||||
// xx_hash3_64's range is int64_t
|
||||
minValue = Long.MIN_VALUE;
|
||||
maxValue = Long.MAX_VALUE;
|
||||
distinctValue = rowCount;
|
||||
break;
|
||||
case FunctionSet.XX_HASH3_128:
|
||||
// xx_hash3_128's range is LARGE_INT
|
||||
minValue = LargeIntLiteral.LARGE_INT_MIN.doubleValue();
|
||||
maxValue = LargeIntLiteral.LARGE_INT_MAX.doubleValue();
|
||||
distinctValue = rowCount;
|
||||
break;
|
||||
case FunctionSet.POSITIVE:
|
||||
case FunctionSet.FLOOR:
|
||||
case FunctionSet.DFLOOR:
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import com.starrocks.catalog.FunctionSet;
|
|||
import com.starrocks.catalog.Type;
|
||||
import com.starrocks.common.util.DateUtils;
|
||||
import com.starrocks.sql.ast.expression.BinaryType;
|
||||
import com.starrocks.sql.ast.expression.LargeIntLiteral;
|
||||
import com.starrocks.sql.optimizer.Utils;
|
||||
import com.starrocks.sql.optimizer.operator.scalar.BinaryPredicateOperator;
|
||||
import com.starrocks.sql.optimizer.operator.scalar.CallOperator;
|
||||
|
|
@ -393,6 +394,16 @@ public class ExpressionStatisticsCalculatorTest {
|
|||
columnStatistic = ExpressionStatisticCalculator.calculate(callOperator, statistics);
|
||||
Assertions.assertEquals(columnStatistic.getMaxValue(), 100, 0.001);
|
||||
Assertions.assertEquals(columnStatistic.getMinValue(), 0, 0.001);
|
||||
// test xx_hash3_64 function
|
||||
callOperator = new CallOperator(FunctionSet.XX_HASH3_64, Type.BIGINT, Lists.newArrayList(columnRefOperator));
|
||||
columnStatistic = ExpressionStatisticCalculator.calculate(callOperator, statistics);
|
||||
Assertions.assertEquals(columnStatistic.getMaxValue(), Long.MAX_VALUE, 0.001);
|
||||
Assertions.assertEquals(columnStatistic.getMinValue(), Long.MIN_VALUE, 0.001);
|
||||
// test xx_hash3_128 function
|
||||
callOperator = new CallOperator(FunctionSet.XX_HASH3_128, Type.LARGEINT, Lists.newArrayList(columnRefOperator));
|
||||
columnStatistic = ExpressionStatisticCalculator.calculate(callOperator, statistics);
|
||||
Assertions.assertEquals(columnStatistic.getMaxValue(), LargeIntLiteral.LARGE_INT_MAX.doubleValue(), 0.001);
|
||||
Assertions.assertEquals(columnStatistic.getMinValue(), LargeIntLiteral.LARGE_INT_MIN.doubleValue(), 0.001);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
Loading…
Reference in New Issue