[Enhancement] replace memcompare with memequal for SortedAgg (backport #62585) (#62672)

Signed-off-by: Murphy <mofei@starrocks.com>
Co-authored-by: Murphy <96611012+murphyatwork@users.noreply.github.com>
This commit is contained in:
mergify[bot] 2025-09-03 10:34:43 +08:00 committed by GitHub
parent 9b21d191af
commit 5cfe433d5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 631 additions and 3 deletions

View File

@ -23,7 +23,6 @@
#include "column/column_visitor_adapter.h"
#include "column/nullable_column.h"
#include "column/vectorized_fwd.h"
#include "common/object_pool.h"
#include "exprs/agg/aggregate_state_allocator.h"
#include "exprs/expr_context.h"
#include "glog/logging.h"
@ -91,6 +90,8 @@ public:
return Status::NotSupported("Unsupported large binary column in column wise comparator");
}
// For types with expensive comparison operations, always check the previous comparison result
// in _cmp_vector before performing the current comparison.
Status do_visit(const BinaryColumn& column) {
size_t num_rows = column.size();
if (!_first_column->empty()) {
@ -101,15 +102,17 @@ public:
if (!_null_masks.empty()) {
DCHECK_EQ(_null_masks.size(), num_rows);
for (size_t i = 1; i < num_rows; ++i) {
if (_cmp_vector[i]) continue;
if (_null_masks[i - 1] == 0 && _null_masks[i] == 0) {
_cmp_vector[i] |= column.get_slice(i - 1).compare(column.get_slice(i)) != 0;
_cmp_vector[i] |= column.get_slice(i - 1) != (column.get_slice(i));
} else {
_cmp_vector[i] |= _null_masks[i - 1] != _null_masks[i];
}
}
} else {
for (size_t i = 1; i < num_rows; ++i) {
_cmp_vector[i] |= column.get_slice(i - 1).compare(column.get_slice(i)) != 0;
if (_cmp_vector[i]) continue;
_cmp_vector[i] |= column.get_slice(i - 1) != (column.get_slice(i));
}
}
return Status::OK();

View File

@ -204,4 +204,447 @@ None 1
3 3
3 4
4 4
-- !result
create table t3(
c0 INT,
c1 DECIMAL(10,2),
c2 DECIMAL(15,3),
c3 STRING
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t3 values(1, 100.50, 1000.123, 'type1');
-- result:
-- !result
insert into t3 values(1, 200.75, 2000.456, 'type2');
-- result:
-- !result
insert into t3 values(1, 100.50, 1000.123, 'type1');
-- result:
-- !result
insert into t3 values(2, 300.25, 3000.789, 'type3');
-- result:
-- !result
insert into t3 values(2, 400.00, 4000.000, 'type4');
-- result:
-- !result
insert into t3 values(3, 500.99, 5000.999, 'type5');
-- result:
-- !result
insert into t3 values(3, 500.99, 5000.999, 'type5');
-- result:
-- !result
select c0, c1, sum(c2), count(*) from t3 group by c0, c1 order by c0, c1;
-- result:
1 100.50 2000.246 2
1 200.75 2000.456 1
2 300.25 3000.789 1
2 400.00 4000.000 1
3 500.99 10001.998 2
-- !result
select c0, sum(c1), avg(c2), max(c3), count(*) from t3 group by c0 order by c0;
-- result:
1 401.75 1333.567333333 type2 3
2 700.25 3500.394500000 type4 2
3 1001.98 5000.999000000 type5 2
-- !result
select c0, c1, c2, count(*) from t3 group by c0, c1, c2 order by c0, c1, c2;
-- result:
1 100.50 1000.123 2
1 200.75 2000.456 1
2 300.25 3000.789 1
2 400.00 4000.000 1
3 500.99 5000.999 2
-- !result
create table t4(
c0 INT,
c1 DATETIME,
c2 DATE,
c3 STRING
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t4 values(1, '2023-01-01 10:00:00', '2023-01-01', 'batch1');
-- result:
-- !result
insert into t4 values(1, '2023-01-01 10:00:00', '2023-01-01', 'batch1');
-- result:
-- !result
insert into t4 values(1, '2023-01-02 11:00:00', '2023-01-02', 'batch2');
-- result:
-- !result
insert into t4 values(2, '2023-01-03 12:00:00', '2023-01-03', 'batch3');
-- result:
-- !result
insert into t4 values(2, '2023-01-03 12:00:00', '2023-01-03', 'batch3');
-- result:
-- !result
insert into t4 values(3, '2023-01-04 13:00:00', '2023-01-04', 'batch4');
-- result:
-- !result
select c0, c1, count(*) from t4 group by c0, c1 order by c0, c1;
-- result:
1 2023-01-01 10:00:00 2
1 2023-01-02 11:00:00 1
2 2023-01-03 12:00:00 2
3 2023-01-04 13:00:00 1
-- !result
select c0, c2, count(*), max(c3) from t4 group by c0, c2 order by c0, c2;
-- result:
1 2023-01-01 2 batch1
1 2023-01-02 1 batch2
2 2023-01-03 2 batch3
3 2023-01-04 1 batch4
-- !result
select c0, min(c1), max(c1), count(*) from t4 group by c0 order by c0;
-- result:
1 2023-01-01 10:00:00 2023-01-02 11:00:00 3
2 2023-01-03 12:00:00 2023-01-03 12:00:00 2
3 2023-01-04 13:00:00 2023-01-04 13:00:00 1
-- !result
create table t5(
c0 INT,
c1 BOOLEAN,
c2 BOOLEAN,
c3 STRING
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t5 values(1, true, false, 'status1');
-- result:
-- !result
insert into t5 values(1, true, true, 'status2');
-- result:
-- !result
insert into t5 values(1, false, false, 'status3');
-- result:
-- !result
insert into t5 values(2, true, false, 'status4');
-- result:
-- !result
insert into t5 values(2, false, true, 'status5');
-- result:
-- !result
insert into t5 values(3, true, true, 'status6');
-- result:
-- !result
select c0, c1, count(*) from t5 group by c0, c1 order by c0, c1;
-- result:
1 0 1
1 1 2
2 0 1
2 1 1
3 1 1
-- !result
select c0, c1, c2, count(*) from t5 group by c0, c1, c2 order by c0, c1, c2;
-- result:
1 0 0 1
1 1 0 1
1 1 1 1
2 0 1 1
2 1 0 1
3 1 1 1
-- !result
select c0, sum(case when c1 then 1 else 0 end) as true_count, count(*) from t5 group by c0 order by c0;
-- result:
1 2 3
2 1 2
3 1 1
-- !result
create table t6(
c0 INT,
c1 FLOAT,
c2 DOUBLE,
c3 STRING
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t6 values(1, 1.5, 1.123456789, 'float1');
-- result:
-- !result
insert into t6 values(1, 2.5, 2.234567890, 'float2');
-- result:
-- !result
insert into t6 values(1, 1.5, 1.123456789, 'float1');
-- result:
-- !result
insert into t6 values(2, 3.5, 3.345678901, 'float3');
-- result:
-- !result
insert into t6 values(2, 4.5, 4.456789012, 'float4');
-- result:
-- !result
insert into t6 values(3, 5.5, 5.567890123, 'float5');
-- result:
-- !result
select c0, c1, sum(c2), count(*) from t6 group by c0, c1 order by c0, c1;
-- result:
1 1.5 2.246913578 2
1 2.5 2.23456789 1
2 3.5 3.345678901 1
2 4.5 4.456789012 1
3 5.5 5.567890123 1
-- !result
select c0, avg(c1), avg(c2), count(*) from t6 group by c0 order by c0;
-- result:
1 1.8333333333333333 1.493827156 3
2 4.0 3.9012339564999996 2
3 5.5 5.567890123 1
-- !result
select c0, c1, c2, count(*) from t6 group by c0, c1, c2 order by c0, c1, c2;
-- result:
1 1.5 1.123456789 2
1 2.5 2.23456789 1
2 3.5 3.345678901 1
2 4.5 4.456789012 1
3 5.5 5.567890123 1
-- !result
create table t7(
c0 INT,
c1 LARGEINT,
c2 STRING
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t7 values(1, 9223372036854775807, 'large1');
-- result:
-- !result
insert into t7 values(1, 9223372036854775806, 'large2');
-- result:
-- !result
insert into t7 values(1, 9223372036854775807, 'large1');
-- result:
-- !result
insert into t7 values(2, 9223372036854775805, 'large3');
-- result:
-- !result
insert into t7 values(2, 9223372036854775804, 'large4');
-- result:
-- !result
insert into t7 values(3, 9223372036854775803, 'large5');
-- result:
-- !result
select c0, c1, count(*) from t7 group by c0, c1 order by c0, c1;
-- result:
1 9223372036854775806 1
1 9223372036854775807 2
2 9223372036854775804 1
2 9223372036854775805 1
3 9223372036854775803 1
-- !result
select c0, sum(c1), count(*) from t7 group by c0 order by c0;
-- result:
1 27670116110564327420 3
2 18446744073709551609 2
3 9223372036854775803 1
-- !result
select c0, c1, max(c2), count(*) from t7 group by c0, c1 order by c0, c1;
-- result:
1 9223372036854775806 large2 1
1 9223372036854775807 large1 2
2 9223372036854775804 large4 1
2 9223372036854775805 large3 1
3 9223372036854775803 large5 1
-- !result
create table t8(
c0 INT,
c1 VARCHAR(50),
c2 VARCHAR(100),
c3 STRING
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t8 values(1, 'varchar1', 'longer_varchar1', 'string1');
-- result:
-- !result
insert into t8 values(1, 'varchar2', 'longer_varchar2', 'string2');
-- result:
-- !result
insert into t8 values(1, 'varchar1', 'longer_varchar1', 'string1');
-- result:
-- !result
insert into t8 values(2, 'varchar3', 'longer_varchar3', 'string3');
-- result:
-- !result
insert into t8 values(2, 'varchar4', 'longer_varchar4', 'string4');
-- result:
-- !result
insert into t8 values(3, 'varchar5', 'longer_varchar5', 'string5');
-- result:
-- !result
select c0, c1, count(*) from t8 group by c0, c1 order by c0, c1;
-- result:
1 varchar1 2
1 varchar2 1
2 varchar3 1
2 varchar4 1
3 varchar5 1
-- !result
select c0, c1, c2, count(*) from t8 group by c0, c1, c2 order by c0, c1, c2;
-- result:
1 varchar1 longer_varchar1 2
1 varchar2 longer_varchar2 1
2 varchar3 longer_varchar3 1
2 varchar4 longer_varchar4 1
3 varchar5 longer_varchar5 1
-- !result
select c0, max(c1), max(c2), count(*) from t8 group by c0 order by c0;
-- result:
1 varchar2 longer_varchar2 3
2 varchar4 longer_varchar4 2
3 varchar5 longer_varchar5 1
-- !result
create table t9(
c1 STRING,
c0 INT,
c2 DECIMAL(10,2),
c3 DATETIME,
c4 BOOLEAN
) DUPLICATE KEY(c1, c0) DISTRIBUTED BY HASH(c1) BUCKETS 3 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t9 values('group1', 1, 100.50, '2023-01-01 10:00:00', true);
-- result:
-- !result
insert into t9 values('group1', 1, 100.50, '2023-01-01 10:00:00', true);
-- result:
-- !result
insert into t9 values('group1', 1, 200.75, '2023-01-01 11:00:00', false);
-- result:
-- !result
insert into t9 values('group2', 1, 300.25, '2023-01-02 10:00:00', true);
-- result:
-- !result
insert into t9 values('group3', 2, 400.00, '2023-01-03 10:00:00', false);
-- result:
-- !result
insert into t9 values('group3', 2, 500.99, '2023-01-03 11:00:00', true);
-- result:
-- !result
insert into t9 values('group4', 3, 600.50, '2023-01-04 10:00:00', true);
-- result:
-- !result
select c1, c0, c2, count(*) from t9 group by c1, c0, c2 order by c1, c0, c2;
-- result:
group1 1 100.50 2
group1 1 200.75 1
group2 1 300.25 1
group3 2 400.00 1
group3 2 500.99 1
group4 3 600.50 1
-- !result
select c1, c0, sum(c2), count(*) from t9 group by c1, c0 order by c1, c0;
-- result:
group1 1 401.75 3
group2 1 300.25 1
group3 2 900.99 2
group4 3 600.50 1
-- !result
select c1, c0, c2, c3, count(*) from t9 group by c1, c0, c2, c3 order by c1, c0, c2, c3;
-- result:
group1 1 100.50 2023-01-01 10:00:00 2
group1 1 200.75 2023-01-01 11:00:00 1
group2 1 300.25 2023-01-02 10:00:00 1
group3 2 400.00 2023-01-03 10:00:00 1
group3 2 500.99 2023-01-03 11:00:00 1
group4 3 600.50 2023-01-04 10:00:00 1
-- !result
select c1, c0, c2, c4, count(*) from t9 group by c1, c0, c2, c4 order by c1, c0, c2, c4;
-- result:
group1 1 100.50 1 2
group1 1 200.75 0 1
group2 1 300.25 1 1
group3 2 400.00 0 1
group3 2 500.99 1 1
group4 3 600.50 1 1
-- !result
create table t10(
c1 STRING NULL,
c0 INT NULL,
c2 DECIMAL(10,2) NULL,
c3 DATETIME NULL
) DUPLICATE KEY(c1, c0) DISTRIBUTED BY HASH(c1) BUCKETS 2 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t10 values('valid1', 1, 100.50, '2023-01-01 10:00:00');
-- result:
-- !result
insert into t10 values(null, 1, 200.75, '2023-01-01 11:00:00');
-- result:
-- !result
insert into t10 values('valid1', 1, null, '2023-01-01 12:00:00');
-- result:
-- !result
insert into t10 values('valid1', 1, 100.50, null);
-- result:
-- !result
insert into t10 values('valid2', null, 300.25, '2023-01-02 10:00:00');
-- result:
-- !result
insert into t10 values('valid3', 2, null, null);
-- result:
-- !result
insert into t10 values(null, null, null, null);
-- result:
-- !result
select c1, c0, count(*) from t10 group by c1, c0 order by c1, c0;
-- result:
None None 1
None 1 1
valid1 1 3
valid2 None 1
valid3 2 1
-- !result
select c1, c0, c2, count(*) from t10 group by c1, c0, c2 order by c1, c0, c2;
-- result:
None None None 1
None 1 200.75 1
valid1 1 None 1
valid1 1 100.50 2
valid2 None 300.25 1
valid3 2 None 1
-- !result
select c1, c0, c2, c3, count(*) from t10 group by c1, c0, c2, c3 order by c1, c0, c2, c3;
-- result:
None None None None 1
None 1 200.75 2023-01-01 11:00:00 1
valid1 1 None 2023-01-01 12:00:00 1
valid1 1 100.50 None 1
valid1 1 100.50 2023-01-01 10:00:00 1
valid2 None 300.25 2023-01-02 10:00:00 1
valid3 2 None None 1
-- !result
create table t11(
c0 INT,
c1 STRING,
c2 INT
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
-- result:
-- !result
insert into t11 values(1, 'single1', 100);
-- result:
-- !result
insert into t11 values(2, 'single2', 200);
-- result:
-- !result
insert into t11 values(3, 'single3', 300);
-- result:
-- !result
select c0, c1, c2, count(*) from t11 group by c0, c1, c2 order by c0, c1, c2;
-- result:
1 single1 100 1
2 single2 200 1
3 single3 300 1
-- !result
select c0, sum(c2), count(*) from t11 group by c0 order by c0;
-- result:
1 100 1
2 200 1
3 300 1
-- !result
select c0, c1, sum(c2), count(*) from t11 group by c0, c1 order by c0, c1;
-- result:
1 single1 100 1
2 single2 200 1
3 single3 300 1
-- !result

View File

@ -53,3 +53,185 @@ select c0, sum(c1), max(c2), count(1) from t0 group by c0 order by c0;
select c0, c1, sum(c1) from t0 group by c0, c1 order by c0, c1;
select c0 from t1 group by c0 order by c0;
select c0, c1 from t1 group by c0, c1 order by c0, c1;
-- Enhanced tests with more data types and multi-column scenarios
-- Test with DECIMAL data type
create table t3(
c0 INT,
c1 DECIMAL(10,2),
c2 DECIMAL(15,3),
c3 STRING
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
insert into t3 values(1, 100.50, 1000.123, 'type1');
insert into t3 values(1, 200.75, 2000.456, 'type2');
insert into t3 values(1, 100.50, 1000.123, 'type1');
insert into t3 values(2, 300.25, 3000.789, 'type3');
insert into t3 values(2, 400.00, 4000.000, 'type4');
insert into t3 values(3, 500.99, 5000.999, 'type5');
insert into t3 values(3, 500.99, 5000.999, 'type5');
-- Test DECIMAL aggregation with multi-column grouping
select c0, c1, sum(c2), count(*) from t3 group by c0, c1 order by c0, c1;
select c0, sum(c1), avg(c2), max(c3), count(*) from t3 group by c0 order by c0;
select c0, c1, c2, count(*) from t3 group by c0, c1, c2 order by c0, c1, c2;
-- Test with DATETIME and DATE data types
create table t4(
c0 INT,
c1 DATETIME,
c2 DATE,
c3 STRING
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
insert into t4 values(1, '2023-01-01 10:00:00', '2023-01-01', 'batch1');
insert into t4 values(1, '2023-01-01 10:00:00', '2023-01-01', 'batch1');
insert into t4 values(1, '2023-01-02 11:00:00', '2023-01-02', 'batch2');
insert into t4 values(2, '2023-01-03 12:00:00', '2023-01-03', 'batch3');
insert into t4 values(2, '2023-01-03 12:00:00', '2023-01-03', 'batch3');
insert into t4 values(3, '2023-01-04 13:00:00', '2023-01-04', 'batch4');
-- Test DATETIME and DATE aggregation
select c0, c1, count(*) from t4 group by c0, c1 order by c0, c1;
select c0, c2, count(*), max(c3) from t4 group by c0, c2 order by c0, c2;
select c0, min(c1), max(c1), count(*) from t4 group by c0 order by c0;
-- Test with BOOLEAN data type
create table t5(
c0 INT,
c1 BOOLEAN,
c2 BOOLEAN,
c3 STRING
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
insert into t5 values(1, true, false, 'status1');
insert into t5 values(1, true, true, 'status2');
insert into t5 values(1, false, false, 'status3');
insert into t5 values(2, true, false, 'status4');
insert into t5 values(2, false, true, 'status5');
insert into t5 values(3, true, true, 'status6');
-- Test BOOLEAN aggregation
select c0, c1, count(*) from t5 group by c0, c1 order by c0, c1;
select c0, c1, c2, count(*) from t5 group by c0, c1, c2 order by c0, c1, c2;
select c0, sum(case when c1 then 1 else 0 end) as true_count, count(*) from t5 group by c0 order by c0;
-- Test with FLOAT and DOUBLE data types
create table t6(
c0 INT,
c1 FLOAT,
c2 DOUBLE,
c3 STRING
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
insert into t6 values(1, 1.5, 1.123456789, 'float1');
insert into t6 values(1, 2.5, 2.234567890, 'float2');
insert into t6 values(1, 1.5, 1.123456789, 'float1');
insert into t6 values(2, 3.5, 3.345678901, 'float3');
insert into t6 values(2, 4.5, 4.456789012, 'float4');
insert into t6 values(3, 5.5, 5.567890123, 'float5');
-- Test FLOAT/DOUBLE aggregation
select c0, c1, sum(c2), count(*) from t6 group by c0, c1 order by c0, c1;
select c0, avg(c1), avg(c2), count(*) from t6 group by c0 order by c0;
select c0, c1, c2, count(*) from t6 group by c0, c1, c2 order by c0, c1, c2;
-- Test with LARGEINT data type
create table t7(
c0 INT,
c1 LARGEINT,
c2 STRING
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
insert into t7 values(1, 9223372036854775807, 'large1');
insert into t7 values(1, 9223372036854775806, 'large2');
insert into t7 values(1, 9223372036854775807, 'large1');
insert into t7 values(2, 9223372036854775805, 'large3');
insert into t7 values(2, 9223372036854775804, 'large4');
insert into t7 values(3, 9223372036854775803, 'large5');
-- Test LARGEINT aggregation
select c0, c1, count(*) from t7 group by c0, c1 order by c0, c1;
select c0, sum(c1), count(*) from t7 group by c0 order by c0;
select c0, c1, max(c2), count(*) from t7 group by c0, c1 order by c0, c1;
-- Test with VARCHAR data type
create table t8(
c0 INT,
c1 VARCHAR(50),
c2 VARCHAR(100),
c3 STRING
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
insert into t8 values(1, 'varchar1', 'longer_varchar1', 'string1');
insert into t8 values(1, 'varchar2', 'longer_varchar2', 'string2');
insert into t8 values(1, 'varchar1', 'longer_varchar1', 'string1');
insert into t8 values(2, 'varchar3', 'longer_varchar3', 'string3');
insert into t8 values(2, 'varchar4', 'longer_varchar4', 'string4');
insert into t8 values(3, 'varchar5', 'longer_varchar5', 'string5');
-- Test VARCHAR aggregation
select c0, c1, count(*) from t8 group by c0, c1 order by c0, c1;
select c0, c1, c2, count(*) from t8 group by c0, c1, c2 order by c0, c1, c2;
select c0, max(c1), max(c2), count(*) from t8 group by c0 order by c0;
-- Test complex multi-column grouping scenarios
create table t9(
c1 STRING,
c0 INT,
c2 DECIMAL(10,2),
c3 DATETIME,
c4 BOOLEAN
) DUPLICATE KEY(c1, c0) DISTRIBUTED BY HASH(c1) BUCKETS 3 PROPERTIES('replication_num' = '1');
insert into t9 values('group1', 1, 100.50, '2023-01-01 10:00:00', true);
insert into t9 values('group1', 1, 100.50, '2023-01-01 10:00:00', true);
insert into t9 values('group1', 1, 200.75, '2023-01-01 11:00:00', false);
insert into t9 values('group2', 1, 300.25, '2023-01-02 10:00:00', true);
insert into t9 values('group3', 2, 400.00, '2023-01-03 10:00:00', false);
insert into t9 values('group3', 2, 500.99, '2023-01-03 11:00:00', true);
insert into t9 values('group4', 3, 600.50, '2023-01-04 10:00:00', true);
-- Test complex multi-column grouping
select c1, c0, c2, count(*) from t9 group by c1, c0, c2 order by c1, c0, c2;
select c1, c0, sum(c2), count(*) from t9 group by c1, c0 order by c1, c0;
select c1, c0, c2, c3, count(*) from t9 group by c1, c0, c2, c3 order by c1, c0, c2, c3;
select c1, c0, c2, c4, count(*) from t9 group by c1, c0, c2, c4 order by c1, c0, c2, c4;
-- Test with NULL values in multiple columns
create table t10(
c1 STRING NULL,
c0 INT NULL,
c2 DECIMAL(10,2) NULL,
c3 DATETIME NULL
) DUPLICATE KEY(c1, c0) DISTRIBUTED BY HASH(c1) BUCKETS 2 PROPERTIES('replication_num' = '1');
insert into t10 values('valid1', 1, 100.50, '2023-01-01 10:00:00');
insert into t10 values(null, 1, 200.75, '2023-01-01 11:00:00');
insert into t10 values('valid1', 1, null, '2023-01-01 12:00:00');
insert into t10 values('valid1', 1, 100.50, null);
insert into t10 values('valid2', null, 300.25, '2023-01-02 10:00:00');
insert into t10 values('valid3', 2, null, null);
insert into t10 values(null, null, null, null);
-- Test NULL handling in multi-column grouping
select c1, c0, count(*) from t10 group by c1, c0 order by c1, c0;
select c1, c0, c2, count(*) from t10 group by c1, c0, c2 order by c1, c0, c2;
select c1, c0, c2, c3, count(*) from t10 group by c1, c0, c2, c3 order by c1, c0, c2, c3;
-- Test edge case where every group contains exactly one row
create table t11(
c0 INT,
c1 STRING,
c2 INT
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
insert into t11 values(1, 'single1', 100);
insert into t11 values(2, 'single2', 200);
insert into t11 values(3, 'single3', 300);
-- Test single value groups
select c0, c1, c2, count(*) from t11 group by c0, c1, c2 order by c0, c1, c2;
select c0, sum(c2), count(*) from t11 group by c0 order by c0;
select c0, c1, sum(c2), count(*) from t11 group by c0, c1 order by c0, c1;