Signed-off-by: Murphy <mofei@starrocks.com> Co-authored-by: Murphy <96611012+murphyatwork@users.noreply.github.com>
This commit is contained in:
parent
9b21d191af
commit
5cfe433d5b
|
|
@ -23,7 +23,6 @@
|
|||
#include "column/column_visitor_adapter.h"
|
||||
#include "column/nullable_column.h"
|
||||
#include "column/vectorized_fwd.h"
|
||||
#include "common/object_pool.h"
|
||||
#include "exprs/agg/aggregate_state_allocator.h"
|
||||
#include "exprs/expr_context.h"
|
||||
#include "glog/logging.h"
|
||||
|
|
@ -91,6 +90,8 @@ public:
|
|||
return Status::NotSupported("Unsupported large binary column in column wise comparator");
|
||||
}
|
||||
|
||||
// For types with expensive comparison operations, always check the previous comparison result
|
||||
// in _cmp_vector before performing the current comparison.
|
||||
Status do_visit(const BinaryColumn& column) {
|
||||
size_t num_rows = column.size();
|
||||
if (!_first_column->empty()) {
|
||||
|
|
@ -101,15 +102,17 @@ public:
|
|||
if (!_null_masks.empty()) {
|
||||
DCHECK_EQ(_null_masks.size(), num_rows);
|
||||
for (size_t i = 1; i < num_rows; ++i) {
|
||||
if (_cmp_vector[i]) continue;
|
||||
if (_null_masks[i - 1] == 0 && _null_masks[i] == 0) {
|
||||
_cmp_vector[i] |= column.get_slice(i - 1).compare(column.get_slice(i)) != 0;
|
||||
_cmp_vector[i] |= column.get_slice(i - 1) != (column.get_slice(i));
|
||||
} else {
|
||||
_cmp_vector[i] |= _null_masks[i - 1] != _null_masks[i];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 1; i < num_rows; ++i) {
|
||||
_cmp_vector[i] |= column.get_slice(i - 1).compare(column.get_slice(i)) != 0;
|
||||
if (_cmp_vector[i]) continue;
|
||||
_cmp_vector[i] |= column.get_slice(i - 1) != (column.get_slice(i));
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
|
|
|
|||
|
|
@ -204,4 +204,447 @@ None 1
|
|||
3 3
|
||||
3 4
|
||||
4 4
|
||||
-- !result
|
||||
create table t3(
|
||||
c0 INT,
|
||||
c1 DECIMAL(10,2),
|
||||
c2 DECIMAL(15,3),
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t3 values(1, 100.50, 1000.123, 'type1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t3 values(1, 200.75, 2000.456, 'type2');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t3 values(1, 100.50, 1000.123, 'type1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t3 values(2, 300.25, 3000.789, 'type3');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t3 values(2, 400.00, 4000.000, 'type4');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t3 values(3, 500.99, 5000.999, 'type5');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t3 values(3, 500.99, 5000.999, 'type5');
|
||||
-- result:
|
||||
-- !result
|
||||
select c0, c1, sum(c2), count(*) from t3 group by c0, c1 order by c0, c1;
|
||||
-- result:
|
||||
1 100.50 2000.246 2
|
||||
1 200.75 2000.456 1
|
||||
2 300.25 3000.789 1
|
||||
2 400.00 4000.000 1
|
||||
3 500.99 10001.998 2
|
||||
-- !result
|
||||
select c0, sum(c1), avg(c2), max(c3), count(*) from t3 group by c0 order by c0;
|
||||
-- result:
|
||||
1 401.75 1333.567333333 type2 3
|
||||
2 700.25 3500.394500000 type4 2
|
||||
3 1001.98 5000.999000000 type5 2
|
||||
-- !result
|
||||
select c0, c1, c2, count(*) from t3 group by c0, c1, c2 order by c0, c1, c2;
|
||||
-- result:
|
||||
1 100.50 1000.123 2
|
||||
1 200.75 2000.456 1
|
||||
2 300.25 3000.789 1
|
||||
2 400.00 4000.000 1
|
||||
3 500.99 5000.999 2
|
||||
-- !result
|
||||
create table t4(
|
||||
c0 INT,
|
||||
c1 DATETIME,
|
||||
c2 DATE,
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t4 values(1, '2023-01-01 10:00:00', '2023-01-01', 'batch1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t4 values(1, '2023-01-01 10:00:00', '2023-01-01', 'batch1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t4 values(1, '2023-01-02 11:00:00', '2023-01-02', 'batch2');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t4 values(2, '2023-01-03 12:00:00', '2023-01-03', 'batch3');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t4 values(2, '2023-01-03 12:00:00', '2023-01-03', 'batch3');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t4 values(3, '2023-01-04 13:00:00', '2023-01-04', 'batch4');
|
||||
-- result:
|
||||
-- !result
|
||||
select c0, c1, count(*) from t4 group by c0, c1 order by c0, c1;
|
||||
-- result:
|
||||
1 2023-01-01 10:00:00 2
|
||||
1 2023-01-02 11:00:00 1
|
||||
2 2023-01-03 12:00:00 2
|
||||
3 2023-01-04 13:00:00 1
|
||||
-- !result
|
||||
select c0, c2, count(*), max(c3) from t4 group by c0, c2 order by c0, c2;
|
||||
-- result:
|
||||
1 2023-01-01 2 batch1
|
||||
1 2023-01-02 1 batch2
|
||||
2 2023-01-03 2 batch3
|
||||
3 2023-01-04 1 batch4
|
||||
-- !result
|
||||
select c0, min(c1), max(c1), count(*) from t4 group by c0 order by c0;
|
||||
-- result:
|
||||
1 2023-01-01 10:00:00 2023-01-02 11:00:00 3
|
||||
2 2023-01-03 12:00:00 2023-01-03 12:00:00 2
|
||||
3 2023-01-04 13:00:00 2023-01-04 13:00:00 1
|
||||
-- !result
|
||||
create table t5(
|
||||
c0 INT,
|
||||
c1 BOOLEAN,
|
||||
c2 BOOLEAN,
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t5 values(1, true, false, 'status1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t5 values(1, true, true, 'status2');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t5 values(1, false, false, 'status3');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t5 values(2, true, false, 'status4');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t5 values(2, false, true, 'status5');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t5 values(3, true, true, 'status6');
|
||||
-- result:
|
||||
-- !result
|
||||
select c0, c1, count(*) from t5 group by c0, c1 order by c0, c1;
|
||||
-- result:
|
||||
1 0 1
|
||||
1 1 2
|
||||
2 0 1
|
||||
2 1 1
|
||||
3 1 1
|
||||
-- !result
|
||||
select c0, c1, c2, count(*) from t5 group by c0, c1, c2 order by c0, c1, c2;
|
||||
-- result:
|
||||
1 0 0 1
|
||||
1 1 0 1
|
||||
1 1 1 1
|
||||
2 0 1 1
|
||||
2 1 0 1
|
||||
3 1 1 1
|
||||
-- !result
|
||||
select c0, sum(case when c1 then 1 else 0 end) as true_count, count(*) from t5 group by c0 order by c0;
|
||||
-- result:
|
||||
1 2 3
|
||||
2 1 2
|
||||
3 1 1
|
||||
-- !result
|
||||
create table t6(
|
||||
c0 INT,
|
||||
c1 FLOAT,
|
||||
c2 DOUBLE,
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t6 values(1, 1.5, 1.123456789, 'float1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t6 values(1, 2.5, 2.234567890, 'float2');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t6 values(1, 1.5, 1.123456789, 'float1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t6 values(2, 3.5, 3.345678901, 'float3');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t6 values(2, 4.5, 4.456789012, 'float4');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t6 values(3, 5.5, 5.567890123, 'float5');
|
||||
-- result:
|
||||
-- !result
|
||||
select c0, c1, sum(c2), count(*) from t6 group by c0, c1 order by c0, c1;
|
||||
-- result:
|
||||
1 1.5 2.246913578 2
|
||||
1 2.5 2.23456789 1
|
||||
2 3.5 3.345678901 1
|
||||
2 4.5 4.456789012 1
|
||||
3 5.5 5.567890123 1
|
||||
-- !result
|
||||
select c0, avg(c1), avg(c2), count(*) from t6 group by c0 order by c0;
|
||||
-- result:
|
||||
1 1.8333333333333333 1.493827156 3
|
||||
2 4.0 3.9012339564999996 2
|
||||
3 5.5 5.567890123 1
|
||||
-- !result
|
||||
select c0, c1, c2, count(*) from t6 group by c0, c1, c2 order by c0, c1, c2;
|
||||
-- result:
|
||||
1 1.5 1.123456789 2
|
||||
1 2.5 2.23456789 1
|
||||
2 3.5 3.345678901 1
|
||||
2 4.5 4.456789012 1
|
||||
3 5.5 5.567890123 1
|
||||
-- !result
|
||||
create table t7(
|
||||
c0 INT,
|
||||
c1 LARGEINT,
|
||||
c2 STRING
|
||||
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t7 values(1, 9223372036854775807, 'large1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t7 values(1, 9223372036854775806, 'large2');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t7 values(1, 9223372036854775807, 'large1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t7 values(2, 9223372036854775805, 'large3');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t7 values(2, 9223372036854775804, 'large4');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t7 values(3, 9223372036854775803, 'large5');
|
||||
-- result:
|
||||
-- !result
|
||||
select c0, c1, count(*) from t7 group by c0, c1 order by c0, c1;
|
||||
-- result:
|
||||
1 9223372036854775806 1
|
||||
1 9223372036854775807 2
|
||||
2 9223372036854775804 1
|
||||
2 9223372036854775805 1
|
||||
3 9223372036854775803 1
|
||||
-- !result
|
||||
select c0, sum(c1), count(*) from t7 group by c0 order by c0;
|
||||
-- result:
|
||||
1 27670116110564327420 3
|
||||
2 18446744073709551609 2
|
||||
3 9223372036854775803 1
|
||||
-- !result
|
||||
select c0, c1, max(c2), count(*) from t7 group by c0, c1 order by c0, c1;
|
||||
-- result:
|
||||
1 9223372036854775806 large2 1
|
||||
1 9223372036854775807 large1 2
|
||||
2 9223372036854775804 large4 1
|
||||
2 9223372036854775805 large3 1
|
||||
3 9223372036854775803 large5 1
|
||||
-- !result
|
||||
create table t8(
|
||||
c0 INT,
|
||||
c1 VARCHAR(50),
|
||||
c2 VARCHAR(100),
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t8 values(1, 'varchar1', 'longer_varchar1', 'string1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t8 values(1, 'varchar2', 'longer_varchar2', 'string2');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t8 values(1, 'varchar1', 'longer_varchar1', 'string1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t8 values(2, 'varchar3', 'longer_varchar3', 'string3');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t8 values(2, 'varchar4', 'longer_varchar4', 'string4');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t8 values(3, 'varchar5', 'longer_varchar5', 'string5');
|
||||
-- result:
|
||||
-- !result
|
||||
select c0, c1, count(*) from t8 group by c0, c1 order by c0, c1;
|
||||
-- result:
|
||||
1 varchar1 2
|
||||
1 varchar2 1
|
||||
2 varchar3 1
|
||||
2 varchar4 1
|
||||
3 varchar5 1
|
||||
-- !result
|
||||
select c0, c1, c2, count(*) from t8 group by c0, c1, c2 order by c0, c1, c2;
|
||||
-- result:
|
||||
1 varchar1 longer_varchar1 2
|
||||
1 varchar2 longer_varchar2 1
|
||||
2 varchar3 longer_varchar3 1
|
||||
2 varchar4 longer_varchar4 1
|
||||
3 varchar5 longer_varchar5 1
|
||||
-- !result
|
||||
select c0, max(c1), max(c2), count(*) from t8 group by c0 order by c0;
|
||||
-- result:
|
||||
1 varchar2 longer_varchar2 3
|
||||
2 varchar4 longer_varchar4 2
|
||||
3 varchar5 longer_varchar5 1
|
||||
-- !result
|
||||
create table t9(
|
||||
c1 STRING,
|
||||
c0 INT,
|
||||
c2 DECIMAL(10,2),
|
||||
c3 DATETIME,
|
||||
c4 BOOLEAN
|
||||
) DUPLICATE KEY(c1, c0) DISTRIBUTED BY HASH(c1) BUCKETS 3 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t9 values('group1', 1, 100.50, '2023-01-01 10:00:00', true);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t9 values('group1', 1, 100.50, '2023-01-01 10:00:00', true);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t9 values('group1', 1, 200.75, '2023-01-01 11:00:00', false);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t9 values('group2', 1, 300.25, '2023-01-02 10:00:00', true);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t9 values('group3', 2, 400.00, '2023-01-03 10:00:00', false);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t9 values('group3', 2, 500.99, '2023-01-03 11:00:00', true);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t9 values('group4', 3, 600.50, '2023-01-04 10:00:00', true);
|
||||
-- result:
|
||||
-- !result
|
||||
select c1, c0, c2, count(*) from t9 group by c1, c0, c2 order by c1, c0, c2;
|
||||
-- result:
|
||||
group1 1 100.50 2
|
||||
group1 1 200.75 1
|
||||
group2 1 300.25 1
|
||||
group3 2 400.00 1
|
||||
group3 2 500.99 1
|
||||
group4 3 600.50 1
|
||||
-- !result
|
||||
select c1, c0, sum(c2), count(*) from t9 group by c1, c0 order by c1, c0;
|
||||
-- result:
|
||||
group1 1 401.75 3
|
||||
group2 1 300.25 1
|
||||
group3 2 900.99 2
|
||||
group4 3 600.50 1
|
||||
-- !result
|
||||
select c1, c0, c2, c3, count(*) from t9 group by c1, c0, c2, c3 order by c1, c0, c2, c3;
|
||||
-- result:
|
||||
group1 1 100.50 2023-01-01 10:00:00 2
|
||||
group1 1 200.75 2023-01-01 11:00:00 1
|
||||
group2 1 300.25 2023-01-02 10:00:00 1
|
||||
group3 2 400.00 2023-01-03 10:00:00 1
|
||||
group3 2 500.99 2023-01-03 11:00:00 1
|
||||
group4 3 600.50 2023-01-04 10:00:00 1
|
||||
-- !result
|
||||
select c1, c0, c2, c4, count(*) from t9 group by c1, c0, c2, c4 order by c1, c0, c2, c4;
|
||||
-- result:
|
||||
group1 1 100.50 1 2
|
||||
group1 1 200.75 0 1
|
||||
group2 1 300.25 1 1
|
||||
group3 2 400.00 0 1
|
||||
group3 2 500.99 1 1
|
||||
group4 3 600.50 1 1
|
||||
-- !result
|
||||
create table t10(
|
||||
c1 STRING NULL,
|
||||
c0 INT NULL,
|
||||
c2 DECIMAL(10,2) NULL,
|
||||
c3 DATETIME NULL
|
||||
) DUPLICATE KEY(c1, c0) DISTRIBUTED BY HASH(c1) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t10 values('valid1', 1, 100.50, '2023-01-01 10:00:00');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t10 values(null, 1, 200.75, '2023-01-01 11:00:00');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t10 values('valid1', 1, null, '2023-01-01 12:00:00');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t10 values('valid1', 1, 100.50, null);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t10 values('valid2', null, 300.25, '2023-01-02 10:00:00');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t10 values('valid3', 2, null, null);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t10 values(null, null, null, null);
|
||||
-- result:
|
||||
-- !result
|
||||
select c1, c0, count(*) from t10 group by c1, c0 order by c1, c0;
|
||||
-- result:
|
||||
None None 1
|
||||
None 1 1
|
||||
valid1 1 3
|
||||
valid2 None 1
|
||||
valid3 2 1
|
||||
-- !result
|
||||
select c1, c0, c2, count(*) from t10 group by c1, c0, c2 order by c1, c0, c2;
|
||||
-- result:
|
||||
None None None 1
|
||||
None 1 200.75 1
|
||||
valid1 1 None 1
|
||||
valid1 1 100.50 2
|
||||
valid2 None 300.25 1
|
||||
valid3 2 None 1
|
||||
-- !result
|
||||
select c1, c0, c2, c3, count(*) from t10 group by c1, c0, c2, c3 order by c1, c0, c2, c3;
|
||||
-- result:
|
||||
None None None None 1
|
||||
None 1 200.75 2023-01-01 11:00:00 1
|
||||
valid1 1 None 2023-01-01 12:00:00 1
|
||||
valid1 1 100.50 None 1
|
||||
valid1 1 100.50 2023-01-01 10:00:00 1
|
||||
valid2 None 300.25 2023-01-02 10:00:00 1
|
||||
valid3 2 None None 1
|
||||
-- !result
|
||||
create table t11(
|
||||
c0 INT,
|
||||
c1 STRING,
|
||||
c2 INT
|
||||
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t11 values(1, 'single1', 100);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t11 values(2, 'single2', 200);
|
||||
-- result:
|
||||
-- !result
|
||||
insert into t11 values(3, 'single3', 300);
|
||||
-- result:
|
||||
-- !result
|
||||
select c0, c1, c2, count(*) from t11 group by c0, c1, c2 order by c0, c1, c2;
|
||||
-- result:
|
||||
1 single1 100 1
|
||||
2 single2 200 1
|
||||
3 single3 300 1
|
||||
-- !result
|
||||
select c0, sum(c2), count(*) from t11 group by c0 order by c0;
|
||||
-- result:
|
||||
1 100 1
|
||||
2 200 1
|
||||
3 300 1
|
||||
-- !result
|
||||
select c0, c1, sum(c2), count(*) from t11 group by c0, c1 order by c0, c1;
|
||||
-- result:
|
||||
1 single1 100 1
|
||||
2 single2 200 1
|
||||
3 single3 300 1
|
||||
-- !result
|
||||
|
|
@ -53,3 +53,185 @@ select c0, sum(c1), max(c2), count(1) from t0 group by c0 order by c0;
|
|||
select c0, c1, sum(c1) from t0 group by c0, c1 order by c0, c1;
|
||||
select c0 from t1 group by c0 order by c0;
|
||||
select c0, c1 from t1 group by c0, c1 order by c0, c1;
|
||||
|
||||
-- Enhanced tests with more data types and multi-column scenarios
|
||||
|
||||
-- Test with DECIMAL data type
|
||||
create table t3(
|
||||
c0 INT,
|
||||
c1 DECIMAL(10,2),
|
||||
c2 DECIMAL(15,3),
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t3 values(1, 100.50, 1000.123, 'type1');
|
||||
insert into t3 values(1, 200.75, 2000.456, 'type2');
|
||||
insert into t3 values(1, 100.50, 1000.123, 'type1');
|
||||
insert into t3 values(2, 300.25, 3000.789, 'type3');
|
||||
insert into t3 values(2, 400.00, 4000.000, 'type4');
|
||||
insert into t3 values(3, 500.99, 5000.999, 'type5');
|
||||
insert into t3 values(3, 500.99, 5000.999, 'type5');
|
||||
|
||||
-- Test DECIMAL aggregation with multi-column grouping
|
||||
select c0, c1, sum(c2), count(*) from t3 group by c0, c1 order by c0, c1;
|
||||
select c0, sum(c1), avg(c2), max(c3), count(*) from t3 group by c0 order by c0;
|
||||
select c0, c1, c2, count(*) from t3 group by c0, c1, c2 order by c0, c1, c2;
|
||||
|
||||
-- Test with DATETIME data type
|
||||
create table t4(
|
||||
c0 INT,
|
||||
c1 DATETIME,
|
||||
c2 DATE,
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t4 values(1, '2023-01-01 10:00:00', '2023-01-01', 'batch1');
|
||||
insert into t4 values(1, '2023-01-01 10:00:00', '2023-01-01', 'batch1');
|
||||
insert into t4 values(1, '2023-01-02 11:00:00', '2023-01-02', 'batch2');
|
||||
insert into t4 values(2, '2023-01-03 12:00:00', '2023-01-03', 'batch3');
|
||||
insert into t4 values(2, '2023-01-03 12:00:00', '2023-01-03', 'batch3');
|
||||
insert into t4 values(3, '2023-01-04 13:00:00', '2023-01-04', 'batch4');
|
||||
|
||||
-- Test DATETIME aggregation
|
||||
select c0, c1, count(*) from t4 group by c0, c1 order by c0, c1;
|
||||
select c0, c2, count(*), max(c3) from t4 group by c0, c2 order by c0, c2;
|
||||
select c0, min(c1), max(c1), count(*) from t4 group by c0 order by c0;
|
||||
|
||||
-- Test with BOOLEAN data type
|
||||
create table t5(
|
||||
c0 INT,
|
||||
c1 BOOLEAN,
|
||||
c2 BOOLEAN,
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t5 values(1, true, false, 'status1');
|
||||
insert into t5 values(1, true, true, 'status2');
|
||||
insert into t5 values(1, false, false, 'status3');
|
||||
insert into t5 values(2, true, false, 'status4');
|
||||
insert into t5 values(2, false, true, 'status5');
|
||||
insert into t5 values(3, true, true, 'status6');
|
||||
|
||||
-- Test BOOLEAN aggregation
|
||||
select c0, c1, count(*) from t5 group by c0, c1 order by c0, c1;
|
||||
select c0, c1, c2, count(*) from t5 group by c0, c1, c2 order by c0, c1, c2;
|
||||
select c0, sum(case when c1 then 1 else 0 end) as true_count, count(*) from t5 group by c0 order by c0;
|
||||
|
||||
-- Test with FLOAT and DOUBLE data types
|
||||
create table t6(
|
||||
c0 INT,
|
||||
c1 FLOAT,
|
||||
c2 DOUBLE,
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t6 values(1, 1.5, 1.123456789, 'float1');
|
||||
insert into t6 values(1, 2.5, 2.234567890, 'float2');
|
||||
insert into t6 values(1, 1.5, 1.123456789, 'float1');
|
||||
insert into t6 values(2, 3.5, 3.345678901, 'float3');
|
||||
insert into t6 values(2, 4.5, 4.456789012, 'float4');
|
||||
insert into t6 values(3, 5.5, 5.567890123, 'float5');
|
||||
|
||||
-- Test FLOAT/DOUBLE aggregation
|
||||
select c0, c1, sum(c2), count(*) from t6 group by c0, c1 order by c0, c1;
|
||||
select c0, avg(c1), avg(c2), count(*) from t6 group by c0 order by c0;
|
||||
select c0, c1, c2, count(*) from t6 group by c0, c1, c2 order by c0, c1, c2;
|
||||
|
||||
-- Test with LARGEINT data type
|
||||
create table t7(
|
||||
c0 INT,
|
||||
c1 LARGEINT,
|
||||
c2 STRING
|
||||
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t7 values(1, 9223372036854775807, 'large1');
|
||||
insert into t7 values(1, 9223372036854775806, 'large2');
|
||||
insert into t7 values(1, 9223372036854775807, 'large1');
|
||||
insert into t7 values(2, 9223372036854775805, 'large3');
|
||||
insert into t7 values(2, 9223372036854775804, 'large4');
|
||||
insert into t7 values(3, 9223372036854775803, 'large5');
|
||||
|
||||
-- Test LARGEINT aggregation
|
||||
select c0, c1, count(*) from t7 group by c0, c1 order by c0, c1;
|
||||
select c0, sum(c1), count(*) from t7 group by c0 order by c0;
|
||||
select c0, c1, max(c2), count(*) from t7 group by c0, c1 order by c0, c1;
|
||||
|
||||
-- Test with VARCHAR data type
|
||||
create table t8(
|
||||
c0 INT,
|
||||
c1 VARCHAR(50),
|
||||
c2 VARCHAR(100),
|
||||
c3 STRING
|
||||
) DUPLICATE KEY(c0, c1) DISTRIBUTED BY HASH(c0) BUCKETS 3 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t8 values(1, 'varchar1', 'longer_varchar1', 'string1');
|
||||
insert into t8 values(1, 'varchar2', 'longer_varchar2', 'string2');
|
||||
insert into t8 values(1, 'varchar1', 'longer_varchar1', 'string1');
|
||||
insert into t8 values(2, 'varchar3', 'longer_varchar3', 'string3');
|
||||
insert into t8 values(2, 'varchar4', 'longer_varchar4', 'string4');
|
||||
insert into t8 values(3, 'varchar5', 'longer_varchar5', 'string5');
|
||||
|
||||
-- Test VARCHAR aggregation
|
||||
select c0, c1, count(*) from t8 group by c0, c1 order by c0, c1;
|
||||
select c0, c1, c2, count(*) from t8 group by c0, c1, c2 order by c0, c1, c2;
|
||||
select c0, max(c1), max(c2), count(*) from t8 group by c0 order by c0;
|
||||
|
||||
-- Test complex multi-column grouping scenarios
|
||||
create table t9(
|
||||
c1 STRING,
|
||||
c0 INT,
|
||||
c2 DECIMAL(10,2),
|
||||
c3 DATETIME,
|
||||
c4 BOOLEAN
|
||||
) DUPLICATE KEY(c1, c0) DISTRIBUTED BY HASH(c1) BUCKETS 3 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t9 values('group1', 1, 100.50, '2023-01-01 10:00:00', true);
|
||||
insert into t9 values('group1', 1, 100.50, '2023-01-01 10:00:00', true);
|
||||
insert into t9 values('group1', 1, 200.75, '2023-01-01 11:00:00', false);
|
||||
insert into t9 values('group2', 1, 300.25, '2023-01-02 10:00:00', true);
|
||||
insert into t9 values('group3', 2, 400.00, '2023-01-03 10:00:00', false);
|
||||
insert into t9 values('group3', 2, 500.99, '2023-01-03 11:00:00', true);
|
||||
insert into t9 values('group4', 3, 600.50, '2023-01-04 10:00:00', true);
|
||||
|
||||
-- Test complex multi-column grouping
|
||||
select c1, c0, c2, count(*) from t9 group by c1, c0, c2 order by c1, c0, c2;
|
||||
select c1, c0, sum(c2), count(*) from t9 group by c1, c0 order by c1, c0;
|
||||
select c1, c0, c2, c3, count(*) from t9 group by c1, c0, c2, c3 order by c1, c0, c2, c3;
|
||||
select c1, c0, c2, c4, count(*) from t9 group by c1, c0, c2, c4 order by c1, c0, c2, c4;
|
||||
|
||||
-- Test with NULL values in multiple columns
|
||||
create table t10(
|
||||
c1 STRING NULL,
|
||||
c0 INT NULL,
|
||||
c2 DECIMAL(10,2) NULL,
|
||||
c3 DATETIME NULL
|
||||
) DUPLICATE KEY(c1, c0) DISTRIBUTED BY HASH(c1) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t10 values('valid1', 1, 100.50, '2023-01-01 10:00:00');
|
||||
insert into t10 values(null, 1, 200.75, '2023-01-01 11:00:00');
|
||||
insert into t10 values('valid1', 1, null, '2023-01-01 12:00:00');
|
||||
insert into t10 values('valid1', 1, 100.50, null);
|
||||
insert into t10 values('valid2', null, 300.25, '2023-01-02 10:00:00');
|
||||
insert into t10 values('valid3', 2, null, null);
|
||||
insert into t10 values(null, null, null, null);
|
||||
|
||||
-- Test NULL handling in multi-column grouping
|
||||
select c1, c0, count(*) from t10 group by c1, c0 order by c1, c0;
|
||||
select c1, c0, c2, count(*) from t10 group by c1, c0, c2 order by c1, c0, c2;
|
||||
select c1, c0, c2, c3, count(*) from t10 group by c1, c0, c2, c3 order by c1, c0, c2, c3;
|
||||
|
||||
-- Test edge cases where every group contains exactly one row
|
||||
create table t11(
|
||||
c0 INT,
|
||||
c1 STRING,
|
||||
c2 INT
|
||||
) DUPLICATE KEY(c0) DISTRIBUTED BY HASH(c0) BUCKETS 2 PROPERTIES('replication_num' = '1');
|
||||
|
||||
insert into t11 values(1, 'single1', 100);
|
||||
insert into t11 values(2, 'single2', 200);
|
||||
insert into t11 values(3, 'single3', 300);
|
||||
|
||||
-- Test single value groups
|
||||
select c0, c1, c2, count(*) from t11 group by c0, c1, c2 order by c0, c1, c2;
|
||||
select c0, sum(c2), count(*) from t11 group by c0 order by c0;
|
||||
select c0, c1, sum(c2), count(*) from t11 group by c0, c1 order by c0, c1;
|
||||
|
|
|
|||
Loading…
Reference in New Issue