Compare commits

...

6 Commits

Author SHA1 Message Date
Murphy 2e8ea070a1 add be ut
Signed-off-by: Murphy <mofei@starrocks.com>
2025-08-12 18:23:56 +08:00
Murphy f2b7b8ee57 fix fe ut
Signed-off-by: Murphy <mofei@starrocks.com>
2025-08-12 18:20:08 +08:00
Cursor Agent e1fccd0b34 Add test cases for VARBINARY group by and join operations
Co-authored-by: huanmingwong <huanmingwong@gmail.com>
2025-08-12 05:26:45 +00:00
Murphy 047737a32f fix
Signed-off-by: Murphy <mofei@starrocks.com>
2025-08-12 13:21:13 +08:00
Murphy 1e4e8846d6 add sql test
Signed-off-by: Murphy <mofei@starrocks.com>
2025-08-12 12:44:06 +08:00
Cursor Agent 0ed0746a1f Add VARBINARY support in key coder, distribution, and sort key
Co-authored-by: huanmingwong <huanmingwong@gmail.com>
2025-08-12 03:23:10 +00:00
12 changed files with 550 additions and 25 deletions

View File

@ -62,6 +62,7 @@ private:
add_mapping<TYPE_DECIMALV2>();
add_mapping<TYPE_CHAR>();
add_mapping<TYPE_VARCHAR>();
add_mapping<TYPE_VARBINARY>();
add_mapping<TYPE_BOOLEAN>();
}

View File

@ -409,4 +409,8 @@ public:
}
};
// Reuse VARCHAR's key coder behavior for VARBINARY: both are variable-length
// byte strings with the same storage representation, so the VARCHAR traits
// specialization is inherited unchanged rather than duplicated.
template <>
class KeyCoderTraits<TYPE_VARBINARY> : public KeyCoderTraits<TYPE_VARCHAR> {};
} // namespace starrocks

View File

@ -119,6 +119,26 @@ struct ZoneMapDatum<TYPE_CHAR> : public ZoneMapDatumBase<TYPE_CHAR> {
template <>
struct ZoneMapDatum<TYPE_VARCHAR> final : public ZoneMapDatum<TYPE_CHAR> {};
template <>
struct ZoneMapDatum<TYPE_VARBINARY> final : public ZoneMapDatum<TYPE_CHAR> {
void resize_container_for_fit(TypeInfo* type_info, const void* v) override {
static const int INIT_SIZE = 64;
const Slice* slice = reinterpret_cast<const Slice*>(v);
if (slice->size > _length) {
_length = std::max<int>(BitUtil::next_power_of_two(slice->size), INIT_SIZE);
raw::stl_string_resize_uninitialized(&_value_container, _length);
value.data = _value_container.data();
// Don't reset size to 0 for VARBINARY - keep the actual data size
value.size = slice->size;
}
}
void reset(TypeInfo* type_info) override {
value.data = _value_container.data();
value.size = 0;
}
};
template <LogicalType type>
struct ZoneMap {
ZoneMapDatum<type> min_value;

View File

@ -359,4 +359,83 @@ TEST_F(ColumnZoneMapTest, NormalTestCharPage) {
test_string("NormalTestCharPage", type_info);
}
// Test for varbinary: exercises the zone-map writer/reader round trip for a
// VARBINARY key column through the shared string-type test helper, mirroring
// the CHAR/VARCHAR page tests above.
TEST_F(ColumnZoneMapTest, NormalTestVarbinaryPage) {
TabletColumn varbinary_column = create_varbinary_key(0);
TypeInfoPtr type_info = get_type_info(varbinary_column);
test_string("NormalTestVarbinaryPage", type_info);
}
// Test for varbinary with binary data: verifies that the zone-map index
// correctly computes per-page and segment-wide min/max for raw binary values
// (including embedded NUL bytes and high bytes, which would break any
// C-string-based comparison), and that null-only pages are handled.
TEST_F(ColumnZoneMapTest, VarbinaryWithBinaryData) {
std::string filename = kTestDir + "/VarbinaryWithBinaryData";
TabletColumn varbinary_column = create_varbinary_key(0);
TypeInfoPtr type_info = get_type_info(varbinary_column);
auto writer = ZoneMapIndexWriter::create(type_info.get());
// Page 1: binary data with various byte patterns. Explicit lengths are
// required because the literals contain embedded '\0' bytes.
std::vector<std::string> binary_values1 = {
std::string("\x00\x01\x02\x03", 4), // Binary data starting with null bytes
std::string("\xFF\xFE\xFD\xFC", 4), // Binary data with high bytes
std::string("ABCD", 4), // ASCII data
std::string("\x00\x00\x00\x00", 4), // All null bytes
};
for (auto& value : binary_values1) {
Slice slice(value);
writer->add_values((const uint8_t*)&slice, 1);
}
writer->flush();
// Page 2: more binary data with different patterns, plus one null.
std::vector<std::string> binary_values2 = {
std::string("\x01\x02\x03\x04", 4), std::string("\xFE\xFD\xFC\xFB", 4), std::string("EFGH", 4),
std::string("\xFF\xFF\xFF\xFF", 4), // All high bytes
};
for (auto& value : binary_values2) {
Slice slice(value);
writer->add_values((const uint8_t*)&slice, 1);
}
writer->add_nulls(1);
writer->flush();
// Page 3: null values only.
writer->add_nulls(3);
writer->flush();
// Write out zone map index
ColumnIndexMetaPB index_meta;
write_file(*writer, index_meta, filename);
// Read back and verify: three flushes -> three zone-map pages.
ZoneMapIndexReader column_zone_map;
load_zone_map(column_zone_map, index_meta, filename);
ASSERT_EQ(3, column_zone_map.num_pages());
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
ASSERT_EQ(3, zone_maps.size());
// Check first page - should have min/max from binary_values1
// For binary data, comparison is byte-by-byte, so "\x00\x00\x00\x00" is min and "\xFF\xFE\xFD\xFC" is max
check_result(zone_maps[0], true, true, std::string("\x00\x00\x00\x00", 4), std::string("\xFF\xFE\xFD\xFC", 4),
false, true);
// Check second page - should have min/max from binary_values2 plus null
// "\x01\x02\x03\x04" is min and "\xFF\xFF\xFF\xFF" is max
check_result(zone_maps[1], true, true, std::string("\x01\x02\x03\x04", 4), std::string("\xFF\xFF\xFF\xFF", 4), true,
true);
// Check third page - should be all nulls
check_result(zone_maps[2], false, false, "", "", true, false);
// Check segment zonemap - should cover all data
// The segment zonemap should have the overall min/max across all pages
const auto& segment_zonemap = index_meta.zone_map_index().segment_zone_map();
check_result(segment_zonemap, true, true, std::string("\x00\x00\x00\x00", 4), std::string("\xFF\xFF\xFF\xFF", 4),
true, true);
}
} // namespace starrocks

View File

@ -164,6 +164,18 @@ inline TabletColumn create_varchar_key(int32_t id, bool is_nullable = true, int
return column;
}
// Build a VARBINARY key-column definition for tests.
// `id` doubles as both the unique id and the column name; `length` is the
// declared column length; the index length is fixed at 4 bytes.
inline TabletColumn create_varbinary_key(int32_t id, bool is_nullable = true, int length = 8) {
    TabletColumn col;
    col.set_name(std::to_string(id));
    col.set_unique_id(id);
    col.set_type(TYPE_VARBINARY);
    col.set_length(length);
    col.set_index_length(4);
    col.set_is_key(true);
    col.set_is_nullable(is_nullable);
    return col;
}
inline TabletColumn create_array(int32_t id, bool is_nullable = true, int length = 24) {
TabletColumn column;
column.set_unique_id(id);

View File

@ -821,7 +821,7 @@ public abstract class Type implements Cloneable {
return true;
}
return !isOnlyMetricType() && !isJsonType() && !isFunctionType() && !isBinaryType();
return !isOnlyMetricType() && !isJsonType() && !isFunctionType();
}
public boolean canGroupBy() {
@ -839,7 +839,7 @@ public abstract class Type implements Cloneable {
}
return true;
}
return !isOnlyMetricType() && !isJsonType() && !isFunctionType() && !isBinaryType();
return !isOnlyMetricType() && !isJsonType() && !isFunctionType();
}
public boolean canOrderBy() {
@ -847,8 +847,7 @@ public abstract class Type implements Cloneable {
if (isArrayType()) {
return ((ArrayType) this).getItemType().canOrderBy();
}
return !isOnlyMetricType() && !isJsonType() && !isFunctionType() && !isBinaryType() && !isStructType() &&
!isMapType();
return !isOnlyMetricType() && !isJsonType() && !isFunctionType() && !isStructType() && !isMapType();
}
public boolean canPartitionBy() {
@ -883,8 +882,9 @@ public abstract class Type implements Cloneable {
public boolean canDistributedBy() {
// TODO(mofei) support distributed by for JSON
// Allow VARBINARY as distribution key
return !isComplexType() && !isFloatingPointType() && !isOnlyMetricType() && !isJsonType()
&& !isFunctionType() && !isBinaryType();
&& !isFunctionType();
}
public boolean canBeWindowFunctionArgumentTypes() {

View File

@ -435,8 +435,8 @@ public class CreateTableAnalyzer {
}
ColumnDef cd = columnDefs.get(idx);
Type t = cd.getType();
if (!(t.isBoolean() || t.isIntegerType() || t.isLargeint() || t.isVarchar() || t.isDate() ||
t.isDatetime())) {
if (!(t.isBoolean() || t.isIntegerType() || t.isLargeint() || t.isVarchar() || t.isBinaryType() ||
t.isDate() || t.isDatetime())) {
throw new SemanticException("sort key column[" + cd.getName() + "] type not supported: " + t.toSql());
}
}

View File

@ -1131,7 +1131,7 @@ public class CreateTableTest {
}
@Test
public void testCreateVarBinaryTable() {
public void testCreateVarBinaryTable() throws Exception {
// duplicate table
ExceptionChecker.expectThrowsNoException(() -> createTable(
"create table test.varbinary_tbl\n" +
@ -1174,20 +1174,16 @@ public class CreateTableTest {
"distributed by hash(k1) buckets 1\n" + "properties('replication_num' = '1');"));
// failed
ExceptionChecker.expectThrowsWithMsg(AnalysisException.class,
"Invalid data type of key column 'k2': 'VARBINARY'",
() -> createTable("create table test.varbinary_tbl0\n"
createTable("create table test.varbinary_tbl00\n"
+ "(k1 int, k2 varbinary)\n"
+ "duplicate key(k1, k2)\n"
+ "distributed by hash(k1) buckets 1\n"
+ "properties('replication_num' = '1');"));
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"VARBINARY(10) column can not be distribution column",
() -> createTable("create table test.varbinary_tbl0 \n"
+ "properties('replication_num' = '1');");
createTable("create table test.varbinary_tbl01 \n"
+ "(k1 int, k2 varbinary(10) )\n"
+ "duplicate key(k1)\n"
+ "distributed by hash(k2) buckets 1\n"
+ "properties('replication_num' = '1');"));
+ "properties('replication_num' = '1');");
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"Column[j] type[VARBINARY] cannot be a range partition key",
() -> createTable("create table test.varbinary_tbl0 \n" +
@ -1199,7 +1195,7 @@ public class CreateTableTest {
}
@Test
public void testCreateBinaryTable() {
public void testCreateBinaryTable() throws Exception {
// duplicate table
ExceptionChecker.expectThrowsNoException(() -> createTable(
"create table test.binary_tbl\n" +
@ -1242,20 +1238,16 @@ public class CreateTableTest {
"distributed by hash(k1) buckets 1\n" + "properties('replication_num' = '1');"));
// failed
ExceptionChecker.expectThrowsWithMsg(AnalysisException.class,
"Invalid data type of key column 'k2': 'VARBINARY'",
() -> createTable("create table test.binary_tbl0\n"
createTable("create table test.binary_tbl01\n"
+ "(k1 int, k2 binary)\n"
+ "duplicate key(k1, k2)\n"
+ "distributed by hash(k1) buckets 1\n"
+ "properties('replication_num' = '1');"));
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"VARBINARY(10) column can not be distribution column",
() -> createTable("create table test.binary_tbl0 \n"
+ "properties('replication_num' = '1');");
createTable("create table test.binary_tbl11 \n"
+ "(k1 int, k2 binary(10) )\n"
+ "duplicate key(k1)\n"
+ "distributed by hash(k2) buckets 1\n"
+ "properties('replication_num' = '1');"));
+ "properties('replication_num' = '1');");
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"Column[j] type[VARBINARY] cannot be a range partition key",
() -> createTable("create table test.binary_tbl0 \n" +

View File

@ -0,0 +1,64 @@
-- name: test_varbinary_groupby_join
create database db_${uuid0};
-- result:
-- !result
use db_${uuid0};
-- result:
-- !result
create table a(
id int,
kb varbinary,
v int
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id)
BUCKETS 1
PROPERTIES('replication_num'='1');
-- result:
-- !result
create table b(
id int,
kb varbinary,
v int
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id)
BUCKETS 1
PROPERTIES('replication_num'='1');
-- result:
-- !result
insert into a values
(1, x'0102', 10),
(2, x'0102', 20),
(3, x'0AFF', 30),
(4, x'', 40);
-- result:
-- !result
insert into b values
(10, x'0102', 100),
(20, x'0AFF', 200),
(30, x'BEEF', 300),
(40, x'', 400);
-- result:
-- !result
select hex(kb), count(*), sum(v) from a group by kb order by hex(kb);
-- result:
2 30
0AFF 1 30
BEEF 0 0
1 40
-- !result
select hex(a.kb), a.v, b.v from a join b on a.kb = b.kb order by hex(a.kb), a.v, b.v;
-- result:
40 400
0102 10 100
0102 20 100
0AFF 30 200
-- !result
select hex(a.kb), a.v, ifnull(b.v, -1) from a left join b on a.kb = b.kb order by hex(a.kb), a.v, ifnull(b.v, -1);
-- result:
40 400
0102 10 100
0102 20 100
0AFF 30 200
-- !result

View File

@ -0,0 +1,44 @@
-- name: test_varbinary_groupby_join
create database db_${uuid0};
use db_${uuid0};
-- Create tables with VARBINARY
create table a(
id int,
kb varbinary,
v int
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id)
BUCKETS 1
PROPERTIES('replication_num'='1');
create table b(
id int,
kb varbinary,
v int
)
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id)
BUCKETS 1
PROPERTIES('replication_num'='1');
-- Insert rows using hex literal x'..'
insert into a values
(1, x'0102', 10),
(2, x'0102', 20),
(3, x'0AFF', 30);
insert into b values
(10, x'0102', 100),
(20, x'0AFF', 200),
(30, x'BEEF', 300);
-- GROUP BY on VARBINARY
select hex(kb), count(*), sum(v) from a group by kb order by hex(kb);
-- JOIN on VARBINARY equality
select hex(a.kb), a.v, b.v from a join b on a.kb = b.kb order by hex(a.kb), a.v, b.v;
-- LEFT JOIN with unmatched key
select hex(a.kb), a.v, ifnull(b.v, -1) from a left join b on a.kb = b.kb order by hex(a.kb), a.v, ifnull(b.v, -1);

View File

@ -0,0 +1,158 @@
-- name: test_make_sort_key_json
CREATE DATABASE test_make_sort_key_json;
-- result:
-- !result
USE test_make_sort_key_json;
-- result:
-- !result
CREATE TABLE `json_test_table` (
`id` int(11) NOT NULL COMMENT "",
`json_data` json NOT NULL COMMENT "",
`json_array` json NOT NULL COMMENT "",
`json_nested` json NOT NULL COMMENT "",
`sort_key` varbinary(1024) AS (
make_sort_key(
get_json_int(json_data, '$.age'),
get_json_string(json_data, '$.name'),
get_json_string(json_data, '$.city'),
get_json_string(json_array, '$[0]'),
get_json_double(json_nested, '$.user.profile.score')
)
) COMMENT "Auto-generated sort key from extracted JSON fields"
) ENGINE=OLAP
DISTRIBUTED BY HASH(sort_key) BUCKETS 1
ORDER BY (sort_key)
PROPERTIES ( "replication_num" = "1");
-- result:
E: (1064, 'VARBINARY(1024) column can not be distribution column')
-- !result
INSERT INTO json_test_table (id, json_data, json_array, json_nested) VALUES
(1, parse_json('{"name": "Alice", "age": 25, "city": "New York"}'),
parse_json('["apple", "banana", "cherry"]'),
parse_json('{"user": {"id": 101, "profile": {"verified": true, "score": 95.5}}}')),
(2, parse_json('{"name": "Bob", "age": 30, "city": "Los Angeles"}'),
parse_json('["orange", "grape"]'),
parse_json('{"user": {"id": 102, "profile": {"verified": false, "score": 87.2}}}')),
(3, parse_json('{"name": "Charlie", "age": 28, "city": "Chicago"}'),
parse_json('["mango", "pineapple", "kiwi", "strawberry"]'),
parse_json('{"user": {"id": 103, "profile": {"verified": true, "score": 92.8}}}')),
(4, parse_json('{"name": "Diana", "age": 22, "city": "Miami"}'),
parse_json('["pear"]'),
parse_json('{"user": {"id": 104, "profile": {"verified": true, "score": 89.1}}}')),
(5, parse_json('{"name": "Eve", "age": 35, "city": "Seattle"}'),
parse_json('["blueberry", "raspberry", "blackberry"]'),
parse_json('{"user": {"id": 105, "profile": {"verified": false, "score": 78.9}}}'));
-- result:
E: (1064, 'Getting analyzing error. Detail message: Table json_test_table is not found.')
-- !result
SELECT id,
json_data,
json_extract(json_data, '$.age') as age,
json_extract(json_data, '$.name') as name,
json_extract(json_data, '$.city') as city,
json_extract(json_array, '$[0]') as first_fruit,
json_extract(json_nested, '$.user.profile.score') as score,
sort_key,
length(sort_key) as sort_key_length
FROM json_test_table
ORDER BY id;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
SELECT id, json_data, json_array
FROM json_test_table
ORDER BY sort_key;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
SELECT id,
json_extract(json_data, '$.age') as age,
json_extract(json_data, '$.name') as name,
json_extract(json_data, '$.city') as city,
json_extract(json_array, '$[0]') as first_fruit,
json_extract(json_nested, '$.user.profile.score') as score,
sort_key
FROM json_test_table
ORDER BY sort_key;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
SELECT id, json_data, json_array
FROM json_test_table
WHERE sort_key > (SELECT sort_key FROM json_test_table WHERE id = 2)
ORDER BY id;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
SELECT id,
json_extract(json_data, '$.age') as age,
json_extract(json_data, '$.name') as name,
json_extract(json_data, '$.city') as city,
json_extract(json_array, '$[0]') as first_fruit,
json_extract(json_nested, '$.user.profile.score') as score
FROM json_test_table
ORDER BY sort_key;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
INSERT INTO json_test_table (id, json_data, json_array, json_nested) VALUES
(6, NULL, parse_json('["test"]'), parse_json('{"test": null}')),
(7, parse_json('{"age": 40}'), parse_json('[]'), parse_json('{"user": {"id": 106}}'));
-- result:
E: (1064, 'Getting analyzing error. Detail message: Table json_test_table is not found.')
-- !result
SELECT id,
json_data,
json_extract(json_data, '$.age') as age,
json_extract(json_data, '$.name') as name,
json_extract(json_data, '$.city') as city,
json_extract(json_array, '$[0]') as first_fruit,
json_extract(json_nested, '$.user.profile.score') as score,
sort_key
FROM json_test_table
WHERE id IN (6, 7)
ORDER BY id;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
SELECT
count(*) as total_rows,
count(sort_key) as rows_with_sort_keys,
avg(length(sort_key)) as avg_sort_key_length
FROM json_test_table;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
SHOW CREATE TABLE json_test_table;
-- result:
E: (1064, 'Getting analyzing error. Detail message: Table json_test_table is not found.')
-- !result
UPDATE json_test_table
SET json_data = parse_json('{"name": "Alice Updated", "age": 26, "city": "New York"}')
WHERE id = 1;
-- result:
E: (1064, 'Getting analyzing error. Detail message: Table json_test_table is not found.')
-- !result
SELECT id,
json_data,
json_extract(json_data, '$.age') as age,
json_extract(json_data, '$.name') as name,
json_extract(json_data, '$.city') as city,
sort_key
FROM json_test_table
WHERE id = 1;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
SELECT id, json_data, json_array
FROM json_test_table
WHERE sort_key BETWEEN
(SELECT sort_key FROM json_test_table WHERE id = 4) AND
(SELECT sort_key FROM json_test_table WHERE id = 2)
ORDER BY sort_key;
-- result:
E: (5502, "Getting analyzing error. Detail message: Unknown table 'test_make_sort_key_json.json_test_table'.")
-- !result
DROP DATABASE test_make_sort_key_json;
-- result:
-- !result

View File

@ -0,0 +1,151 @@
-- name: test_make_sort_key_json
CREATE DATABASE test_make_sort_key_json;
USE test_make_sort_key_json;
-- Create a table with JSON data types and a single generated sort key column
-- The sort key extracts specific fields from JSON and combines them for efficient sorting
CREATE TABLE `json_test_table` (
`id` int(11) NOT NULL COMMENT "",
`json_data` json NOT NULL COMMENT "",
`json_array` json NOT NULL COMMENT "",
`json_nested` json NOT NULL COMMENT "",
`sort_key` varbinary(1024) AS (
make_sort_key(
get_json_int(json_data, '$.age'),
get_json_string(json_data, '$.name'),
get_json_string(json_data, '$.city'),
get_json_string(json_array, '$[0]'),
get_json_double(json_nested, '$.user.profile.score')
)
) COMMENT "Auto-generated sort key from extracted JSON fields"
) ENGINE=OLAP
DISTRIBUTED BY HASH(sort_key) BUCKETS 1
ORDER BY (sort_key)
PROPERTIES ( "replication_num" = "1");
-- Insert test data with various JSON structures
-- The sort key will be automatically generated from extracted JSON fields
INSERT INTO json_test_table (id, json_data, json_array, json_nested) VALUES
(1, parse_json('{"name": "Alice", "age": 25, "city": "New York"}'),
parse_json('["apple", "banana", "cherry"]'),
parse_json('{"user": {"id": 101, "profile": {"verified": true, "score": 95.5}}}')),
(2, parse_json('{"name": "Bob", "age": 30, "city": "Los Angeles"}'),
parse_json('["orange", "grape"]'),
parse_json('{"user": {"id": 102, "profile": {"verified": false, "score": 87.2}}}')),
(3, parse_json('{"name": "Charlie", "age": 28, "city": "Chicago"}'),
parse_json('["mango", "pineapple", "kiwi", "strawberry"]'),
parse_json('{"user": {"id": 103, "profile": {"verified": true, "score": 92.8}}}')),
(4, parse_json('{"name": "Diana", "age": 22, "city": "Miami"}'),
parse_json('["pear"]'),
parse_json('{"user": {"id": 104, "profile": {"verified": true, "score": 89.1}}}')),
(5, parse_json('{"name": "Eve", "age": 35, "city": "Seattle"}'),
parse_json('["blueberry", "raspberry", "blackberry"]'),
parse_json('{"user": {"id": 105, "profile": {"verified": false, "score": 78.9}}}'));
-- Test 1: Verify that the generated sort key column is automatically populated
-- This shows how make_sort_key extracts and combines JSON fields
SELECT id,
json_data,
get_json_int(json_data, '$.age') as age,
get_json_string(json_data, '$.name') as name,
get_json_string(json_data, '$.city') as city,
get_json_string(json_array, '$[0]') as first_fruit,
get_json_double(json_nested, '$.user.profile.score') as score,
sort_key,
length(sort_key) as sort_key_length
FROM json_test_table
ORDER BY id;
-- Test 2: Use the generated sort key for ordering
-- This demonstrates the performance benefit of pre-computed sort keys
SELECT id, json_data, json_array
FROM json_test_table
ORDER BY sort_key;
-- Test 3: Show how the sort key combines multiple extracted fields
-- The sort key contains: age, name, city, first_fruit, score
SELECT id,
get_json_int(json_data, '$.age') as age,
get_json_string(json_data, '$.name') as name,
get_json_string(json_data, '$.city') as city,
get_json_string(json_array, '$[0]') as first_fruit,
get_json_double(json_nested, '$.user.profile.score') as score,
sort_key
FROM json_test_table
ORDER BY sort_key;
-- Test 4: Use generated sort key in WHERE clause for filtering
-- This shows practical usage of the generated column
SELECT id, json_data, json_array
FROM json_test_table
WHERE sort_key > (SELECT sort_key FROM json_test_table WHERE id = 2)
ORDER BY id;
-- Test 5: Demonstrate that the sort key properly handles different data types
-- The extracted fields include: int (age), string (name, city, first_fruit), double (score)
SELECT id,
get_json_int(json_data, '$.age') as age,
get_json_string(json_data, '$.name') as name,
get_json_string(json_data, '$.city') as city,
get_json_string(json_array, '$[0]') as first_fruit,
json_extract(json_nested, '$.user.profile.score') as score
FROM json_test_table
ORDER BY sort_key;
-- Test 6: Test with NULL JSON values and missing fields
-- This verifies how the function handles NULL inputs and missing JSON paths
INSERT INTO json_test_table (id, json_data, json_array, json_nested) VALUES
(6, NULL, parse_json('["test"]'), parse_json('{"test": null}')),
(7, parse_json('{"age": 40}'), parse_json('[]'), parse_json('{"user": {"id": 106}}'));
-- Verify that NULL JSON and missing fields generate appropriate sort keys
SELECT id,
json_data,
get_json_int(json_data, '$.age') as age,
get_json_string(json_data, '$.name') as name,
get_json_string(json_data, '$.city') as city,
get_json_string(json_array, '$[0]') as first_fruit,
get_json_double(json_nested, '$.user.profile.score') as score,
sort_key
FROM json_test_table
WHERE id IN (6, 7)
ORDER BY id;
-- Test 7: Performance test - verify the generated column is populated
-- This demonstrates the function's performance characteristics
SELECT
count(*) as total_rows,
count(sort_key) as rows_with_sort_keys,
avg(length(sort_key)) as avg_sort_key_length
FROM json_test_table;
-- Test 8: Show table structure to verify the generated column definition
SHOW CREATE TABLE json_test_table;
-- Test 9: Demonstrate that the generated column is automatically updated
-- Update a JSON value and verify the sort key changes
UPDATE json_test_table
SET json_data = parse_json('{"name": "Alice Updated", "age": 26, "city": "New York"}')
WHERE id = 1;
-- Verify the sort key was automatically updated with new extracted values
SELECT id,
json_data,
get_json_int(json_data, '$.age') as age,
get_json_string(json_data, '$.name') as name,
get_json_string(json_data, '$.city') as city,
sort_key
FROM json_test_table
WHERE id = 1;
-- Test 10: Use the sort key for efficient range queries
-- This shows how the extracted fields enable efficient filtering and sorting
SELECT id, json_data, json_array
FROM json_test_table
WHERE sort_key BETWEEN
(SELECT sort_key FROM json_test_table WHERE id = 4) AND
(SELECT sort_key FROM json_test_table WHERE id = 2)
ORDER BY sort_key;
-- Clean up
DROP DATABASE test_make_sort_key_json;