Compare commits
7 Commits
main
...
cursor/imp
| Author | SHA1 | Date |
|---|---|---|
|
|
e50ed60519 | |
|
|
71b6a504a3 | |
|
|
023308ae1b | |
|
|
2a469b4372 | |
|
|
26698ed659 | |
|
|
3d26b9f15e | |
|
|
47fcab7ef5 |
|
|
@ -395,6 +395,7 @@ inline void encode_float64(double v, std::string* dest) {
|
|||
|
||||
struct EncoderVisitor : public ColumnVisitorAdapter<EncoderVisitor> {
|
||||
bool is_last_field = false;
|
||||
bool write_null_markers = true; // Control whether to write NULL markers
|
||||
std::vector<std::string>* buffs;
|
||||
const Buffer<uint8_t>* null_mask = nullptr; // Track null rows to skip processing
|
||||
|
||||
|
|
@ -404,11 +405,14 @@ struct EncoderVisitor : public ColumnVisitorAdapter<EncoderVisitor> {
|
|||
Status do_visit(const NullableColumn& column) {
|
||||
auto& nulls = column.immutable_null_column_data();
|
||||
|
||||
for (size_t i = 0; i < column.size(); i++) {
|
||||
if (nulls[i]) {
|
||||
(*buffs)[i].append("\0", 1);
|
||||
} else {
|
||||
(*buffs)[i].append("\1", 1);
|
||||
// Write NULL markers only if requested
|
||||
if (write_null_markers) {
|
||||
for (size_t i = 0; i < column.size(); i++) {
|
||||
if (nulls[i]) {
|
||||
(*buffs)[i].append("\0", 1);
|
||||
} else {
|
||||
(*buffs)[i].append("\1", 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -524,6 +528,7 @@ StatusOr<ColumnPtr> UtilityFunctions::encode_sort_key(FunctionContext* context,
|
|||
std::vector<std::string> buffs(num_rows);
|
||||
detail::EncoderVisitor visitor;
|
||||
visitor.buffs = &buffs;
|
||||
visitor.write_null_markers = true; // Enable NULL markers for sort key encoding
|
||||
for (int j = 0; j < num_args; ++j) {
|
||||
// Insert NOT_NULL markers for all rows.
|
||||
// This is necessary because the function may receive columns whose nullability
|
||||
|
|
@ -553,6 +558,127 @@ StatusOr<ColumnPtr> UtilityFunctions::encode_sort_key(FunctionContext* context,
|
|||
return result.build(ColumnHelper::is_all_const(columns));
|
||||
}
|
||||
|
||||
// Usage: zorder_encode(col1, col2, ...)
|
||||
// zorder_encode builds a composite key for each row by interleaving the bits of the input columns' values (Morton order).
|
||||
// This encoding preserves spatial locality for multi-dimensional data, which is useful for indexing and sorting.
|
||||
// The function works as follows:
|
||||
// - Each input column is encoded using the same logic as encode_sort_key to get order-preserving byte sequences.
|
||||
// - For each row, the bits from all encoded sequences are interleaved in z-order (Morton order).
|
||||
// - The resulting byte sequence forms the final Z-order encoded key for that row.
|
||||
// - The output is a VARBINARY column, where each entry is the Z-order encoded key for the corresponding row.
|
||||
StatusOr<ColumnPtr> UtilityFunctions::encode_zorder_key(FunctionContext* context, const Columns& columns) {
|
||||
int num_args = columns.size();
|
||||
RETURN_IF(num_args < 1, Status::InvalidArgument("encode_zorder_key requires at least 1 argument"));
|
||||
|
||||
size_t num_rows = columns[0]->size();
|
||||
for (int i = 1; i < num_args; ++i) {
|
||||
RETURN_IF(columns[i]->size() != num_rows,
|
||||
Status::InvalidArgument("all arguments must have the same number of rows"));
|
||||
}
|
||||
|
||||
// Use EncoderVisitor to get encoded sequences for each dimension (without NULL markers)
|
||||
std::vector<std::vector<std::string>> dim_encodings;
|
||||
dim_encodings.reserve(num_args);
|
||||
|
||||
// Collect NULL flags for each dimension
|
||||
std::vector<std::vector<uint8_t>> null_flags;
|
||||
null_flags.reserve(num_args);
|
||||
|
||||
for (int j = 0; j < num_args; ++j) {
|
||||
std::vector<std::string> buffs(num_rows);
|
||||
std::vector<uint8_t> nulls(num_rows, 0);
|
||||
|
||||
detail::EncoderVisitor visitor;
|
||||
visitor.buffs = &buffs;
|
||||
visitor.write_null_markers = false; // Disable NULL markers for z-order encoding
|
||||
visitor.is_last_field = true;
|
||||
RETURN_IF_ERROR(columns[j]->accept(&visitor));
|
||||
|
||||
// Collect NULL flags for this dimension
|
||||
if (columns[j]->is_nullable()) {
|
||||
auto nullable_col = down_cast<const NullableColumn*>(columns[j].get());
|
||||
auto& null_data = nullable_col->immutable_null_column_data();
|
||||
for (size_t i = 0; i < num_rows; ++i) {
|
||||
nulls[i] = null_data[i] ? 1 : 0;
|
||||
}
|
||||
} else {
|
||||
// Non-nullable column, all values are non-null
|
||||
std::fill(nulls.begin(), nulls.end(), 0);
|
||||
}
|
||||
|
||||
dim_encodings.emplace_back(std::move(buffs));
|
||||
null_flags.emplace_back(std::move(nulls));
|
||||
}
|
||||
|
||||
// Calculate total bits needed for z-order interleaving
|
||||
// For z-order encoding, we need to interleave bits from all dimensions
|
||||
// Each dimension contributes up to its maximum bit width
|
||||
// For mixed data types, we use the maximum bit width (64 bits) for all dimensions
|
||||
const size_t max_bit_width = 64; // Maximum bit width for any data type
|
||||
const size_t num_dims = dim_encodings.size();
|
||||
const size_t total_bits = max_bit_width * num_dims;
|
||||
const size_t interleaved_bytes = (total_bits + 7) / 8;
|
||||
const size_t null_markers_size = num_dims;
|
||||
const size_t total_bytes = null_markers_size + interleaved_bytes;
|
||||
|
||||
ColumnBuilder<TYPE_VARBINARY> builder(num_rows);
|
||||
|
||||
// Pre-compute bit position mappings for better cache locality
|
||||
std::vector<std::pair<size_t, int>> bit_positions;
|
||||
bit_positions.reserve(total_bits);
|
||||
for (size_t bit_idx = 0; bit_idx < max_bit_width; ++bit_idx) {
|
||||
for (size_t dim = 0; dim < num_dims; ++dim) {
|
||||
size_t ob = bit_idx * num_dims + dim;
|
||||
size_t byte = ob >> 3;
|
||||
int bit_in_byte = 7 - (ob & 7);
|
||||
bit_positions.emplace_back(byte, bit_in_byte);
|
||||
}
|
||||
}
|
||||
|
||||
std::string zorder_buffer;
|
||||
zorder_buffer.reserve(total_bytes);
|
||||
for (size_t i = 0; i < num_rows; ++i) {
|
||||
std::fill(zorder_buffer.begin(), zorder_buffer.end(), 0);
|
||||
|
||||
// Prepend NULL markers for each dimension (same as original zorder_encode)
|
||||
size_t marker_offset = 0;
|
||||
for (size_t dim = 0; dim < null_flags.size(); ++dim) {
|
||||
zorder_buffer[marker_offset++] = null_flags[dim][i] ? 0 : 1;
|
||||
}
|
||||
|
||||
// Optimized bit interleaving using pre-computed positions
|
||||
size_t bit_pos_idx = 0;
|
||||
bool has_data = false;
|
||||
|
||||
for (size_t bit_idx = 0; bit_idx < max_bit_width; ++bit_idx) {
|
||||
for (size_t dim = 0; dim < dim_encodings.size(); ++dim) {
|
||||
const auto& encoding = dim_encodings[dim][i];
|
||||
size_t byte_pos = bit_idx / 8;
|
||||
size_t bit_in_byte = 7 - (bit_idx % 8);
|
||||
|
||||
if (byte_pos < encoding.size()) {
|
||||
uint8_t bit = (encoding[byte_pos] >> bit_in_byte) & 1;
|
||||
if (bit) {
|
||||
const auto& pos = bit_positions[bit_pos_idx];
|
||||
zorder_buffer[marker_offset + pos.first] |= (1 << pos.second);
|
||||
has_data = true;
|
||||
}
|
||||
}
|
||||
bit_pos_idx++;
|
||||
}
|
||||
|
||||
// Early exit: if we've processed all significant bits and found no data, stop
|
||||
if (bit_idx > 8 && !has_data) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
builder.append(Slice(zorder_buffer.data(), zorder_buffer.size()));
|
||||
}
|
||||
|
||||
return builder.build(ColumnHelper::is_all_const(columns));
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
||||
#include "gen_cpp/opcode/UtilityFunctions.inc"
|
||||
|
|
|
|||
|
|
@ -70,6 +70,9 @@ public:
|
|||
|
||||
// Build an order-preserving composite binary key from heterogeneous arguments
|
||||
DEFINE_VECTORIZED_FN(encode_sort_key);
|
||||
|
||||
// Build a Morton(Z-order) encoded binary key from heterogeneous arguments
|
||||
DEFINE_VECTORIZED_FN(encode_zorder_key);
|
||||
};
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
|
|||
|
|
@ -33,6 +33,34 @@ namespace starrocks {
|
|||
class UtilityFunctionsTest : public ::testing::Test {
|
||||
public:
|
||||
void SetUp() override {}
|
||||
|
||||
// Helper function to sort and print zorder encode results
|
||||
std::vector<std::pair<int, std::string>> sortAndPrintZOrderResults(
|
||||
const BinaryColumn* bin, const std::string& test_name, const std::vector<std::string>& row_labels = {}) {
|
||||
// Create pairs of (row_index, encoded_value) for sorting
|
||||
std::vector<std::pair<int, std::string>> row_encoded_pairs;
|
||||
for (int i = 0; i < bin->size(); ++i) {
|
||||
row_encoded_pairs.emplace_back(i, bin->get_slice(i).to_string());
|
||||
}
|
||||
|
||||
// Sort by encoded value (lexicographic order)
|
||||
std::sort(row_encoded_pairs.begin(), row_encoded_pairs.end(),
|
||||
[](const std::pair<int, std::string>& a, const std::pair<int, std::string>& b) {
|
||||
return a.second < b.second;
|
||||
});
|
||||
|
||||
// Print sorted results for debugging
|
||||
std::cout << "\n=== " << test_name << " sorted results ===" << std::endl;
|
||||
for (const auto& pair : row_encoded_pairs) {
|
||||
std::cout << "Row " << pair.first;
|
||||
if (!row_labels.empty() && pair.first < static_cast<int>(row_labels.size())) {
|
||||
std::cout << " (" << row_labels[pair.first] << ")";
|
||||
}
|
||||
std::cout << " -> " << pair.second << std::endl;
|
||||
}
|
||||
|
||||
return row_encoded_pairs;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(UtilityFunctionsTest, versionTest) {
|
||||
|
|
@ -293,4 +321,217 @@ TEST_F(UtilityFunctionsTest, encodeSortKeyStringEscaping) {
|
|||
ASSERT_NE(k0.to_string(), k1.to_string());
|
||||
}
|
||||
|
||||
// With a single dimension the Z-order key must sort exactly like the input values, including across zero.
TEST_F(UtilityFunctionsTest, zorderEncodeSingleDimOrdering) {
    FunctionContext* ctx = FunctionContext::create_test_context();
    std::unique_ptr<FunctionContext> ctx_guard(ctx);

    // Row i holds values[i]; the values are already in ascending order.
    const std::vector<int32_t> values = {-2, -1, 0, 1, 2};
    auto int_column = Int32Column::create();
    for (size_t row = 0; row < values.size(); ++row) {
        int_column->append(values[row]);
    }

    Columns args;
    args.emplace_back(int_column);

    ASSIGN_OR_ASSERT_FAIL(ColumnPtr encoded, UtilityFunctions::encode_zorder_key(ctx, args));
    auto* keys = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(encoded);
    ASSERT_EQ(5, keys->size());

    // Rank rows by encoded key (also prints the ranking).
    std::vector<std::string> labels = {"-2", "-1", "0", "1", "2"};
    auto ranked = sortAndPrintZOrderResults(keys, "zorderEncodeSingleDimOrdering", labels);

    // Keep only the row indices, in key order.
    std::vector<int> actual_order;
    for (auto it = ranked.begin(); it != ranked.end(); ++it) {
        actual_order.push_back(it->first);
    }

    // Key order must equal input order.
    std::vector<int> expected_order = {0, 1, 2, 3, 4};
    ASSERT_EQ(expected_order, actual_order);
}
|
||||
|
||||
// Two INT dimensions that both grow from row to row: the interleaved key must grow with them.
TEST_F(UtilityFunctionsTest, zorderEncodeTwoDimsBasicInterleaving) {
    FunctionContext* ctx = FunctionContext::create_test_context();
    std::unique_ptr<FunctionContext> ctx_guard(ctx);

    // Row i is the point (a_values[i], b_values[i]).
    const std::vector<int32_t> a_values = {100, 123, 145, 167};
    const std::vector<int32_t> b_values = {10000, 10001, 10010, 10019};

    auto a_column = Int32Column::create();
    auto b_column = Int32Column::create();
    for (size_t row = 0; row < a_values.size(); ++row) {
        a_column->append(a_values[row]);
        b_column->append(b_values[row]);
    }

    Columns args;
    args.emplace_back(a_column);
    args.emplace_back(b_column);

    ASSIGN_OR_ASSERT_FAIL(ColumnPtr encoded, UtilityFunctions::encode_zorder_key(ctx, args));
    auto* keys = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(encoded);
    ASSERT_EQ(a_values.size(), keys->size());

    // Rank rows by encoded key (also prints the ranking).
    auto ranked = sortAndPrintZOrderResults(keys, "zorderEncodeTwoDimsBasicInterleaving");

    // Keep only the row indices, in key order.
    std::vector<int> actual_order;
    for (auto it = ranked.begin(); it != ranked.end(); ++it) {
        actual_order.push_back(it->first);
    }

    std::vector<int> expected_order = {0, 1, 2, 3};
    ASSERT_EQ(expected_order, actual_order);
}
|
||||
|
||||
// A nullable first dimension combined with a non-nullable second one: rows whose first dimension is NULL
// must sort in front of all rows where it is set.
TEST_F(UtilityFunctionsTest, zorderEncodeNullHandling) {
    FunctionContext* ctx = FunctionContext::create_test_context();
    std::unique_ptr<FunctionContext> ctx_guard(ctx);

    // One entry per row; c1 is ignored when c1_is_null is set.
    struct Row {
        int32_t c1;
        int32_t c2;
        bool c1_is_null;
    };
    const std::vector<Row> rows = {
            {0, 0, true},  // (NULL,0)
            {1, 0, false}, // (1,0)
            {0, 1, true},  // (NULL,1)
            {1, 1, false}, // (1,1)
    };

    auto nullable_dim = NullableColumn::create(Int32Column::create(), NullColumn::create());
    auto plain_dim = Int32Column::create();
    for (size_t idx = 0; idx < rows.size(); ++idx) {
        const Row& row = rows[idx];
        if (!row.c1_is_null) {
            nullable_dim->append_datum(Datum(int32_t(row.c1)));
        } else {
            nullable_dim->append_nulls(1);
        }
        plain_dim->append(row.c2);
    }

    Columns args;
    args.emplace_back(nullable_dim);
    args.emplace_back(plain_dim);

    ASSIGN_OR_ASSERT_FAIL(ColumnPtr encoded, UtilityFunctions::encode_zorder_key(ctx, args));
    auto* keys = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(encoded);
    ASSERT_EQ(4, keys->size());

    // Rank rows by encoded key (also prints the ranking).
    std::vector<std::string> labels = {"(NULL,0)", "(1,0)", "(NULL,1)", "(1,1)"};
    auto ranked = sortAndPrintZOrderResults(keys, "zorderEncodeNullHandling", labels);

    // Keep only the row indices, in key order.
    std::vector<int> actual_order;
    for (auto it = ranked.begin(); it != ranked.end(); ++it) {
        actual_order.push_back(it->first);
    }

    // NULL rows first: Row 0 (NULL,0) < Row 2 (NULL,1) < Row 1 (1,0) < Row 3 (1,1)
    std::vector<int> expected_order = {0, 2, 1, 3};
    ASSERT_EQ(expected_order, actual_order);
}
|
||||
|
||||
// Six dimensions of different types (INT, BIGINT, FLOAT, DOUBLE, DATE, DATETIME) in one key. Checks the fixed
// key layout and that rows whose values grow together in every dimension also grow in key order.
TEST_F(UtilityFunctionsTest, zorderEncodeMixedDataTypes) {
    FunctionContext* ctx = FunctionContext::create_test_context();
    std::unique_ptr<FunctionContext> ctx_guard(ctx);

    // One entry per row, one member per dimension.
    struct Sample {
        int32_t int_val;
        int64_t bigint_val;
        float float_val;
        double double_val;
        DateValue date_val;
        TimestampValue timestamp_val;
    };
    const std::vector<Sample> samples = {
            // small positive values
            {10, 20L, 30.5f, 40.5, DateValue::create(2023, 1, 10), TimestampValue::create(2023, 1, 10, 12, 0, 0, 0)},
            // medium positive values
            {50, 60L, 70.5f, 80.5, DateValue::create(2023, 2, 20), TimestampValue::create(2023, 2, 20, 12, 0, 0, 0)},
            // zeros
            {0, 0L, 0.0f, 0.0, DateValue::create(1970, 1, 1), TimestampValue::create(1970, 1, 1, 0, 0, 0, 0)},
            // large positive values
            {90, 95L, 98.5f, 99.5, DateValue::create(2023, 12, 31),
             TimestampValue::create(2023, 12, 31, 23, 59, 59, 0)},
            // very small positive values
            {5, 15L, 25.5f, 35.5, DateValue::create(2023, 1, 5), TimestampValue::create(2023, 1, 5, 12, 0, 0, 0)},
    };

    // One column per data type.
    auto int_col = Int32Column::create();
    auto bigint_col = Int64Column::create();
    auto float_col = FloatColumn::create();
    auto double_col = DoubleColumn::create();
    auto date_col = DateColumn::create();
    auto timestamp_col = TimestampColumn::create();

    for (size_t idx = 0; idx < samples.size(); ++idx) {
        const Sample& sample = samples[idx];
        int_col->append(sample.int_val);
        bigint_col->append(sample.bigint_val);
        float_col->append(sample.float_val);
        double_col->append(sample.double_val);
        date_col->append(sample.date_val);
        timestamp_col->append(sample.timestamp_val);
    }

    Columns args;
    args.emplace_back(int_col);
    args.emplace_back(bigint_col);
    args.emplace_back(float_col);
    args.emplace_back(double_col);
    args.emplace_back(date_col);
    args.emplace_back(timestamp_col);

    ASSIGN_OR_ASSERT_FAIL(ColumnPtr encoded, UtilityFunctions::encode_zorder_key(ctx, args));
    auto* bin = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(encoded);
    ASSERT_EQ(5, bin->size());

    // Copy every key into a byte vector so individual bytes can be inspected.
    std::vector<std::vector<uint8_t>> key_bytes;
    for (int row = 0; row < 5; ++row) {
        const Slice& key_slice = bin->get_slice(row);
        key_bytes.push_back(std::vector<uint8_t>(key_slice.data, key_slice.data + key_slice.size));
    }

    // Expected layout: [6 null markers] + [interleaved bits of 6 dimensions].
    // Every dimension gets 64 bit slots, so 64 * 6 = 384 interleaved bits = 48 bytes; 6 + 48 = 54 bytes in total.
    const size_t expected_size = 6 + (64 * 6 + 7) / 8;

    for (size_t k = 0; k < key_bytes.size(); ++k) {
        const std::vector<uint8_t>& key = key_bytes[k];
        ASSERT_EQ(expected_size, key.size()) << "Key size mismatch";

        // No value is NULL, so each of the 6 leading marker bytes must be 0x01.
        for (int i = 0; i < 6; ++i) {
            ASSERT_EQ(0x01, key[i]) << "Null marker at position " << i << " should be 0x01";
        }
    }

    // Rank rows by encoded key (also prints the ranking).
    std::vector<std::string> labels = {"small", "medium", "zeros", "large", "very_small"};
    auto ranked = sortAndPrintZOrderResults(bin, "zorderEncodeMixedDataTypes", labels);

    // Keep only the row indices, in key order.
    std::vector<int> actual_order;
    for (auto it = ranked.begin(); it != ranked.end(); ++it) {
        actual_order.push_back(it->first);
    }

    // All values are non-negative and grow together across rows, so the keys are expected to order as:
    // Row 2 (zeros) < Row 4 (very_small) < Row 0 (small) < Row 1 (medium) < Row 3 (large)
    std::vector<int> expected_order = {2, 4, 0, 1, 3};
    ASSERT_EQ(expected_order, actual_order);
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
|
|||
|
|
@ -826,6 +826,7 @@ vectorized_functions = [
|
|||
[100018, 'host_name', True, False, 'VARCHAR', [], "UtilityFunctions::host_name"],
|
||||
[100020, 'get_query_profile', True, False, 'VARCHAR', ['VARCHAR'], "UtilityFunctions::get_query_profile"],
|
||||
[100024, 'encode_sort_key', True, False, 'VARBINARY', ['ANY_ELEMENT', '...'], 'UtilityFunctions::encode_sort_key'],
|
||||
[100025, 'encode_zorder_key', True, False, 'VARBINARY', ['ANY_ELEMENT', '...'], 'UtilityFunctions::encode_zorder_key'],
|
||||
|
||||
# json string function
|
||||
[110022, "get_json_int", False, False, "BIGINT", ["VARCHAR", "VARCHAR"], "JsonFunctions::get_json_bigint",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,224 @@
|
|||
-- name: test_zorder_encode_basic
|
||||
CREATE DATABASE test_zorder_encode_basic;
|
||||
-- result:
|
||||
-- !result
|
||||
USE test_zorder_encode_basic;
|
||||
-- result:
|
||||
-- !result
|
||||
CREATE TABLE points2d (
|
||||
id INT NOT NULL,
|
||||
x INT,
|
||||
y INT,
|
||||
zkey VARBINARY(1024) AS (encode_zorder_key(x, y))
|
||||
) ENGINE=OLAP
|
||||
DISTRIBUTED BY HASH(id) BUCKETS 1
|
||||
ORDER BY (zkey)
|
||||
PROPERTIES ("replication_num" = "1");
|
||||
-- result:
|
||||
-- !result
|
||||
set enable_profile = true;
|
||||
-- result:
|
||||
-- !result
|
||||
set enable_async_profile = false;
|
||||
-- result:
|
||||
-- !result
|
||||
create view profile_filter_rows as
|
||||
select trim(unnest)
|
||||
from table(unnest(split(get_query_profile(last_query_id()), '\n')))
|
||||
where unnest like '%FilterRows%'
|
||||
order by unnest;
|
||||
-- result:
|
||||
-- !result
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
row_number() OVER (ORDER BY rand()) as id,
|
||||
100 + (row_number() OVER (ORDER BY rand()) % 200) as x,
|
||||
100 + (row_number() OVER (ORDER BY rand()) % 200) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
-- result:
|
||||
-- !result
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
200000 + row_number() OVER (ORDER BY rand()) as id,
|
||||
1000 + (row_number() OVER (ORDER BY rand()) % 200) as x,
|
||||
1000 + (row_number() OVER (ORDER BY rand()) % 200) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
-- result:
|
||||
-- !result
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
400000 + row_number() OVER (ORDER BY rand()) as id,
|
||||
5000 + (row_number() OVER (ORDER BY rand()) % 200) as x,
|
||||
5000 + (row_number() OVER (ORDER BY rand()) % 200) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
-- result:
|
||||
-- !result
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
600000 + row_number() OVER (ORDER BY rand()) as id,
|
||||
8000 + (row_number() OVER (ORDER BY rand()) % 200) as x,
|
||||
8000 + (row_number() OVER (ORDER BY rand()) % 200) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
-- result:
|
||||
-- !result
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
800000 + row_number() OVER (ORDER BY rand()) as id,
|
||||
100 + (row_number() OVER (ORDER BY rand()) % 9000) as x,
|
||||
100 + (row_number() OVER (ORDER BY rand()) % 9000) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
-- result:
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE x = 1000;
|
||||
-- result:
|
||||
1023
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 587.008K (587008)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 411.969K (411969)
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE y = 1000;
|
||||
-- result:
|
||||
1023
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 587.008K (587008)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 411.969K (411969)
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE x = 1000 and y = 1000;
|
||||
-- result:
|
||||
1
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 587.008K (587008)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 412.991K (412991)
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 95 AND 105 AND y BETWEEN 95 AND 105;
|
||||
-- result:
|
||||
198
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 587.008K (587008)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 412.794K (412794)
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 995 AND 1005 AND y BETWEEN 995 AND 1005;
|
||||
-- result:
|
||||
155
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 587.008K (587008)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 412.837K (412837)
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 4995 AND 5005 AND y BETWEEN 4995 AND 5005;
|
||||
-- result:
|
||||
169
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 587.008K (587008)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 412.823K (412823)
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 7995 AND 8005 AND y BETWEEN 7995 AND 8005;
|
||||
-- result:
|
||||
165
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 589.824K (589824)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 410.011K (410011)
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 90 AND 210 AND y BETWEEN 90 AND 210;
|
||||
-- result:
|
||||
61474
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 587.008K (587008)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 351.518K (351518)
|
||||
-- !result
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 990 AND 1110 AND y BETWEEN 990 AND 1110;
|
||||
-- result:
|
||||
61737
|
||||
-- !result
|
||||
select * from profile_filter_rows;
|
||||
-- result:
|
||||
- BitmapIndexFilterRows: 0
|
||||
- BloomFilterFilterRows: 0
|
||||
- GinFilterRows: 0
|
||||
- SegmentRuntimeZoneMapFilterRows: 0
|
||||
- SegmentZoneMapFilterRows: 0
|
||||
- ShortKeyFilterRows: 0
|
||||
- VectorIndexFilterRows: 0
|
||||
- ZoneMapIndexFilterRows: 587.008K (587008)
|
||||
- DelVecFilterRows: 0
|
||||
- PredFilterRows: 351.255K (351255)
|
||||
-- !result
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
-- name: test_zorder_encode_basic
|
||||
CREATE DATABASE test_zorder_encode_basic;
|
||||
USE test_zorder_encode_basic;
|
||||
|
||||
-- Create a table with a generated Z-order key over two dimensions
|
||||
CREATE TABLE points2d (
|
||||
id INT NOT NULL,
|
||||
x INT,
|
||||
y INT,
|
||||
zkey VARBINARY(1024) AS (encode_zorder_key(x, y))
|
||||
) ENGINE=OLAP
|
||||
DISTRIBUTED BY HASH(id) BUCKETS 1
|
||||
ORDER BY (zkey)
|
||||
PROPERTIES ("replication_num" = "1");
|
||||
|
||||
set enable_profile = true;
|
||||
set enable_async_profile = false;
|
||||
|
||||
create view profile_filter_rows as
|
||||
select trim(unnest)
|
||||
from table(unnest(split(get_query_profile(last_query_id()), '\n')))
|
||||
where unnest like '%FilterRows%'
|
||||
order by unnest;
|
||||
|
||||
-- Insert data that demonstrates Z-order spatial locality
|
||||
-- This creates large clusters of points in different regions to show how Z-order
|
||||
-- improves query performance for spatial range queries
|
||||
|
||||
-- Cluster 1: Large cluster around (100, 100) - 200K points
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
row_number() OVER (ORDER BY rand()) as id,
|
||||
100 + (row_number() OVER (ORDER BY rand()) % 200) as x,
|
||||
100 + (row_number() OVER (ORDER BY rand()) % 200) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
|
||||
-- Cluster 2: Large cluster around (1000, 1000) - 200K points
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
200000 + row_number() OVER (ORDER BY rand()) as id,
|
||||
1000 + (row_number() OVER (ORDER BY rand()) % 200) as x,
|
||||
1000 + (row_number() OVER (ORDER BY rand()) % 200) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
|
||||
-- Cluster 3: Large cluster around (5000, 5000) - 200K points
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
400000 + row_number() OVER (ORDER BY rand()) as id,
|
||||
5000 + (row_number() OVER (ORDER BY rand()) % 200) as x,
|
||||
5000 + (row_number() OVER (ORDER BY rand()) % 200) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
|
||||
-- Cluster 4: Large cluster around (8000, 8000) - 200K points
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
600000 + row_number() OVER (ORDER BY rand()) as id,
|
||||
8000 + (row_number() OVER (ORDER BY rand()) % 200) as x,
|
||||
8000 + (row_number() OVER (ORDER BY rand()) % 200) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
|
||||
-- Scattered points to fill the space - 200K points
|
||||
INSERT INTO points2d (id, x, y)
|
||||
SELECT
|
||||
800000 + row_number() OVER (ORDER BY rand()) as id,
|
||||
100 + (row_number() OVER (ORDER BY rand()) % 9000) as x,
|
||||
100 + (row_number() OVER (ORDER BY rand()) % 9000) as y
|
||||
FROM table(generate_series(1, 200000));
|
||||
|
||||
SELECT count(*) FROM points2d WHERE x = 1000;
|
||||
select * from profile_filter_rows;
|
||||
SELECT count(*) FROM points2d WHERE y = 1000;
|
||||
select * from profile_filter_rows;
|
||||
SELECT count(*) FROM points2d WHERE x = 1000 and y = 1000;
|
||||
select * from profile_filter_rows;
|
||||
|
||||
-- Additional queries to demonstrate Z-order benefits
|
||||
-- Query points in a spatial range around cluster 1
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 95 AND 105 AND y BETWEEN 95 AND 105;
|
||||
select * from profile_filter_rows;
|
||||
|
||||
-- Query points in a spatial range around cluster 2
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 995 AND 1005 AND y BETWEEN 995 AND 1005;
|
||||
select * from profile_filter_rows;
|
||||
|
||||
-- Query points in a spatial range around cluster 3
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 4995 AND 5005 AND y BETWEEN 4995 AND 5005;
|
||||
select * from profile_filter_rows;
|
||||
|
||||
-- Query points in a spatial range around cluster 4
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 7995 AND 8005 AND y BETWEEN 7995 AND 8005;
|
||||
select * from profile_filter_rows;
|
||||
|
||||
-- Large range queries to show Z-order effectiveness
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 90 AND 210 AND y BETWEEN 90 AND 210;
|
||||
select * from profile_filter_rows;
|
||||
|
||||
SELECT count(*) FROM points2d WHERE x BETWEEN 990 AND 1110 AND y BETWEEN 990 AND 1110;
|
||||
select * from profile_filter_rows;
|
||||
|
||||
Loading…
Reference in New Issue