[Enhancement] support encode_sort_key function (backport #61781) (#61976)

Co-authored-by: Murphy <96611012+murphyatwork@users.noreply.github.com>
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
mergify[bot] 2025-08-19 18:36:47 +08:00 committed by GitHub
parent 90f1f3be58
commit ae28c45368
20 changed files with 784 additions and 119 deletions

View File

@@ -18,6 +18,7 @@
#include "runtime/mem_pool.h"
#include "storage/olap_type_infra.h"
#include "storage/type_traits.h"
#include "types/logical_type.h"
namespace starrocks {
@@ -51,6 +52,7 @@ Status datum_from_string(TypeInfo* type_info, Datum* dst, const std::string& str
return Status::OK();
}
/* Types that need memory allocated */
case TYPE_VARBINARY:
case TYPE_CHAR:
case TYPE_VARCHAR: {
/* Types that need memory allocated */
@@ -92,6 +94,7 @@ std::string datum_to_string(TypeInfo* type_info, const Datum& datum) {
switch (type) {
case TYPE_BOOLEAN:
return datum_to_string<TYPE_TINYINT>(type_info, datum);
case TYPE_VARBINARY:
case TYPE_CHAR:
case TYPE_VARCHAR:
return datum_to_string<TYPE_VARCHAR>(type_info, datum);

View File

@@ -14,6 +14,7 @@
#include "exprs/utility_functions.h"
#include "column/column_visitor_adapter.h"
#include "gen_cpp/FrontendService_types.h"
#include "runtime/client_cache.h"
@@ -29,7 +30,9 @@
#include <limits>
#include <random>
#include "column/binary_column.h"
#include "column/column_builder.h"
#include "column/column_helper.h"
#include "column/column_viewer.h"
#include "column/vectorized_fwd.h"
#include "common/config.h"
@@ -39,6 +42,8 @@
#include "gutil/casts.h"
#include "runtime/runtime_state.h"
#include "service/backend_options.h"
#include "storage/key_coder.h"
#include "storage/primary_key_encoder.h"
#include "types/logical_type.h"
#include "util/cidr.h"
#include "util/monotime.h"
@@ -366,6 +371,188 @@ StatusOr<ColumnPtr> UtilityFunctions::equiwidth_bucket(FunctionContext* context,
return builder.build(false);
}
// Helpers copied and adapted from storage/primary_key_encoder.cpp for order-preserving encoding
namespace detail {
inline void encode_float32(float v, std::string* dest) {
uint32_t u;
static_assert(sizeof(u) == sizeof(v), "size mismatch");
memcpy(&u, &v, sizeof(u));
// If negative, flip all bits; else flip sign bit only.
u ^= (u & 0x80000000u) ? 0xFFFFFFFFu : 0x80000000u;
u = starrocks::encoding_utils::to_bigendian(u);
dest->append(reinterpret_cast<const char*>(&u), sizeof(u));
}
inline void encode_float64(double v, std::string* dest) {
uint64_t u;
static_assert(sizeof(u) == sizeof(v), "size mismatch");
memcpy(&u, &v, sizeof(u));
u ^= (u & 0x8000000000000000ull) ? 0xFFFFFFFFFFFFFFFFull : 0x8000000000000000ull;
u = starrocks::encoding_utils::to_bigendian(u);
dest->append(reinterpret_cast<char*>(&u), sizeof(u));
}
struct EncoderVisitor : public ColumnVisitorAdapter<EncoderVisitor> {
bool is_last_field = false;
std::vector<std::string>* buffs;
const Buffer<uint8_t>* null_mask = nullptr; // Track null rows to skip processing
explicit EncoderVisitor() : ColumnVisitorAdapter(this) {}
// Nullable wrapper: handle null values and data together
Status do_visit(const NullableColumn& column) {
auto& nulls = column.immutable_null_column_data();
for (size_t i = 0; i < column.size(); i++) {
if (nulls[i]) {
(*buffs)[i].append("\0", 1);
} else {
(*buffs)[i].append("\1", 1);
}
}
// Set the null mask so that subsequent visitor methods can skip null rows
const Buffer<uint8_t>* original_null_mask = null_mask;
null_mask = &nulls;
// Process the underlying data column
Status status = column.data_column()->accept(this);
// Restore the original null mask
null_mask = original_null_mask;
return status;
}
// Const: forward to data
Status do_visit(const ConstColumn& column) {
for (size_t i = 0; i < column.size(); i++) {
RETURN_IF_ERROR(column.data_column()->accept(this));
}
return Status::OK();
}
// Strings/binary
template <typename T>
Status do_visit(const BinaryColumnBase<T>& column) {
for (size_t i = 0; i < column.size(); i++) {
// Skip processing for null rows
if (null_mask && (*null_mask)[i]) {
continue;
}
Slice s = column.get_slice(i);
encoding_utils::encode_slice(s, &(*buffs)[i], is_last_field);
}
return Status::OK();
}
// Fixed-length numerics
template <typename T>
Status do_visit(const FixedLengthColumn<T>& column) {
const auto& data = column.get_data();
for (size_t i = 0; i < column.size(); i++) {
// Skip processing for null rows
if (null_mask && (*null_mask)[i]) {
continue;
}
if constexpr (std::is_same_v<T, float>) {
encode_float32(data[i], &(*buffs)[i]);
} else if constexpr (std::is_same_v<T, double>) {
encode_float64(data[i], &(*buffs)[i]);
} else if constexpr (std::is_integral_v<T> && sizeof(T) <= 8) {
encoding_utils::encode_integral<T>(data[i], &(*buffs)[i]);
} else if constexpr (std::is_same_v<T, DateValue>) {
encoding_utils::encode_integral<int32_t>(data[i].julian(), &(*buffs)[i]);
} else if constexpr (std::is_same_v<T, DateTimeValue>) {
encoding_utils::encode_integral<int64_t>(data[i].to_int64(), &(*buffs)[i]);
} else if constexpr (std::is_same_v<T, TimestampValue>) {
encoding_utils::encode_integral<int64_t>(data[i].timestamp(), &(*buffs)[i]);
} else {
return Status::NotSupported(
fmt::format("encode_sort_key: unsupported argument type: {}", typeid(T).name()));
}
}
return Status::OK();
}
// Unsupported complex types: map/array/struct/json/hll/bitmap/percentile
template <typename T>
Status do_visit(const T& column) {
// Even for unsupported types we would normally honor the null mask,
// but since we return an error anyway, the null check can be skipped.
return Status::NotSupported("encode_sort_key: unsupported argument type");
}
};
} // namespace detail
// The encoding strategy of encode_sort_key is as follows:
//
// - For each input column, each row's value is encoded into a binary buffer
// using an order-preserving encoding. The encoding method depends on the
// column's type:
// * For integral types, values are converted to big-endian and, for signed
// types, the sign bit is flipped to preserve order.
// * For floating-point types, a custom encoding is used to ensure correct
// sort order.
// * For string types (Slice), a special encoding is used for non-last fields
// to escape 0x00 bytes and append a 0x00 0x00 terminator, while the last
// field is appended directly.
// * For date/time types, the internal integer representation is encoded as
// an integral value.
// * Unsupported types return an error.
//
// - For each column, a NULL marker (byte value 0x00 or 0x01) is appended for every
// row, regardless of the column's nullability, to ensure consistent encoding
// even if the column's nullability metadata is lost during processing.
//
// - After encoding each column, a separator byte (0x00) is appended between
// columns (except after the last column) to delimit fields in the composite
// key.
//
// - The result is a composite binary key for each row, which preserves the
// original sort order of the input columns when compared lexicographically.
StatusOr<ColumnPtr> UtilityFunctions::encode_sort_key(FunctionContext* context, const Columns& columns) {
const char* NOT_NULL_MARKER = "\1";
const char* COLUMN_SEPARATOR = "\0";
int num_args = columns.size();
RETURN_IF(num_args < 1, Status::InvalidArgument("encode_sort_key requires at least 1 argument"));
size_t num_rows = columns[0]->size();
auto result = ColumnBuilder<TYPE_VARBINARY>(num_rows);
std::vector<std::string> buffs(num_rows);
detail::EncoderVisitor visitor;
visitor.buffs = &buffs;
for (int j = 0; j < num_args; ++j) {
// Insert NOT_NULL markers for all rows.
// This is necessary because the function may receive columns whose nullability
// does not accurately reflect the original data source.
// For example, a nullable column that contains no null values may be converted
// to a non-nullable column during processing. To ensure consistency in the sort key
// encoding, we explicitly add NOT_NULL markers for every row.
if (!columns[j]->is_nullable()) {
for (auto& buff : buffs) {
buff.append(NOT_NULL_MARKER, 1);
}
}
visitor.is_last_field = (j + 1 == num_args);
RETURN_IF_ERROR(columns[j]->accept(&visitor));
// Add a separator between each column
if (j < num_args - 1) {
for (auto& buff : buffs) {
buff.append(COLUMN_SEPARATOR, 1);
}
}
}
for (size_t i = 0; i < num_rows; i++) {
result.append(std::move(buffs[i]));
}
return result.build(ColumnHelper::is_all_const(columns));
}
} // namespace starrocks
#include "gen_cpp/opcode/UtilityFunctions.inc"

View File

@@ -67,6 +67,9 @@ public:
// Build an equi-width histogram
DEFINE_VECTORIZED_FN(equiwidth_bucket);
// Build an order-preserving composite binary key from heterogeneous arguments
DEFINE_VECTORIZED_FN(encode_sort_key);
};
} // namespace starrocks

View File

@@ -62,6 +62,7 @@ private:
add_mapping<TYPE_DECIMALV2>();
add_mapping<TYPE_CHAR>();
add_mapping<TYPE_VARCHAR>();
add_mapping<TYPE_VARBINARY>();
add_mapping<TYPE_BOOLEAN>();
}

View File

@@ -409,4 +409,8 @@ public:
}
};
// Reuse VARCHAR's key coder behavior for VARBINARY
template <>
class KeyCoderTraits<TYPE_VARBINARY> : public KeyCoderTraits<TYPE_VARCHAR> {};
} // namespace starrocks

View File

@@ -55,58 +55,7 @@ namespace starrocks {
constexpr uint8_t SORT_KEY_NULL_FIRST_MARKER = 0x00;
constexpr uint8_t SORT_KEY_NORMAL_MARKER = 0x01;
template <class UT>
UT to_bigendian(UT v);
template <>
uint8_t to_bigendian(uint8_t v) {
return v;
}
template <>
uint16_t to_bigendian(uint16_t v) {
return BigEndian::FromHost16(v);
}
template <>
uint32_t to_bigendian(uint32_t v) {
return BigEndian::FromHost32(v);
}
template <>
uint64_t to_bigendian(uint64_t v) {
return BigEndian::FromHost64(v);
}
template <>
uint128_t to_bigendian(uint128_t v) {
return BigEndian::FromHost128(v);
}
template <class T>
void encode_integral(const T& v, std::string* dest) {
if constexpr (std::is_signed<T>::value) {
typedef typename std::make_unsigned<T>::type UT;
UT uv = v;
uv ^= static_cast<UT>(1) << (sizeof(UT) * 8 - 1);
uv = to_bigendian(uv);
dest->append(reinterpret_cast<const char*>(&uv), sizeof(uv));
} else {
T nv = to_bigendian(v);
dest->append(reinterpret_cast<const char*>(&nv), sizeof(nv));
}
}
template <class T>
void decode_integral(Slice* src, T* v) {
if constexpr (std::is_signed<T>::value) {
typedef typename std::make_unsigned<T>::type UT;
UT uv = *(UT*)(src->data);
uv = to_bigendian(uv);
uv ^= static_cast<UT>(1) << (sizeof(UT) * 8 - 1);
*v = uv;
} else {
T nv = *(T*)(src->data);
*v = to_bigendian(nv);
}
src->remove_prefix(sizeof(T));
}
using namespace encoding_utils;
template <int LEN>
static bool SSEEncodeChunk(const uint8_t** srcp, uint8_t** dstp) {
@@ -164,7 +113,8 @@ static inline void EncodeChunkLoop(const uint8_t** srcp, uint8_t** dstp, int len
}
}
inline void encode_slice(const Slice& s, std::string* dst, bool is_last) {
// Implementation of encoding_utils::encode_slice
void encoding_utils::encode_slice(const Slice& s, std::string* dst, bool is_last) {
if (is_last) {
dst->append(s.data, s.size);
} else {

View File

@@ -17,9 +17,80 @@
#include "column/field.h"
#include "column/vectorized_fwd.h"
#include "common/status.h"
#include "gutil/endian.h"
namespace starrocks {
// Utility functions for encoding various data types with order-preserving encoding
namespace encoding_utils {
// Endian conversion utilities
template <class UT>
inline UT to_bigendian(UT v) {
return v;
}
template <>
inline uint8_t to_bigendian(uint8_t v) {
return v;
}
template <>
inline uint16_t to_bigendian(uint16_t v) {
return BigEndian::FromHost16(v);
}
template <>
inline uint32_t to_bigendian(uint32_t v) {
return BigEndian::FromHost32(v);
}
template <>
inline uint64_t to_bigendian(uint64_t v) {
return BigEndian::FromHost64(v);
}
template <>
inline uint128_t to_bigendian(uint128_t v) {
return BigEndian::FromHost128(v);
}
// Integral encoding with order-preserving transformation
template <class T>
inline void encode_integral(const T& v, std::string* dest) {
if constexpr (std::is_signed<T>::value) {
typedef typename std::make_unsigned<T>::type UT;
UT uv = v;
uv ^= static_cast<UT>(1) << (sizeof(UT) * 8 - 1);
uv = to_bigendian(uv);
dest->append(reinterpret_cast<const char*>(&uv), sizeof(uv));
} else {
T nv = to_bigendian(v);
dest->append(reinterpret_cast<const char*>(&nv), sizeof(nv));
}
}
// Integral decoding with order-preserving transformation
template <class T>
inline void decode_integral(Slice* src, T* v) {
if constexpr (std::is_signed<T>::value) {
typedef typename std::make_unsigned<T>::type UT;
UT uv = *(UT*)(src->data);
uv = to_bigendian(uv);
uv ^= static_cast<UT>(1) << (sizeof(UT) * 8 - 1);
*v = uv;
} else {
T nv = *(T*)(src->data);
*v = to_bigendian(nv);
}
src->remove_prefix(sizeof(T));
}
// Slice encoding with SSE optimizations for middle fields
void encode_slice(const Slice& s, std::string* dst, bool is_last);
} // namespace encoding_utils
// Utility methods to encode composite primary key into single binary key, while
// preserving original sort order.
// Currently only bool, integral types(tinyint, smallint, int, bigint, largeint)
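
For reference, a minimal round-trip sketch using the relocated helpers. It assumes this header is included as storage/primary_key_encoder.h and that Slice (from util/slice.h) exposes a (data, size) constructor and a size member, as elsewhere in the codebase; it is a usage sketch, not part of the commit.

#include <cassert>
#include <string>
#include "storage/primary_key_encoder.h"
#include "util/slice.h"

void encode_decode_roundtrip() {
    std::string buf;
    // Sign bit is flipped and bytes stored big-endian, per encode_integral above.
    starrocks::encoding_utils::encode_integral<int64_t>(-12345, &buf);
    assert(buf.size() == sizeof(int64_t));

    starrocks::Slice s(buf.data(), buf.size());
    int64_t decoded = 0;
    starrocks::encoding_utils::decode_integral<int64_t>(&s, &decoded);
    assert(decoded == -12345);
    assert(s.size == 0); // decode_integral strips the consumed prefix
}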

View File

@@ -30,6 +30,9 @@
namespace starrocks {
// Import encoding utilities from primary_key_encoder.h
using encoding_utils::encode_integral;
Status RowStoreEncoderSimple::encode_columns_to_full_row_column(const Schema& schema, const Columns& columns,
BinaryColumn& dest) {
RETURN_IF_ERROR(is_supported(schema));

View File

@@ -19,62 +19,26 @@
#include "common/status.h"
#include "gutil/endian.h"
#include "gutil/stringprintf.h"
#include "storage/primary_key_encoder.h"
#include "types/bitmap_value.h"
namespace starrocks {
// Declarations and definitions of a template class must live in a single file, so we extract them here
template <class UT>
inline UT to_bigendian(UT v);
template <>
inline uint8_t to_bigendian(uint8_t v) {
return v;
}
template <>
inline uint16_t to_bigendian(uint16_t v) {
return BigEndian::FromHost16(v);
}
template <>
inline uint32_t to_bigendian(uint32_t v) {
return BigEndian::FromHost32(v);
}
template <>
inline uint64_t to_bigendian(uint64_t v) {
return BigEndian::FromHost64(v);
}
template <>
inline uint128_t to_bigendian(uint128_t v) {
return BigEndian::FromHost128(v);
}
template <class T>
inline size_t encode_integral(const T& v, std::string* dest) {
if constexpr (std::is_signed<T>::value) {
typedef typename std::make_unsigned<T>::type UT;
UT uv = v;
uv ^= static_cast<UT>(1) << (sizeof(UT) * 8 - 1);
uv = to_bigendian(uv);
dest->append(reinterpret_cast<const char*>(&uv), sizeof(uv));
return sizeof(uv);
} else {
T nv = to_bigendian(v);
dest->append(reinterpret_cast<const char*>(&nv), sizeof(nv));
return sizeof(nv);
}
}
// Note: to_bigendian and encode_integral functions are now available in
// storage/primary_key_encoder.h under the encoding_utils namespace
template <class T>
inline void decode_integral(Slice* src, T* v) {
if constexpr (std::is_signed<T>::value) {
typedef typename std::make_unsigned<T>::type UT;
UT uv = *(UT*)(src->data);
uv = to_bigendian(uv);
uv = encoding_utils::to_bigendian(uv);
uv ^= static_cast<UT>(1) << (sizeof(UT) * 8 - 1);
*v = uv;
} else {
T nv = *(T*)(src->data);
*v = to_bigendian(nv);
*v = encoding_utils::to_bigendian(nv);
}
src->remove_prefix(sizeof(T));
}

View File

@@ -119,6 +119,25 @@ struct ZoneMapDatum<TYPE_CHAR> : public ZoneMapDatumBase<TYPE_CHAR> {
template <>
struct ZoneMapDatum<TYPE_VARCHAR> final : public ZoneMapDatum<TYPE_CHAR> {};
template <>
struct ZoneMapDatum<TYPE_VARBINARY> final : public ZoneMapDatum<TYPE_CHAR> {
void resize_container_for_fit(TypeInfo* type_info, const void* v) override {
static const int INIT_SIZE = 64;
const Slice* slice = reinterpret_cast<const Slice*>(v);
if (slice->size > _length) {
_length = std::max<int>(BitUtil::next_power_of_two(slice->size), INIT_SIZE);
raw::stl_string_resize_uninitialized(&_value_container, _length);
value.data = _value_container.data();
value.size = 0;
}
}
void reset(TypeInfo* type_info) override {
value.data = _value_container.data();
value.size = 0;
}
};
template <LogicalType type>
struct ZoneMap {
ZoneMapDatum<type> min_value;

View File

@@ -20,8 +20,10 @@
#include "column/column_helper.h"
#include "column/column_viewer.h"
#include "common/statusor.h"
#include "exprs/function_context.h"
#include "runtime/runtime_state.h"
#include "testutil/assert.h"
#include "types/logical_type.h"
#include "util/random.h"
#include "util/time.h"
@@ -127,4 +129,168 @@ TEST_F(UtilityFunctionsTest, uuidTest) {
}
}
TEST_F(UtilityFunctionsTest, encodeSortKeyBasicOrdering) {
FunctionContext* ctx = FunctionContext::create_test_context();
auto ptr = std::unique_ptr<FunctionContext>(ctx);
// Prepare columns of different types
// int32
auto c_int = Int32Column::create();
c_int->append(1);
c_int->append(2);
c_int->append(10);
// binary
auto c_str = BinaryColumn::create();
c_str->append(Slice("a"));
c_str->append(Slice("b"));
c_str->append(Slice("c"));
// date
auto c_date = DateColumn::create();
c_date->append(DateValue::create(2021, 1, 1));
c_date->append(DateValue::create(2021, 1, 2));
c_date->append(DateValue::create(2021, 1, 10));
// timestamp
auto c_timestamp = TimestampColumn::create();
c_timestamp->append(TimestampValue::create(2021, 1, 1, 0, 0, 0, 0));
c_timestamp->append(TimestampValue::create(2021, 1, 2, 0, 0, 0, 0));
c_timestamp->append(TimestampValue::create(2021, 1, 10, 0, 0, 0, 0));
// float32
auto c_float32 = FloatColumn::create();
c_float32->append(1.0f);
c_float32->append(2.0f);
c_float32->append(10.0f);
// float64
auto c_float64 = DoubleColumn::create();
c_float64->append(1.0);
c_float64->append(2.0);
c_float64->append(10.0);
Columns cols;
cols.emplace_back(c_int);
cols.emplace_back(c_str);
cols.emplace_back(c_date);
cols.emplace_back(c_timestamp);
cols.emplace_back(c_float32);
cols.emplace_back(c_float64);
// verify each column
for (auto& col : cols) {
ASSIGN_OR_ASSERT_FAIL(ColumnPtr out, UtilityFunctions::encode_sort_key(ctx, {col}));
auto* bin = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(out);
ASSERT_EQ(3, bin->size());
std::vector<Slice> keys = {bin->get_slice(0), bin->get_slice(1), bin->get_slice(2)};
ASSERT_LT(keys[0].compare(keys[1]), 0) << col->get_name();
ASSERT_LT(keys[1].compare(keys[2]), 0) << col->get_name();
}
// verify all columns
ASSIGN_OR_ASSERT_FAIL(ColumnPtr out, UtilityFunctions::encode_sort_key(ctx, cols));
auto* bin = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(out);
ASSERT_EQ(3, bin->size());
// Verify lexicographic order aligns with tuple order
std::vector<Slice> keys = {bin->get_slice(0), bin->get_slice(1), bin->get_slice(2)};
ASSERT_LT(keys[0].compare(keys[1]), 0);
ASSERT_LT(keys[1].compare(keys[2]), 0);
}
TEST_F(UtilityFunctionsTest, encodeSortKeyNullHandling) {
FunctionContext* ctx = FunctionContext::create_test_context();
auto ptr = std::unique_ptr<FunctionContext>(ctx);
// Create nullable columns with mixed null and non-null values
auto c_int = NullableColumn::create(Int32Column::create(), NullColumn::create());
c_int->append_datum(Datum(int32_t(1))); // non-null
c_int->append_nulls(1); // null
c_int->append_datum(Datum(int32_t(2))); // non-null
c_int->append_nulls(1); // null
auto c_str = NullableColumn::create(BinaryColumn::create(), NullColumn::create());
c_str->append_nulls(1); // null
c_str->append_datum(Datum(Slice("z"))); // non-null
c_str->append_nulls(1); // null
c_str->append_datum(Datum(Slice("a"))); // non-null
Columns cols;
cols.emplace_back(c_int);
cols.emplace_back(c_str);
ASSIGN_OR_ASSERT_FAIL(ColumnPtr out, UtilityFunctions::encode_sort_key(ctx, cols));
auto* bin = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(out);
ASSERT_EQ(4, bin->size());
// Get the sort keys for each row
Slice k0 = bin->get_slice(0); // (1,NULL) - first non-null, second null
Slice k1 = bin->get_slice(1); // (NULL,"z") - first null, second non-null
Slice k2 = bin->get_slice(2); // (2,NULL) - first non-null, second null
Slice k3 = bin->get_slice(3); // (NULL,"a") - first null, second non-null
// Verify that null values sort before non-null values
// Row 1 (NULL,"z") should sort before Row 0 (1,NULL) - null in first column takes precedence
ASSERT_LT(k1.compare(k0), 0) << "NULL should sort before non-NULL in first column";
// Row 3 (NULL,"a") should sort before Row 2 (2,NULL) - null in first column takes precedence
ASSERT_LT(k3.compare(k2), 0) << "NULL should sort before non-NULL in first column";
// Row 3 (NULL,"a") should sort before Row 0 (1,NULL) - null in first column takes precedence
ASSERT_LT(k3.compare(k0), 0) << "NULL in first column should sort before any non-NULL";
// Verify that null values only contain null markers and no underlying data encoding
// This tests the fix where null rows don't process underlying data
// The first byte should be the null marker (\0 for null, \1 for non-null)
ASSERT_EQ('\0', k1.data[0]) << "First column null marker should be \\0";
ASSERT_EQ('\1', k0.data[0]) << "First column non-null marker should be \\1";
ASSERT_EQ('\0', k3.data[0]) << "First column null marker should be \\0";
ASSERT_EQ('\1', k2.data[0]) << "First column non-null marker should be \\1";
// Verify that rows with null in the second column also carry the expected bytes.
// For rows 0 and 2 the first column is non-null, so the layout is: non-null
// marker (1 byte) + int32 encoding (4 bytes) at indices 0-4, the column
// separator (\0) at index 5, then the second column's null marker (\0) at index 6.
ASSERT_EQ('\0', k0.data[5]) << "Column separator should be \\0 for row 0";
ASSERT_EQ('\0', k0.data[6]) << "Second column null marker should be \\0 for row 0";
ASSERT_EQ('\0', k2.data[5]) << "Column separator should be \\0 for row 2";
ASSERT_EQ('\0', k2.data[6]) << "Second column null marker should be \\0 for row 2";
// Verify that the sort keys are deterministic and consistent
// Running the same operation should produce identical results
ASSIGN_OR_ASSERT_FAIL(ColumnPtr out2, UtilityFunctions::encode_sort_key(ctx, cols));
auto* bin2 = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(out2);
ASSERT_EQ(4, bin2->size());
for (size_t i = 0; i < 4; i++) {
ASSERT_EQ(bin->get_slice(i).to_string(), bin2->get_slice(i).to_string())
<< "Sort keys should be deterministic for row " << i;
}
}
TEST_F(UtilityFunctionsTest, encodeSortKeyStringEscaping) {
FunctionContext* ctx = FunctionContext::create_test_context();
auto ptr = std::unique_ptr<FunctionContext>(ctx);
auto c1 = BinaryColumn::create();
auto c2 = BinaryColumn::create();
c1->append(Slice("a\0b", 3));
c1->append(Slice("a", 1));
c2->append(Slice("x", 1));
c2->append(Slice("\0", 1));
Columns cols;
cols.emplace_back(c1);
cols.emplace_back(c2);
ASSIGN_OR_ASSERT_FAIL(ColumnPtr out, UtilityFunctions::encode_sort_key(ctx, cols));
auto* bin = ColumnHelper::cast_to_raw<TYPE_VARBINARY>(out);
ASSERT_EQ(2, bin->size());
// Ensure keys are comparable and not identical
Slice k0 = bin->get_slice(0);
Slice k1 = bin->get_slice(1);
ASSERT_NE(k0.to_string(), k1.to_string());
}
} // namespace starrocks

View File

@@ -359,4 +359,83 @@ TEST_F(ColumnZoneMapTest, NormalTestCharPage) {
test_string("NormalTestCharPage", type_info);
}
// Test for varbinary
TEST_F(ColumnZoneMapTest, NormalTestVarbinaryPage) {
TabletColumn varbinary_column = create_varbinary_key(0);
TypeInfoPtr type_info = get_type_info(varbinary_column);
test_string("NormalTestVarbinaryPage", type_info);
}
// Test for varbinary with binary data
TEST_F(ColumnZoneMapTest, VarbinaryWithBinaryData) {
std::string filename = kTestDir + "/VarbinaryWithBinaryData";
TabletColumn varbinary_column = create_varbinary_key(0);
TypeInfoPtr type_info = get_type_info(varbinary_column);
auto writer = ZoneMapIndexWriter::create(type_info.get());
// Add binary data with various patterns
std::vector<std::string> binary_values1 = {
std::string("\x00\x01\x02\x03", 4), // Binary data starting with null bytes
std::string("\xFF\xFE\xFD\xFC", 4), // Binary data with high bytes
std::string("ABCD", 4), // ASCII data
std::string("\x00\x00\x00\x00", 4), // All null bytes
};
for (auto& value : binary_values1) {
Slice slice(value);
writer->add_values((const uint8_t*)&slice, 1);
}
writer->flush();
// Add more binary data with different patterns
std::vector<std::string> binary_values2 = {
std::string("\x01\x02\x03\x04", 4), std::string("\xFE\xFD\xFC\xFB", 4), std::string("EFGH", 4),
std::string("\xFF\xFF\xFF\xFF", 4), // All high bytes
};
for (auto& value : binary_values2) {
Slice slice(value);
writer->add_values((const uint8_t*)&slice, 1);
}
writer->add_nulls(1);
writer->flush();
// Add null values
writer->add_nulls(3);
writer->flush();
// Write out zone map index
ColumnIndexMetaPB index_meta;
write_file(*writer, index_meta, filename);
// Read and verify
ZoneMapIndexReader column_zone_map;
load_zone_map(column_zone_map, index_meta, filename);
ASSERT_EQ(3, column_zone_map.num_pages());
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
ASSERT_EQ(3, zone_maps.size());
// Check first page - should have min/max from binary_values1
// For binary data, comparison is byte-by-byte, so "\x00\x00\x00\x00" is min and "\xFF\xFE\xFD\xFC" is max
check_result(zone_maps[0], true, true, std::string("\x00\x00\x00\x00", 4), std::string("\xFF\xFE\xFD\xFC", 4),
false, true);
// Check second page - should have min/max from binary_values2 plus null
// "\x01\x02\x03\x04" is min and "\xFF\xFF\xFF\xFF" is max
check_result(zone_maps[1], true, true, std::string("\x01\x02\x03\x04", 4), std::string("\xFF\xFF\xFF\xFF", 4), true,
true);
// Check third page - should be all nulls
check_result(zone_maps[2], false, false, "", "", true, false);
// Check segment zonemap - should cover all data
// The segment zonemap should have the overall min/max across all pages
const auto& segment_zonemap = index_meta.zone_map_index().segment_zone_map();
check_result(segment_zonemap, true, true, std::string("\x00\x00\x00\x00", 4), std::string("\xFF\xFF\xFF\xFF", 4),
true, true);
}
} // namespace starrocks

View File

@@ -164,6 +164,18 @@ inline TabletColumn create_varchar_key(int32_t id, bool is_nullable = true, int
return column;
}
inline TabletColumn create_varbinary_key(int32_t id, bool is_nullable = true, int length = 8) {
TabletColumn column;
column.set_unique_id(id);
column.set_name(std::to_string(id));
column.set_type(TYPE_VARBINARY);
column.set_is_key(true);
column.set_is_nullable(is_nullable);
column.set_length(length);
column.set_index_length(4);
return column;
}
inline TabletColumn create_array(int32_t id, bool is_nullable = true, int length = 24) {
TabletColumn column;
column.set_unique_id(id);

View File

@@ -254,6 +254,7 @@ public class FunctionSet {
public static final String ISNOTNULL = "isnotnull";
public static final String ASSERT_TRUE = "assert_true";
public static final String HOST_NAME = "host_name";
public static final String ENCODE_SORT_KEY = "encode_sort_key";
// Aggregate functions:
public static final String APPROX_COUNT_DISTINCT = "approx_count_distinct";
public static final String APPROX_COUNT_DISTINCT_HLL_SKETCH = "approx_count_distinct_hll_sketch";

View File

@@ -821,7 +821,7 @@ public abstract class Type implements Cloneable {
return true;
}
return !isOnlyMetricType() && !isJsonType() && !isFunctionType() && !isBinaryType();
return !isOnlyMetricType() && !isJsonType() && !isFunctionType();
}
public boolean canGroupBy() {
@@ -839,7 +839,7 @@
}
return true;
}
return !isOnlyMetricType() && !isJsonType() && !isFunctionType() && !isBinaryType();
return !isOnlyMetricType() && !isJsonType() && !isFunctionType();
}
public boolean canOrderBy() {
@@ -847,8 +847,7 @@
if (isArrayType()) {
return ((ArrayType) this).getItemType().canOrderBy();
}
return !isOnlyMetricType() && !isJsonType() && !isFunctionType() && !isBinaryType() && !isStructType() &&
!isMapType();
return !isOnlyMetricType() && !isJsonType() && !isFunctionType() && !isStructType() && !isMapType();
}
public boolean canPartitionBy() {
@@ -883,8 +882,9 @@
public boolean canDistributedBy() {
// TODO(mofei) support distributed by for JSON
// Allow VARBINARY as distribution key
return !isComplexType() && !isFloatingPointType() && !isOnlyMetricType() && !isJsonType()
&& !isFunctionType() && !isBinaryType();
&& !isFunctionType();
}
public boolean canBeWindowFunctionArgumentTypes() {

View File

@@ -435,8 +435,8 @@ public class CreateTableAnalyzer {
}
ColumnDef cd = columnDefs.get(idx);
Type t = cd.getType();
if (!(t.isBoolean() || t.isIntegerType() || t.isLargeint() || t.isVarchar() || t.isDate() ||
t.isDatetime())) {
if (!(t.isBoolean() || t.isIntegerType() || t.isLargeint() || t.isVarchar() || t.isBinaryType() ||
t.isDate() || t.isDatetime())) {
throw new SemanticException("sort key column[" + cd.getName() + "] type not supported: " + t.toSql());
}
}

View File

@@ -1131,7 +1131,7 @@ public class CreateTableTest {
}
@Test
public void testCreateVarBinaryTable() {
public void testCreateVarBinaryTable() throws Exception {
// duplicate table
ExceptionChecker.expectThrowsNoException(() -> createTable(
"create table test.varbinary_tbl\n" +
@@ -1174,20 +1174,16 @@
"distributed by hash(k1) buckets 1\n" + "properties('replication_num' = '1');"));
// failed
ExceptionChecker.expectThrowsWithMsg(AnalysisException.class,
"Invalid data type of key column 'k2': 'VARBINARY'",
() -> createTable("create table test.varbinary_tbl0\n"
createTable("create table test.varbinary_tbl00\n"
+ "(k1 int, k2 varbinary)\n"
+ "duplicate key(k1, k2)\n"
+ "distributed by hash(k1) buckets 1\n"
+ "properties('replication_num' = '1');"));
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"VARBINARY(10) column can not be distribution column",
() -> createTable("create table test.varbinary_tbl0 \n"
+ "properties('replication_num' = '1');");
createTable("create table test.varbinary_tbl01 \n"
+ "(k1 int, k2 varbinary(10) )\n"
+ "duplicate key(k1)\n"
+ "distributed by hash(k2) buckets 1\n"
+ "properties('replication_num' = '1');"));
+ "properties('replication_num' = '1');");
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"Column[j] type[VARBINARY] cannot be a range partition key",
() -> createTable("create table test.varbinary_tbl0 \n" +
@@ -1199,7 +1195,7 @@
}
@Test
public void testCreateBinaryTable() {
public void testCreateBinaryTable() throws Exception {
// duplicate table
ExceptionChecker.expectThrowsNoException(() -> createTable(
"create table test.binary_tbl\n" +
@@ -1242,20 +1238,16 @@
"distributed by hash(k1) buckets 1\n" + "properties('replication_num' = '1');"));
// failed
ExceptionChecker.expectThrowsWithMsg(AnalysisException.class,
"Invalid data type of key column 'k2': 'VARBINARY'",
() -> createTable("create table test.binary_tbl0\n"
createTable("create table test.binary_tbl01\n"
+ "(k1 int, k2 binary)\n"
+ "duplicate key(k1, k2)\n"
+ "distributed by hash(k1) buckets 1\n"
+ "properties('replication_num' = '1');"));
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"VARBINARY(10) column can not be distribution column",
() -> createTable("create table test.binary_tbl0 \n"
+ "properties('replication_num' = '1');");
createTable("create table test.binary_tbl11 \n"
+ "(k1 int, k2 binary(10) )\n"
+ "duplicate key(k1)\n"
+ "distributed by hash(k2) buckets 1\n"
+ "properties('replication_num' = '1');"));
+ "properties('replication_num' = '1');");
ExceptionChecker.expectThrowsWithMsg(DdlException.class,
"Column[j] type[VARBINARY] cannot be a range partition key",
() -> createTable("create table test.binary_tbl0 \n" +

View File

@@ -825,6 +825,7 @@ vectorized_functions = [
[100019, 'assert_true', True, False, 'BOOLEAN', ['BOOLEAN', "VARCHAR"], 'UtilityFunctions::assert_true'],
[100018, 'host_name', True, False, 'VARCHAR', [], "UtilityFunctions::host_name"],
[100020, 'get_query_profile', True, False, 'VARCHAR', ['VARCHAR'], "UtilityFunctions::get_query_profile"],
[100024, 'encode_sort_key', True, False, 'VARBINARY', ['ANY_ELEMENT', '...'], 'UtilityFunctions::encode_sort_key'],
# json string function
[110022, "get_json_int", False, False, "BIGINT", ["VARCHAR", "VARCHAR"], "JsonFunctions::get_json_bigint",

View File

@@ -0,0 +1,123 @@
-- name: test_encode_sort_key_json
CREATE DATABASE test_encode_sort_key_json;
-- result:
-- !result
USE test_encode_sort_key_json;
-- result:
-- !result
CREATE TABLE `json_test_table` (
`id` int(11) NOT NULL COMMENT "",
`json_data` json ,
`json_array` json ,
`json_nested` json,
`sort_key` varbinary(1024) AS (
encode_sort_key(
get_json_int(json_data, '$.age'),
get_json_string(json_data, '$.name'),
get_json_string(json_data, '$.city'),
get_json_string(json_array, '$[0]'),
get_json_double(json_nested, '$.user.profile.score')
)
) COMMENT "Auto-generated sort key from extracted JSON fields"
) ENGINE=OLAP
DISTRIBUTED BY HASH(sort_key) BUCKETS 2
ORDER BY (sort_key)
PROPERTIES ( "replication_num" = "1");
-- result:
-- !result
INSERT INTO json_test_table (id, json_data, json_array, json_nested) VALUES
(1, parse_json('{"name": "Alice", "age": 25, "city": "New York"}'),
parse_json('["apple", "banana", "cherry"]'),
parse_json('{"user": {"id": 101, "profile": {"verified": true, "score": 95.5}}}')),
(2, parse_json('{"name": "Bob", "age": 30, "city": "Los Angeles"}'),
parse_json('["orange", "grape"]'),
parse_json('{"user": {"id": 102, "profile": {"verified": false, "score": 87.2}}}')),
(3, parse_json('{"name": "Charlie", "age": 28, "city": "Chicago"}'),
parse_json('["mango", "pineapple", "kiwi", "strawberry"]'),
parse_json('{"user": {"id": 103, "profile": {"verified": true, "score": 92.8}}}')),
(4, parse_json('{"name": "Diana", "age": 22, "city": "Miami"}'),
parse_json('["pear"]'),
parse_json('{"user": {"id": 104, "profile": {"verified": true, "score": 89.1}}}')),
(5, parse_json('{"name": "Eve", "age": 35, "city": "Seattle"}'),
parse_json('["blueberry", "raspberry", "blackberry"]'),
parse_json('{"user": {"id": 105, "profile": {"verified": false, "score": 78.9}}}'));
-- result:
-- !result
SELECT id,
json_data,
get_json_int(json_data, '$.age') as age,
get_json_string(json_data, '$.name') as name,
get_json_string(json_data, '$.city') as city,
get_json_string(json_array, '$[0]') as first_fruit,
get_json_double(json_nested, '$.user.profile.score') as score,
sort_key,
length(sort_key) as sort_key_length
FROM json_test_table
ORDER BY id;
-- result:
1 {"age": 25, "city": "New York", "name": "Alice"} 25 Alice New York apple 95.5 b'\x01\x80\x00\x00\x00\x00\x00\x00\x19\x00\x01Alice\x00\x00\x00\x01New York\x00\x00\x00\x01apple\x00\x00\x00\x01\xc0W\xe0\x00\x00\x00\x00\x00' 49
2 {"age": 30, "city": "Los Angeles", "name": "Bob"} 30 Bob Los Angeles orange 87.2 b'\x01\x80\x00\x00\x00\x00\x00\x00\x1e\x00\x01Bob\x00\x00\x00\x01Los Angeles\x00\x00\x00\x01orange\x00\x00\x00\x01\xc0U\xcc\xcc\xcc\xcc\xcc\xcd' 51
3 {"age": 28, "city": "Chicago", "name": "Charlie"} 28 Charlie Chicago mango 92.8 b'\x01\x80\x00\x00\x00\x00\x00\x00\x1c\x00\x01Charlie\x00\x00\x00\x01Chicago\x00\x00\x00\x01mango\x00\x00\x00\x01\xc0W333333' 50
4 {"age": 22, "city": "Miami", "name": "Diana"} 22 Diana Miami pear 89.1 b'\x01\x80\x00\x00\x00\x00\x00\x00\x16\x00\x01Diana\x00\x00\x00\x01Miami\x00\x00\x00\x01pear\x00\x00\x00\x01\xc0VFfffff' 45
5 {"age": 35, "city": "Seattle", "name": "Eve"} 35 Eve Seattle blueberry 78.9 b'\x01\x80\x00\x00\x00\x00\x00\x00#\x00\x01Eve\x00\x00\x00\x01Seattle\x00\x00\x00\x01blueberry\x00\x00\x00\x01\xc0S\xb9\x99\x99\x99\x99\x9a' 50
-- !result
SELECT id, json_data, json_array
FROM json_test_table
ORDER BY sort_key;
-- result:
4 {"age": 22, "city": "Miami", "name": "Diana"} ["pear"]
1 {"age": 25, "city": "New York", "name": "Alice"} ["apple", "banana", "cherry"]
3 {"age": 28, "city": "Chicago", "name": "Charlie"} ["mango", "pineapple", "kiwi", "strawberry"]
2 {"age": 30, "city": "Los Angeles", "name": "Bob"} ["orange", "grape"]
5 {"age": 35, "city": "Seattle", "name": "Eve"} ["blueberry", "raspberry", "blackberry"]
-- !result
SELECT id, json_data, json_array
FROM json_test_table
ORDER BY sort_key DESC;
-- result:
5 {"age": 35, "city": "Seattle", "name": "Eve"} ["blueberry", "raspberry", "blackberry"]
2 {"age": 30, "city": "Los Angeles", "name": "Bob"} ["orange", "grape"]
3 {"age": 28, "city": "Chicago", "name": "Charlie"} ["mango", "pineapple", "kiwi", "strawberry"]
1 {"age": 25, "city": "New York", "name": "Alice"} ["apple", "banana", "cherry"]
4 {"age": 22, "city": "Miami", "name": "Diana"} ["pear"]
-- !result
SELECT id, json_data, json_array
FROM json_test_table
WHERE sort_key > (SELECT sort_key FROM json_test_table WHERE id = 2)
ORDER BY id;
-- result:
5 {"age": 35, "city": "Seattle", "name": "Eve"} ["blueberry", "raspberry", "blackberry"]
-- !result
INSERT INTO json_test_table (id, json_data, json_array, json_nested) VALUES
(6, NULL, parse_json('["test"]'), parse_json('{"test": null}')),
(7, parse_json('{"age": 40}'), parse_json('[]'), parse_json('{"user": {"id": 106}}'));
-- result:
-- !result
SELECT id,
get_json_int(json_data, '$.age') as age
FROM json_test_table
ORDER BY id;
-- result:
1 25
2 30
3 28
4 22
5 35
6 None
7 40
-- !result
SELECT id, json_data, json_array
FROM json_test_table
ORDER BY sort_key;
-- result:
6 None ["test"]
4 {"age": 22, "city": "Miami", "name": "Diana"} ["pear"]
1 {"age": 25, "city": "New York", "name": "Alice"} ["apple", "banana", "cherry"]
3 {"age": 28, "city": "Chicago", "name": "Charlie"} ["mango", "pineapple", "kiwi", "strawberry"]
2 {"age": 30, "city": "Los Angeles", "name": "Bob"} ["orange", "grape"]
5 {"age": 35, "city": "Seattle", "name": "Eve"} ["blueberry", "raspberry", "blackberry"]
7 {"age": 40} []
-- !result
DROP DATABASE test_encode_sort_key_json;
-- result:
-- !result

View File

@@ -0,0 +1,86 @@
-- name: test_encode_sort_key_json
CREATE DATABASE test_encode_sort_key_json;
USE test_encode_sort_key_json;
-- Create a table with JSON data types and a single generated sort key column
-- The sort key extracts specific fields from JSON and combines them for efficient sorting
CREATE TABLE `json_test_table` (
`id` int(11) NOT NULL COMMENT "",
`json_data` json ,
`json_array` json ,
`json_nested` json,
`sort_key` varbinary(1024) AS (
encode_sort_key(
get_json_int(json_data, '$.age'),
get_json_string(json_data, '$.name'),
get_json_string(json_data, '$.city'),
get_json_string(json_array, '$[0]'),
get_json_double(json_nested, '$.user.profile.score')
)
) COMMENT "Auto-generated sort key from extracted JSON fields"
) ENGINE=OLAP
DISTRIBUTED BY HASH(sort_key) BUCKETS 2
ORDER BY (sort_key)
PROPERTIES ( "replication_num" = "1");
-- Insert test data with various JSON structures
-- The sort key will be automatically generated from extracted JSON fields
INSERT INTO json_test_table (id, json_data, json_array, json_nested) VALUES
(1, parse_json('{"name": "Alice", "age": 25, "city": "New York"}'),
parse_json('["apple", "banana", "cherry"]'),
parse_json('{"user": {"id": 101, "profile": {"verified": true, "score": 95.5}}}')),
(2, parse_json('{"name": "Bob", "age": 30, "city": "Los Angeles"}'),
parse_json('["orange", "grape"]'),
parse_json('{"user": {"id": 102, "profile": {"verified": false, "score": 87.2}}}')),
(3, parse_json('{"name": "Charlie", "age": 28, "city": "Chicago"}'),
parse_json('["mango", "pineapple", "kiwi", "strawberry"]'),
parse_json('{"user": {"id": 103, "profile": {"verified": true, "score": 92.8}}}')),
(4, parse_json('{"name": "Diana", "age": 22, "city": "Miami"}'),
parse_json('["pear"]'),
parse_json('{"user": {"id": 104, "profile": {"verified": true, "score": 89.1}}}')),
(5, parse_json('{"name": "Eve", "age": 35, "city": "Seattle"}'),
parse_json('["blueberry", "raspberry", "blackberry"]'),
parse_json('{"user": {"id": 105, "profile": {"verified": false, "score": 78.9}}}'));
-- Test 1: Verify that the generated sort key column is automatically populated
-- This shows how encode_sort_key extracts and combines JSON fields
SELECT id,
json_data,
get_json_int(json_data, '$.age') as age,
get_json_string(json_data, '$.name') as name,
get_json_string(json_data, '$.city') as city,
get_json_string(json_array, '$[0]') as first_fruit,
get_json_double(json_nested, '$.user.profile.score') as score,
sort_key,
length(sort_key) as sort_key_length
FROM json_test_table
ORDER BY id;
SELECT id, json_data, json_array
FROM json_test_table
ORDER BY sort_key;
SELECT id, json_data, json_array
FROM json_test_table
ORDER BY sort_key DESC;
SELECT id, json_data, json_array
FROM json_test_table
WHERE sort_key > (SELECT sort_key FROM json_test_table WHERE id = 2)
ORDER BY id;
INSERT INTO json_test_table (id, json_data, json_array, json_nested) VALUES
(6, NULL, parse_json('["test"]'), parse_json('{"test": null}')),
(7, parse_json('{"age": 40}'), parse_json('[]'), parse_json('{"user": {"id": 106}}'));
SELECT id,
get_json_int(json_data, '$.age') as age
FROM json_test_table
ORDER BY id;
SELECT id, json_data, json_array
FROM json_test_table
ORDER BY sort_key;
-- Clean up
DROP DATABASE test_encode_sort_key_json;