[Feature] Introduce Variant column definition (#61099)
Signed-off-by: Xu Bai <xuba@cisco.com>
This commit is contained in:
parent
bfd8a0cfcd
commit
e970bfd5de
|
|
@ -44,4 +44,5 @@ add_library(Column STATIC
|
|||
stream_chunk.cpp
|
||||
column_view/column_view_base.cpp
|
||||
column_view/column_view_helper.cpp
|
||||
variant_column.cpp
|
||||
)
|
||||
|
|
|
|||
|
|
@ -93,6 +93,7 @@ public:
|
|||
virtual bool is_object() const { return false; }
|
||||
|
||||
virtual bool is_json() const { return false; }
|
||||
virtual bool is_variant() const { return false; }
|
||||
|
||||
virtual bool is_array() const { return false; }
|
||||
|
||||
|
|
@ -100,6 +101,7 @@ public:
|
|||
virtual bool is_nullable_view() const { return false; }
|
||||
virtual bool is_array_view() const { return false; }
|
||||
virtual bool is_json_view() const { return false; }
|
||||
virtual bool is_variant_view() const { return false; }
|
||||
virtual bool is_binary_view() const { return false; }
|
||||
virtual bool is_struct_view() const { return false; }
|
||||
virtual bool is_map_view() const { return false; }
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ public:
|
|||
ColumnView(ColumnView&& column_view) = delete;
|
||||
bool is_view() const override { return true; }
|
||||
bool is_json_view() const override { return ColumnHelper::get_data_column(_default_column.get())->is_json(); }
|
||||
bool is_variant_view() const override { return ColumnHelper::get_data_column(_default_column.get())->is_variant(); }
|
||||
bool is_map_view() const override { return ColumnHelper::get_data_column(_default_column.get())->is_map(); }
|
||||
bool is_array_view() const override { return ColumnHelper::get_data_column(_default_column.get())->is_array(); }
|
||||
bool is_binary_view() const override { return ColumnHelper::get_data_column(_default_column.get())->is_binary(); }
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ static bool should_use_view(LogicalType ltype) {
|
|||
case TYPE_STRUCT:
|
||||
case TYPE_MAP:
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
case TYPE_VARCHAR:
|
||||
case TYPE_CHAR:
|
||||
case TYPE_BINARY:
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ VISIT_IMPL(HyperLogLogColumn)
|
|||
VISIT_IMPL(BitmapColumn)
|
||||
VISIT_IMPL(PercentileColumn)
|
||||
VISIT_IMPL(JsonColumn)
|
||||
VISIT_IMPL(VariantColumn)
|
||||
VISIT_IMPL(FixedLengthColumn<int96_t>)
|
||||
VISIT_IMPL(FixedLengthColumn<uint24_t>)
|
||||
VISIT_IMPL(FixedLengthColumn<decimal12_t>)
|
||||
|
|
@ -71,5 +72,6 @@ VISIT_IMPL(FixedLengthColumnBase<uint24_t>)
|
|||
VISIT_IMPL(FixedLengthColumnBase<int96_t>)
|
||||
VISIT_IMPL(FixedLengthColumnBase<decimal12_t>)
|
||||
VISIT_IMPL(ObjectColumn<JsonValue>)
|
||||
VISIT_IMPL(ObjectColumn<VariantValue>)
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ public:
|
|||
virtual Status visit(const BitmapColumn& column);
|
||||
virtual Status visit(const PercentileColumn& column);
|
||||
virtual Status visit(const JsonColumn& column);
|
||||
virtual Status visit(const VariantColumn& column);
|
||||
virtual Status visit(const FixedLengthColumn<int96_t>& column);
|
||||
virtual Status visit(const FixedLengthColumn<uint24_t>& column);
|
||||
virtual Status visit(const FixedLengthColumn<decimal12_t>& column);
|
||||
|
|
@ -82,6 +83,7 @@ public:
|
|||
virtual Status visit(const FixedLengthColumnBase<uint24_t>& column);
|
||||
virtual Status visit(const FixedLengthColumnBase<decimal12_t>& column);
|
||||
virtual Status visit(const ObjectColumn<JsonValue>& column);
|
||||
virtual Status visit(const ObjectColumn<VariantValue>& column);
|
||||
virtual Status visit(const ArrayViewColumn& column) {
|
||||
return Status::NotSupported("ArrayViewColumn is not supported");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -95,6 +95,8 @@ public:
|
|||
|
||||
Status visit(const LargeBinaryColumn& column) override { return _impl->do_visit(column); }
|
||||
|
||||
Status visit(const VariantColumn& column) override { return _impl->do_visit(column); }
|
||||
|
||||
private:
|
||||
Impl* _impl;
|
||||
};
|
||||
|
|
@ -170,6 +172,8 @@ public:
|
|||
|
||||
Status visit(LargeBinaryColumn* column) override { return _impl->do_visit(column); }
|
||||
|
||||
Status visit(VariantColumn* column) override { return _impl->do_visit(column); }
|
||||
|
||||
private:
|
||||
Impl* _impl;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -49,6 +49,8 @@ VISIT_IMPL(BitmapColumn)
|
|||
VISIT_IMPL(PercentileColumn)
|
||||
VISIT_IMPL(JsonColumn)
|
||||
VISIT_IMPL(ObjectColumn<JsonValue>)
|
||||
VISIT_IMPL(VariantColumn)
|
||||
VISIT_IMPL(ObjectColumn<VariantValue>)
|
||||
VISIT_IMPL(FixedLengthColumn<int96_t>)
|
||||
VISIT_IMPL(FixedLengthColumn<uint24_t>)
|
||||
VISIT_IMPL(FixedLengthColumn<decimal12_t>)
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ public:
|
|||
virtual Status visit(BitmapColumn* column);
|
||||
virtual Status visit(PercentileColumn* column);
|
||||
virtual Status visit(JsonColumn* column);
|
||||
virtual Status visit(VariantColumn* column);
|
||||
virtual Status visit(FixedLengthColumn<int96_t>* column);
|
||||
virtual Status visit(FixedLengthColumn<uint24_t>* column);
|
||||
virtual Status visit(FixedLengthColumn<decimal12_t>* column);
|
||||
|
|
@ -81,6 +82,7 @@ public:
|
|||
virtual Status visit(FixedLengthColumnBase<uint24_t>* column);
|
||||
virtual Status visit(FixedLengthColumnBase<decimal12_t>* column);
|
||||
virtual Status visit(ObjectColumn<JsonValue>* column);
|
||||
virtual Status visit(ObjectColumn<VariantValue>* column);
|
||||
virtual Status visit(ArrayViewColumn* column) { return Status::NotSupported("ArrayViewColumn is not supported"); }
|
||||
virtual Status visit(ColumnView* column) { return Status::NotSupported("ColumnView is not supported"); }
|
||||
};
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ public:
|
|||
|
||||
bool is_nullable() const override { return _data->is_nullable(); }
|
||||
bool is_json() const override { return _data->is_json(); }
|
||||
bool is_variant() const override { return _data->is_variant(); }
|
||||
bool is_array() const override { return _data->is_array(); }
|
||||
|
||||
bool is_null(size_t index) const override { return _data->is_null(0); }
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ class BitmapValue;
|
|||
class HyperLogLog;
|
||||
class PercentileValue;
|
||||
class JsonValue;
|
||||
class VariantValue;
|
||||
} // namespace starrocks
|
||||
|
||||
namespace starrocks {
|
||||
|
|
@ -89,6 +90,7 @@ public:
|
|||
const BitmapValue* get_bitmap() const { return get<BitmapValue*>(); }
|
||||
const PercentileValue* get_percentile() const { return get<PercentileValue*>(); }
|
||||
const JsonValue* get_json() const { return get<JsonValue*>(); }
|
||||
const VariantValue* get_variant() const { return get<VariantValue*>(); }
|
||||
|
||||
void set_int8(int8_t v) { set<decltype(v)>(v); }
|
||||
void set_uint8(uint8_t v) { set<decltype(v)>(v); }
|
||||
|
|
@ -114,6 +116,7 @@ public:
|
|||
void set_bitmap(BitmapValue* v) { set<decltype(v)>(v); }
|
||||
void set_percentile(PercentileValue* v) { set<decltype(v)>(v); }
|
||||
void set_json(JsonValue* v) { set<decltype(v)>(v); }
|
||||
void set_variant(VariantValue* v) { set<decltype(v)>(v); }
|
||||
|
||||
template <typename T>
|
||||
const T& get() const {
|
||||
|
|
@ -198,7 +201,7 @@ private:
|
|||
using Variant =
|
||||
std::variant<std::monostate, int8_t, uint8_t, int16_t, uint16_t, uint24_t, int32_t, uint32_t, int64_t,
|
||||
uint64_t, int96_t, int128_t, int256_t, Slice, decimal12_t, DecimalV2Value, float, double,
|
||||
DatumArray, DatumMap, HyperLogLog*, BitmapValue*, PercentileValue*, JsonValue*>;
|
||||
DatumArray, DatumMap, HyperLogLog*, BitmapValue*, PercentileValue*, JsonValue*, VariantValue*>;
|
||||
Variant _value;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ public:
|
|||
|
||||
bool is_nullable() const override { return true; }
|
||||
bool is_json() const override { return _data_column->is_json(); }
|
||||
bool is_variant() const override { return _data_column->is_variant(); }
|
||||
bool is_array() const override { return _data_column->is_array(); }
|
||||
bool is_array_view() const override { return _data_column->is_array_view(); }
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
#include "gutil/casts.h"
|
||||
#include "types/bitmap_value.h"
|
||||
#include "types/hll.h"
|
||||
#include "types/variant_value.h"
|
||||
#include "util/json.h"
|
||||
#include "util/mysql_row_buffer.h"
|
||||
#include "util/percentile_value.h"
|
||||
|
|
@ -115,7 +116,16 @@ bool ObjectColumn<T>::append_strings(const Slice* data, size_t size) {
|
|||
_pool.reserve(_pool.size() + size);
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
const auto& s = data[i];
|
||||
_pool.emplace_back(s);
|
||||
if constexpr (std::is_same_v<T, VariantValue>) {
|
||||
auto variant_result = T::create(s);
|
||||
if (!variant_result.ok()) {
|
||||
LOG(WARNING) << "Failed to create VariantValue from Slice: " << variant_result.status().to_string();
|
||||
return false;
|
||||
}
|
||||
_pool.emplace_back(std::move(*variant_result));
|
||||
} else {
|
||||
_pool.emplace_back(s);
|
||||
}
|
||||
}
|
||||
|
||||
_cache_ok = false;
|
||||
|
|
@ -339,5 +349,6 @@ template class ObjectColumn<HyperLogLog>;
|
|||
template class ObjectColumn<BitmapValue>;
|
||||
template class ObjectColumn<PercentileValue>;
|
||||
template class ObjectColumn<JsonValue>;
|
||||
template class ObjectColumn<VariantValue>;
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#include "gutil/strings/substitute.h"
|
||||
#include "types/bitmap_value.h"
|
||||
#include "types/hll.h"
|
||||
#include "types/variant_value.h"
|
||||
#include "util/json.h"
|
||||
#include "util/percentile_value.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -22,10 +22,12 @@
|
|||
#include "column/nullable_column.h"
|
||||
#include "column/object_column.h"
|
||||
#include "column/struct_column.h"
|
||||
#include "column/variant_column.h"
|
||||
#include "column/vectorized_fwd.h"
|
||||
#include "types/constexpr.h"
|
||||
#include "types/int256.h"
|
||||
#include "types/logical_type.h"
|
||||
#include "types/variant_value.h"
|
||||
#include "util/json.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
|
@ -84,6 +86,8 @@ template <>
|
|||
inline constexpr bool isArithmeticLT<TYPE_JSON> = false;
|
||||
template <>
|
||||
inline constexpr bool isArithmeticLT<TYPE_VARBINARY> = false;
|
||||
template <>
|
||||
inline constexpr bool isArithmeticLT<TYPE_VARIANT> = false;
|
||||
|
||||
template <LogicalType logical_type>
|
||||
constexpr bool isSliceLT = false;
|
||||
|
|
@ -296,6 +300,13 @@ struct RunTimeTypeTraits<TYPE_JSON> {
|
|||
using ProxyContainerType = ColumnType::Container;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct RunTimeTypeTraits<TYPE_VARIANT> {
|
||||
using CppType = VariantValue*;
|
||||
using ColumnType = VariantColumn;
|
||||
using ProxyContainerType = ColumnType::Container;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct RunTimeTypeTraits<TYPE_BINARY> {
|
||||
using CppType = Slice;
|
||||
|
|
@ -510,4 +521,12 @@ struct RunTimeTypeLimits<TYPE_JSON> {
|
|||
static value_type max_value() { return JsonValue{vpack::Slice::maxKeySlice()}; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct RunTimeTypeLimits<TYPE_VARIANT> {
|
||||
using value_type = VariantValue;
|
||||
|
||||
static value_type min_value() { return VariantValue::of_null(); }
|
||||
static value_type max_value() { return VariantValue::create(Slice::max_value()).value(); }
|
||||
};
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "variant_column.h"
|
||||
|
||||
#include "types/variant_value.h"
|
||||
#include "util/mysql_row_buffer.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
uint32_t VariantColumn::serialize(size_t idx, uint8_t* pos) const {
|
||||
return static_cast<uint32_t>(get_object(idx)->serialize(pos));
|
||||
}
|
||||
|
||||
void VariantColumn::serialize_batch(uint8_t* dst, Buffer<uint32_t>& slice_sizes, size_t chunk_size,
|
||||
uint32_t max_one_row_size) const {
|
||||
for (size_t i = 0; i < chunk_size; ++i) {
|
||||
slice_sizes[i] += serialize(i, dst + i * max_one_row_size + slice_sizes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t VariantColumn::serialize_size(size_t idx) const {
|
||||
return static_cast<uint32_t>(get_object(idx)->serialize_size());
|
||||
}
|
||||
|
||||
const uint8_t* VariantColumn::deserialize_and_append(const uint8_t* pos) {
|
||||
// Read the first 4 bytes to get the size of the variant
|
||||
uint32_t variant_length = *reinterpret_cast<const uint32_t*>(pos);
|
||||
auto variant_result = VariantValue::create(Slice(pos, variant_length + sizeof(uint32_t)));
|
||||
|
||||
if (!variant_result.ok()) {
|
||||
throw std::runtime_error("Failed to deserialize variant: " + variant_result.status().to_string());
|
||||
}
|
||||
|
||||
uint32_t serialize_size = variant_result->serialize_size();
|
||||
append(std::move(*variant_result));
|
||||
|
||||
return pos + serialize_size;
|
||||
}
|
||||
|
||||
void VariantColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_binary_protocol) const {
|
||||
VariantValue* variant = get_object(idx);
|
||||
DCHECK(variant != nullptr) << "Variant value is null at index " << idx;
|
||||
|
||||
auto json = variant->to_json();
|
||||
if (!json.ok()) {
|
||||
buf->push_null();
|
||||
} else {
|
||||
buf->push_string(json->data(), json->size(), '\'');
|
||||
}
|
||||
}
|
||||
|
||||
void VariantColumn::append_datum(const Datum& datum) {
|
||||
BaseClass::append(datum.get<VariantValue*>());
|
||||
}
|
||||
|
||||
void VariantColumn::append(const Column& src, size_t offset, size_t count) {
|
||||
BaseClass::append(src, offset, count);
|
||||
}
|
||||
|
||||
void VariantColumn::append(const VariantValue& object) {
|
||||
BaseClass::append(object);
|
||||
}
|
||||
|
||||
void VariantColumn::append(const VariantValue* object) {
|
||||
BaseClass::append(object);
|
||||
}
|
||||
|
||||
void VariantColumn::append(VariantValue&& object) {
|
||||
BaseClass::append(std::move(object));
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "column/column.h"
|
||||
#include "column/object_column.h"
|
||||
#include "column/vectorized_fwd.h"
|
||||
#include "types/variant_value.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
class VariantColumn final
|
||||
: public CowFactory<ColumnFactory<ObjectColumn<VariantValue>, VariantColumn>, VariantColumn, Column> {
|
||||
public:
|
||||
using ValueType = VariantValue;
|
||||
using SuperClass = CowFactory<ColumnFactory<ObjectColumn<VariantValue>, VariantColumn>, VariantColumn, Column>;
|
||||
using BaseClass = VariantColumnBase;
|
||||
|
||||
VariantColumn() = default;
|
||||
explicit VariantColumn(size_t size) : SuperClass(size) {}
|
||||
VariantColumn(const VariantColumn& rhs) : SuperClass(rhs) {}
|
||||
|
||||
VariantColumn(VariantColumn&& rhs) noexcept : SuperClass(std::move(rhs)) {}
|
||||
|
||||
MutableColumnPtr clone() const override { return BaseClass::clone(); }
|
||||
MutableColumnPtr clone_empty() const override { return this->create(); }
|
||||
|
||||
uint32_t serialize(size_t idx, uint8_t* pos) const override;
|
||||
uint32_t serialize_size(size_t idx) const override;
|
||||
void serialize_batch(uint8_t* dst, Buffer<uint32_t>& slice_sizes, size_t chunk_size,
|
||||
uint32_t max_one_row_size) const override;
|
||||
const uint8_t* deserialize_and_append(const uint8_t* pos) override;
|
||||
|
||||
void append_datum(const Datum& datum) override;
|
||||
void append(const Column& src, size_t offset, size_t count) override;
|
||||
|
||||
// Add a forwarding function to expose the base class append function
|
||||
void append(const Column& src) { append(src, 0, src.size()); }
|
||||
void append(const VariantValue* object);
|
||||
void append(VariantValue&& object);
|
||||
void append(const VariantValue& object);
|
||||
|
||||
bool is_variant() const override { return true; }
|
||||
|
||||
std::string get_name() const override { return "variant"; }
|
||||
|
||||
void put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_binary_protocol = false) const override;
|
||||
};
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
@ -27,6 +27,7 @@ class HyperLogLog;
|
|||
class BitmapValue;
|
||||
class PercentileValue;
|
||||
class JsonValue;
|
||||
class VariantValue;
|
||||
|
||||
class DateValue;
|
||||
class TimestampValue;
|
||||
|
|
@ -104,6 +105,9 @@ using PercentileColumn = ObjectColumn<PercentileValue>;
|
|||
using JsonColumnBase = ObjectColumn<JsonValue>;
|
||||
class JsonColumn;
|
||||
|
||||
using VariantColumnBase = ObjectColumn<VariantValue>;
|
||||
class VariantColumn;
|
||||
|
||||
class MapColumn;
|
||||
class StructColumn;
|
||||
|
||||
|
|
|
|||
|
|
@ -184,6 +184,7 @@ Status MySQLDataSource::open(RuntimeState* state) {
|
|||
case TYPE_DOUBLE:
|
||||
case TYPE_FLOAT:
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
case TYPE_FUNCTION:
|
||||
case TYPE_VARBINARY:
|
||||
case TYPE_UNSIGNED_TINYINT:
|
||||
|
|
|
|||
|
|
@ -121,7 +121,8 @@ std::string VExtLiteral::_value_to_string(ColumnPtr& column) {
|
|||
[&](auto&& arg) {
|
||||
using T = std::decay_t<decltype(arg)>;
|
||||
if constexpr (is_type_in<T, std::monostate, int96_t, decimal12_t, DecimalV2Value, DatumArray,
|
||||
DatumMap, HyperLogLog*, BitmapValue*, PercentileValue*, JsonValue*>()) {
|
||||
DatumMap, HyperLogLog*, BitmapValue*, PercentileValue*, JsonValue*,
|
||||
VariantValue*>()) {
|
||||
// ignore these types
|
||||
} else if constexpr (std::is_same_v<T, Slice>) {
|
||||
res = std::string(arg.data, arg.size);
|
||||
|
|
|
|||
|
|
@ -957,8 +957,8 @@ struct ColumnRangeBuilder {
|
|||
template <LogicalType ltype, typename E, CompoundNodeType Type>
|
||||
Status operator()(ChunkPredicateBuilder<E, Type>* parent, const SlotDescriptor* slot,
|
||||
std::map<std::string, ColumnValueRangeType>* column_value_ranges) {
|
||||
if constexpr (ltype == TYPE_TIME || ltype == TYPE_NULL || ltype == TYPE_JSON || lt_is_float<ltype> ||
|
||||
lt_is_binary<ltype>) {
|
||||
if constexpr (ltype == TYPE_TIME || ltype == TYPE_NULL || ltype == TYPE_JSON || ltype == TYPE_VARIANT ||
|
||||
lt_is_float<ltype> || lt_is_binary<ltype>) {
|
||||
return Status::OK();
|
||||
} else {
|
||||
// Treat tinyint and boolean as int
|
||||
|
|
|
|||
|
|
@ -676,7 +676,7 @@ public:
|
|||
return _min = TimestampValue::MIN_TIMESTAMP_VALUE && _max == TimestampValue::MAX_TIMESTAMP_VALUE;
|
||||
} else if constexpr (IsDecimal<CppType>) {
|
||||
return _min == DecimalV2Value::get_min_decimal() && _max == DecimalV2Value::get_max_decimal();
|
||||
} else if constexpr (Type != TYPE_JSON) {
|
||||
} else if constexpr (Type != TYPE_JSON && Type != TYPE_VARIANT) {
|
||||
return _min == RunTimeTypeLimits<Type>::min_value() && _max == RunTimeTypeLimits<Type>::max_value();
|
||||
} else {
|
||||
return false;
|
||||
|
|
@ -1078,7 +1078,7 @@ private:
|
|||
} else if constexpr (IsDecimal<CppType>) {
|
||||
_min = DecimalV2Value::get_max_decimal();
|
||||
_max = DecimalV2Value::get_min_decimal();
|
||||
} else if constexpr (Type != TYPE_JSON) {
|
||||
} else if constexpr (Type != TYPE_JSON && Type != TYPE_VARIANT) {
|
||||
// for json vaue, cpp type is JsonValue*
|
||||
// but min/max value type is JsonValue
|
||||
// and JsonValue needs special serialization handling.
|
||||
|
|
@ -1105,7 +1105,7 @@ private:
|
|||
} else if constexpr (IsDecimal<CppType>) {
|
||||
_max = DecimalV2Value::get_max_decimal();
|
||||
_min = DecimalV2Value::get_min_decimal();
|
||||
} else if constexpr (Type != TYPE_JSON) {
|
||||
} else if constexpr (Type != TYPE_JSON && Type != TYPE_VARIANT) {
|
||||
// for json vaue, cpp type is JsonValue*
|
||||
// but min/max value type is JsonValue
|
||||
// and JsonValue needs special serialization handling.
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ Status MysqlTableWriter::_build_insert_sql(int from, int to, std::string_view* s
|
|||
_stmt_buffer.push_back('"');
|
||||
} else if constexpr (type == TYPE_TINYINT || type == TYPE_BOOLEAN) {
|
||||
fmt::format_to(std::back_inserter(_stmt_buffer), "{}", (int32_t)viewer.value(i));
|
||||
} else if constexpr (type == TYPE_JSON) {
|
||||
} else if constexpr (type == TYPE_JSON || type == TYPE_VARIANT) {
|
||||
fmt::format_to(std::back_inserter(_stmt_buffer), "{}", *viewer.value(i));
|
||||
} else {
|
||||
fmt::format_to(std::back_inserter(_stmt_buffer), "{}", viewer.value(i));
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@
|
|||
#include "util/raw_container.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
// Date: Julian Date -2000-01-01 ~ 9999-01-01
|
||||
// MAX USE 22 bits
|
||||
typedef int32_t JulianDate;
|
||||
|
|
@ -234,6 +233,10 @@ public:
|
|||
template <bool use_iso8601_format = false, bool igonre_microsecond = false>
|
||||
static int to_string(Timestamp timestamp, char* s, size_t n);
|
||||
|
||||
// Convert timestamp to string with timezone format (e.g., "2025-04-16 12:34:56.78-04:00")
|
||||
template <bool use_iso8601_format = false, bool ignore_microsecond = false>
|
||||
static std::string to_string_with_timezone(Timestamp timestamp, const cctz::time_zone& timezone);
|
||||
|
||||
inline static double time_to_literal(double time);
|
||||
|
||||
inline static Timestamp of_epoch_second(int64_t seconds, int64_t microseconds);
|
||||
|
|
@ -566,4 +569,27 @@ int timestamp::to_string(Timestamp timestamp, char* to, size_t n) {
|
|||
return 19;
|
||||
}
|
||||
|
||||
template <bool use_iso8601_format, bool ignore_microsecond>
|
||||
std::string timestamp::to_string_with_timezone(Timestamp timestamp, const cctz::time_zone& timezone) {
|
||||
// Convert StarRocks timestamp to unix seconds
|
||||
int64_t julian = to_julian(timestamp);
|
||||
int64_t time_microseconds = to_time(timestamp);
|
||||
|
||||
// Calculate unix seconds from julian date
|
||||
int64_t unix_seconds = (julian - date::UNIX_EPOCH_JULIAN) * SECS_PER_DAY + time_microseconds / USECS_PER_SEC;
|
||||
int64_t microseconds = time_microseconds % USECS_PER_SEC;
|
||||
|
||||
// Convert to time_point
|
||||
auto time_point = std::chrono::system_clock::from_time_t(unix_seconds);
|
||||
time_point += std::chrono::microseconds(microseconds);
|
||||
|
||||
// Format the timestamp with timezone using cctz
|
||||
constexpr const char* base_format = use_iso8601_format ? "%Y-%m-%dT%H:%M:" : "%Y-%m-%d %H:%M:";
|
||||
constexpr const char* time_format = ignore_microsecond ? "%S" : "%E*S";
|
||||
constexpr auto tz_format = "%Ez";
|
||||
const std::string format_str = std::string(base_format) + time_format + tz_format;
|
||||
|
||||
return cctz::format(format_str, time_point, timezone);
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
|
|||
|
|
@ -268,14 +268,16 @@ bool TypeDescriptor::support_join() const {
|
|||
if (type == TYPE_ARRAY || type == TYPE_MAP || type == TYPE_STRUCT) {
|
||||
return std::all_of(children.begin(), children.end(), [](const TypeDescriptor& t) { return t.support_join(); });
|
||||
}
|
||||
return type != TYPE_JSON && type != TYPE_OBJECT && type != TYPE_PERCENTILE && type != TYPE_HLL;
|
||||
return type != TYPE_JSON && type != TYPE_OBJECT && type != TYPE_PERCENTILE && type != TYPE_HLL &&
|
||||
type != TYPE_VARIANT;
|
||||
}
|
||||
|
||||
bool TypeDescriptor::support_orderby() const {
|
||||
if (type == TYPE_ARRAY) {
|
||||
return children[0].support_orderby();
|
||||
}
|
||||
return type != TYPE_JSON && type != TYPE_OBJECT && type != TYPE_PERCENTILE && type != TYPE_HLL && type != TYPE_MAP;
|
||||
return type != TYPE_JSON && type != TYPE_OBJECT && type != TYPE_PERCENTILE && type != TYPE_HLL &&
|
||||
type != TYPE_MAP && type != TYPE_VARIANT;
|
||||
}
|
||||
|
||||
bool TypeDescriptor::support_groupby() const {
|
||||
|
|
@ -283,7 +285,8 @@ bool TypeDescriptor::support_groupby() const {
|
|||
return std::all_of(children.begin(), children.end(),
|
||||
[](const TypeDescriptor& t) { return t.support_groupby(); });
|
||||
}
|
||||
return type != TYPE_JSON && type != TYPE_OBJECT && type != TYPE_PERCENTILE && type != TYPE_HLL;
|
||||
return type != TYPE_JSON && type != TYPE_OBJECT && type != TYPE_PERCENTILE && type != TYPE_HLL &&
|
||||
type != TYPE_VARIANT;
|
||||
}
|
||||
|
||||
TypeDescriptor TypeDescriptor::from_storage_type_info(TypeInfo* type_info) {
|
||||
|
|
@ -321,6 +324,7 @@ int TypeDescriptor::get_slot_size() const {
|
|||
case TYPE_OBJECT:
|
||||
case TYPE_PERCENTILE:
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
case TYPE_VARBINARY:
|
||||
return sizeof(Slice);
|
||||
|
||||
|
|
|
|||
|
|
@ -113,6 +113,13 @@ struct TypeDescriptor {
|
|||
return res;
|
||||
}
|
||||
|
||||
static TypeDescriptor create_variant_type() {
|
||||
TypeDescriptor res;
|
||||
res.type = TYPE_VARIANT;
|
||||
res.len = 128; // Default length for variant type
|
||||
return res;
|
||||
}
|
||||
|
||||
static TypeDescriptor create_array_type(const TypeDescriptor& children) {
|
||||
TypeDescriptor res;
|
||||
res.type = TYPE_ARRAY;
|
||||
|
|
@ -211,6 +218,8 @@ struct TypeDescriptor {
|
|||
return TypeDescriptor::create_decimalv3_type(TYPE_DECIMAL256, precision, scale);
|
||||
case TYPE_JSON:
|
||||
return TypeDescriptor::create_json_type();
|
||||
case TYPE_VARIANT:
|
||||
return TypeDescriptor::create_variant_type();
|
||||
case TYPE_OBJECT:
|
||||
return TypeDescriptor::create_bitmap_type();
|
||||
default:
|
||||
|
|
@ -325,7 +334,9 @@ struct TypeDescriptor {
|
|||
|
||||
// For some types with potential huge length, whose memory consumption is far more than normal types,
|
||||
// they need a different chunk_size setting
|
||||
bool is_huge_type() const { return type == TYPE_JSON || type == TYPE_OBJECT || type == TYPE_HLL; }
|
||||
bool is_huge_type() const {
|
||||
return type == TYPE_JSON || type == TYPE_OBJECT || type == TYPE_HLL || type == TYPE_VARIANT;
|
||||
}
|
||||
|
||||
/// Returns the size of a slot for this type.
|
||||
int get_slot_size() const;
|
||||
|
|
|
|||
|
|
@ -31,10 +31,12 @@
|
|||
#include "column/nullable_column.h"
|
||||
#include "column/object_column.h"
|
||||
#include "column/struct_column.h"
|
||||
#include "column/variant_column.h"
|
||||
#include "gutil/strings/substitute.h"
|
||||
#include "runtime/descriptors.h"
|
||||
#include "serde/protobuf_serde.h"
|
||||
#include "types/hll.h"
|
||||
#include "types/variant_value.h"
|
||||
#include "util/coding.h"
|
||||
#include "util/json.h"
|
||||
#include "util/percentile_value.h"
|
||||
|
|
@ -379,6 +381,55 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class VariantColumnSerde {
|
||||
public:
|
||||
static int64_t max_serialized_size(const VariantColumn& column) {
|
||||
const auto& pool = column.get_pool();
|
||||
int64_t size = 0;
|
||||
size += sizeof(uint32_t); // num_objects
|
||||
for (const auto& obj : pool) {
|
||||
size += sizeof(uint64_t);
|
||||
size += obj.serialize_size();
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static uint8_t* serialize(const VariantColumn& column, uint8_t* buff) {
|
||||
buff = write_little_endian_32(column.get_pool().size(), buff);
|
||||
for (const auto& v : column.get_pool()) {
|
||||
constexpr uint64_t size_field_length = sizeof(uint64_t);
|
||||
uint64_t actual = v.serialize(buff + size_field_length);
|
||||
buff = write_little_endian_64(actual, buff);
|
||||
buff += actual;
|
||||
}
|
||||
|
||||
return buff;
|
||||
}
|
||||
|
||||
static StatusOr<const uint8_t*> deserialize(const uint8_t* buff, VariantColumn* column) {
|
||||
uint32_t num_objects = 0;
|
||||
buff = read_little_endian_32(buff, &num_objects);
|
||||
column->reset_column();
|
||||
auto& pool = column->get_pool();
|
||||
pool.reserve(num_objects);
|
||||
for (int i = 0; i < num_objects; ++i) {
|
||||
uint64_t serialized_size = 0;
|
||||
buff = read_little_endian_64(buff, &serialized_size);
|
||||
auto variant = VariantValue::create(Slice(buff, serialized_size));
|
||||
if (!variant.ok()) {
|
||||
return Status::Corruption(fmt::format("Failed to deserialize VariantValue at index {}: {}", i,
|
||||
variant.status().to_string()));
|
||||
}
|
||||
|
||||
pool.emplace_back(std::move(variant.value()));
|
||||
buff += serialized_size;
|
||||
}
|
||||
|
||||
return buff;
|
||||
}
|
||||
};
|
||||
|
||||
class NullableColumnSerde {
|
||||
public:
|
||||
static int64_t max_serialized_size(const NullableColumn& column, const int encode_level) {
|
||||
|
|
@ -543,6 +594,11 @@ public:
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status do_visit(const VariantColumn& column) {
|
||||
_size += VariantColumnSerde::max_serialized_size(column);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
int64_t size() const { return _size; }
|
||||
|
||||
private:
|
||||
|
|
@ -607,6 +663,11 @@ public:
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status do_visit(const VariantColumn& column) {
|
||||
_cur = VariantColumnSerde::serialize(column, _cur);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
uint8_t* cur() const { return _cur; }
|
||||
|
||||
int64_t bytes() const { return _cur - _buff; }
|
||||
|
|
@ -679,6 +740,16 @@ public:
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status do_visit(VariantColumn* column) {
|
||||
auto v = VariantColumnSerde::deserialize(_cur, column);
|
||||
if (!v.ok()) {
|
||||
return v.status();
|
||||
}
|
||||
|
||||
_cur = v.value();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
const uint8_t* cur() const { return _cur; }
|
||||
|
||||
int64_t bytes() const { return _cur - _buff; }
|
||||
|
|
|
|||
|
|
@ -625,9 +625,10 @@ bool ChunkPipelineAccumulator::is_finished() const {
|
|||
}
|
||||
|
||||
template <class ColumnT>
|
||||
inline constexpr bool is_object = std::is_same_v<ColumnT, ArrayColumn> || std::is_same_v<ColumnT, StructColumn> ||
|
||||
std::is_same_v<ColumnT, MapColumn> || std::is_same_v<ColumnT, JsonColumn> ||
|
||||
std::is_same_v<ObjectColumn<typename ColumnT::ValueType>, ColumnT>;
|
||||
inline constexpr bool is_object =
|
||||
std::is_same_v<ColumnT, ArrayColumn> || std::is_same_v<ColumnT, StructColumn> ||
|
||||
std::is_same_v<ColumnT, MapColumn> || std::is_same_v<ColumnT, JsonColumn> ||
|
||||
std::is_same_v<ColumnT, VariantColumn> || std::is_same_v<ObjectColumn<typename ColumnT::ValueType>, ColumnT>;
|
||||
|
||||
// Selective-copy data from SegmentedColumn according to provided index
|
||||
class SegmentedColumnSelectiveCopy final : public ColumnVisitorAdapter<SegmentedColumnSelectiveCopy> {
|
||||
|
|
@ -737,7 +738,7 @@ public:
|
|||
return {};
|
||||
}
|
||||
|
||||
// Inefficient fallback implementation, it's usually used for Array/Struct/Map/Json
|
||||
// Inefficient fallback implementation, it's usually used for Array/Struct/Map/Json/Variant
|
||||
template <class ColumnT>
|
||||
typename std::enable_if_t<is_object<ColumnT>, Status> do_visit(const ColumnT& column) {
|
||||
_result = column.clone_empty();
|
||||
|
|
|
|||
|
|
@ -698,6 +698,7 @@ ColumnPredicate* new_column_in_predicate_generic(const TypeInfoPtr& type_info, C
|
|||
case TYPE_OBJECT:
|
||||
case TYPE_PERCENTILE:
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
case TYPE_NULL:
|
||||
case TYPE_FUNCTION:
|
||||
case TYPE_TIME:
|
||||
|
|
|
|||
|
|
@ -405,6 +405,7 @@ ColumnPredicate* new_column_not_in_predicate(const TypeInfoPtr& type_info, Colum
|
|||
case TYPE_OBJECT:
|
||||
case TYPE_PERCENTILE:
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
case TYPE_NULL:
|
||||
case TYPE_FUNCTION:
|
||||
case TYPE_TIME:
|
||||
|
|
|
|||
|
|
@ -182,6 +182,7 @@ static ColumnPredicate* new_column_predicate(const TypeInfoPtr& type_info, Colum
|
|||
case TYPE_OBJECT:
|
||||
case TYPE_PERCENTILE:
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
case TYPE_NULL:
|
||||
case TYPE_FUNCTION:
|
||||
case TYPE_TIME:
|
||||
|
|
|
|||
|
|
@ -1638,6 +1638,7 @@ const FieldConverter* get_field_converter(LogicalType from_type, LogicalType to_
|
|||
TYPE_CASE_CLAUSE(TYPE_OBJECT)
|
||||
TYPE_CASE_CLAUSE(TYPE_PERCENTILE)
|
||||
TYPE_CASE_CLAUSE(TYPE_JSON)
|
||||
TYPE_CASE_CLAUSE(TYPE_VARIANT)
|
||||
TYPE_CASE_CLAUSE(TYPE_VARBINARY)
|
||||
case TYPE_DECIMAL32:
|
||||
case TYPE_DECIMAL64:
|
||||
|
|
|
|||
|
|
@ -39,8 +39,8 @@ struct RuntimeColumnPredicateBuilder {
|
|||
const SlotDescriptor* slot, int32_t driver_sequence,
|
||||
ObjectPool* pool) {
|
||||
// keep consistent with ColumnRangeBuilder
|
||||
if constexpr (ltype == TYPE_TIME || ltype == TYPE_NULL || ltype == TYPE_JSON || lt_is_float<ltype> ||
|
||||
lt_is_binary<ltype>) {
|
||||
if constexpr (ltype == TYPE_TIME || ltype == TYPE_NULL || ltype == TYPE_JSON || ltype == TYPE_VARIANT ||
|
||||
lt_is_float<ltype> || lt_is_binary<ltype>) {
|
||||
DCHECK(false) << "unreachable path";
|
||||
return Status::NotSupported("unreachable path");
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ uint32_t TabletColumn::get_field_length_by_type(LogicalType type, uint32_t strin
|
|||
case TYPE_HLL:
|
||||
case TYPE_PERCENTILE:
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
case TYPE_VARBINARY:
|
||||
return string_length + sizeof(get_olap_string_max_length());
|
||||
case TYPE_ARRAY:
|
||||
|
|
|
|||
|
|
@ -28,4 +28,5 @@ add_library(Types STATIC
|
|||
checker/type_checker.cpp
|
||||
type_checker_manager.cpp
|
||||
int256.cpp
|
||||
variant_value.cpp
|
||||
)
|
||||
|
|
|
|||
|
|
@ -60,10 +60,12 @@ LogicalType string_to_logical_type(const std::string& type_str) {
|
|||
if (upper_type_str == "DECIMAL256") return TYPE_DECIMAL256;
|
||||
if (upper_type_str == "INT256") return TYPE_INT256;
|
||||
if (upper_type_str == "JSON") return TYPE_JSON;
|
||||
if (upper_type_str == "BINARY") return TYPE_BINARY;
|
||||
if (upper_type_str == "VARBINARY") return TYPE_VARBINARY;
|
||||
if (upper_type_str == "ANY_ARRAY") return TYPE_ARRAY;
|
||||
if (upper_type_str == "ANY_STRUCT") return TYPE_STRUCT;
|
||||
if (upper_type_str == "ANY_MAP") return TYPE_MAP;
|
||||
if (upper_type_str == "VARIANT") return TYPE_VARIANT;
|
||||
LOG(WARNING) << "invalid type string. [type='" << type_str << "']";
|
||||
return TYPE_UNKNOWN;
|
||||
}
|
||||
|
|
@ -152,6 +154,8 @@ const char* logical_type_to_string(LogicalType type) {
|
|||
return "MAX_VALUE";
|
||||
case TYPE_VARBINARY:
|
||||
return "VARBINARY";
|
||||
case TYPE_VARIANT:
|
||||
return "VARIANT";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
|
@ -285,6 +289,7 @@ public:
|
|||
_data[TYPE_VARBINARY] = TYPE_VARBINARY;
|
||||
_data[TYPE_DECIMAL256] = TYPE_DECIMAL256;
|
||||
_data[TYPE_INT256] = TYPE_INT256;
|
||||
_data[TYPE_VARIANT] = TYPE_VARIANT;
|
||||
}
|
||||
LogicalType get_logical_type(LogicalType field_type) { return _data[field_type]; }
|
||||
|
||||
|
|
|
|||
|
|
@ -72,10 +72,11 @@ enum LogicalType {
|
|||
TYPE_PERCENTILE = 53,
|
||||
|
||||
TYPE_JSON = 54,
|
||||
TYPE_VARIANT = 55,
|
||||
|
||||
// max value of LogicalType, newly-added type should not exceed this value.
|
||||
// used to create a fixed-size hash map.
|
||||
TYPE_MAX_VALUE = 55
|
||||
TYPE_MAX_VALUE = 56
|
||||
};
|
||||
|
||||
// TODO(lism): support varbinary for zone map.
|
||||
|
|
@ -168,6 +169,7 @@ inline bool is_semi_type(LogicalType type) {
|
|||
case TYPE_ARRAY:
|
||||
case TYPE_MAP:
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
@ -244,6 +246,7 @@ constexpr bool is_scalar_logical_type(LogicalType ltype) {
|
|||
case TYPE_DECIMAL128: /* 26 */
|
||||
case TYPE_DECIMAL256: /* 27 */
|
||||
case TYPE_JSON:
|
||||
case TYPE_VARIANT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
@ -322,6 +325,7 @@ VALUE_GUARD(LogicalType, ObjectLTGuard, lt_is_object, TYPE_OBJECT)
|
|||
VALUE_GUARD(LogicalType, StringLTGuard, lt_is_string, TYPE_CHAR, TYPE_VARCHAR)
|
||||
VALUE_GUARD(LogicalType, BinaryLTGuard, lt_is_binary, TYPE_BINARY, TYPE_VARBINARY)
|
||||
VALUE_GUARD(LogicalType, JsonGuard, lt_is_json, TYPE_JSON)
|
||||
VALUE_GUARD(LogicalType, VariantGuard, lt_is_variant, TYPE_VARIANT)
|
||||
VALUE_GUARD(LogicalType, FunctionGuard, lt_is_function, TYPE_FUNCTION)
|
||||
VALUE_GUARD(LogicalType, ObjectFamilyLTGuard, lt_is_object_family, TYPE_JSON, TYPE_HLL, TYPE_OBJECT, TYPE_PERCENTILE)
|
||||
VALUE_GUARD(LogicalType, ArrayGuard, lt_is_array, TYPE_ARRAY)
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ namespace starrocks {
|
|||
M(TYPE_TIME) \
|
||||
M(TYPE_JSON) \
|
||||
M(TYPE_VARBINARY) \
|
||||
M(TYPE_VARIANT) \
|
||||
M(TYPE_BOOLEAN)
|
||||
|
||||
#define APPLY_FOR_COMPLEX_TYPE(M) \
|
||||
|
|
@ -104,7 +105,8 @@ namespace starrocks {
|
|||
M(FUNCTION) \
|
||||
M(BINARY) \
|
||||
M(VARBINARY) \
|
||||
M(JSON)
|
||||
M(JSON) \
|
||||
M(VARIANT)
|
||||
|
||||
#define APPLY_FOR_MIN_MAX_COMPRESSABLE_TYPE(M) \
|
||||
APPLY_FOR_ALL_INT_TYPE(M) \
|
||||
|
|
|
|||
|
|
@ -0,0 +1,191 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "variant_value.h"
|
||||
|
||||
#include <arrow/util/endian.h>
|
||||
|
||||
#include <boost/uuid/uuid_io.hpp>
|
||||
#include <cstring>
|
||||
|
||||
#include "util/url_coding.h"
|
||||
#include "util/variant_util.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
StatusOr<VariantValue> VariantValue::create(const Slice& slice) {
|
||||
// Validate slice first
|
||||
if (slice.get_data() == nullptr) {
|
||||
return Status::InvalidArgument("Invalid variant slice: null data pointer");
|
||||
}
|
||||
|
||||
if (slice.get_size() < sizeof(uint32_t)) {
|
||||
return Status::InvalidArgument("Invalid variant slice: too small to contain size header");
|
||||
}
|
||||
|
||||
const char* variant_raw = slice.get_data();
|
||||
// The first 4 bytes are the size of the variant
|
||||
uint32_t variant_size;
|
||||
std::memcpy(&variant_size, variant_raw, sizeof(uint32_t));
|
||||
// Check variant size limit (16MB)
|
||||
if (variant_size > kMaxVariantSize) {
|
||||
return Status::InvalidArgument("Variant size exceeds maximum limit: " + std::to_string(variant_size) + " > " +
|
||||
std::to_string(kMaxVariantSize));
|
||||
}
|
||||
|
||||
if (variant_size > slice.get_size() - sizeof(uint32_t)) {
|
||||
return Status::InvalidArgument(
|
||||
"Invalid variant size: " + std::to_string(variant_size) +
|
||||
" exceeds available data: " + std::to_string(slice.get_size() - sizeof(uint32_t)));
|
||||
}
|
||||
|
||||
const auto variant = std::string_view(variant_raw + sizeof(uint32_t), variant_size);
|
||||
|
||||
auto metadata_status = load_metadata(variant);
|
||||
if (!metadata_status.ok()) {
|
||||
return metadata_status.status();
|
||||
}
|
||||
|
||||
const auto& metadata_view = metadata_status.value();
|
||||
if (metadata_view.size() > variant_size) {
|
||||
return Status::InvalidArgument("Metadata size exceeds variant size");
|
||||
}
|
||||
|
||||
std::string metadata(metadata_view);
|
||||
RETURN_IF_ERROR(validate_metadata(metadata));
|
||||
std::string value(variant_raw + sizeof(uint32_t) + metadata_view.size(), variant_size - metadata_view.size());
|
||||
|
||||
return VariantValue(std::move(metadata), std::move(value));
|
||||
}
|
||||
|
||||
Status VariantValue::validate_metadata(const std::string_view metadata) {
|
||||
// metadata at least 3 bytes: version, dictionarySize and at least one offset.
|
||||
if (metadata.size() < kMinMetadataSize) {
|
||||
return Status::InternalError("Variant metadata is too short");
|
||||
}
|
||||
|
||||
const uint8_t header = static_cast<uint8_t>(metadata[0]);
|
||||
if (const uint8_t version = header & kVersionMask; version != 1) {
|
||||
return Status::NotSupported("Unsupported variant version: " + std::to_string(version));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
VariantValue VariantValue::of_null() {
|
||||
static constexpr uint8_t header = static_cast<uint8_t>(VariantPrimitiveType::NULL_TYPE) << 2;
|
||||
static constexpr uint8_t null_chars[] = {header};
|
||||
return VariantValue(VariantMetadata::kEmptyMetadata,
|
||||
std::string_view{reinterpret_cast<const char*>(null_chars), 1});
|
||||
}
|
||||
|
||||
StatusOr<std::string_view> VariantValue::load_metadata(const std::string_view variant) {
|
||||
if (variant.empty()) {
|
||||
return Status::InvalidArgument("Variant is empty");
|
||||
}
|
||||
|
||||
// Check variant size limit (16MB)
|
||||
if (variant.size() > kMaxVariantSize) {
|
||||
return Status::InvalidArgument("Variant size exceeds maximum limit: " + std::to_string(variant.size()) + " > " +
|
||||
std::to_string(kMaxVariantSize));
|
||||
}
|
||||
|
||||
const uint8_t header = static_cast<uint8_t>(variant[0]);
|
||||
if (const uint8_t version = header & kVersionMask; version != 1) {
|
||||
return Status::NotSupported("Unsupported variant version: " + std::to_string(version));
|
||||
}
|
||||
|
||||
const uint8_t offset_size = 1 + ((header & kOffsetSizeMask) >> kOffsetSizeShift);
|
||||
if (offset_size < 1 || offset_size > 4) {
|
||||
return Status::InvalidArgument("Invalid offset size in variant metadata: " + std::to_string(offset_size) +
|
||||
", expected 1, 2, 3 or 4 bytes");
|
||||
}
|
||||
|
||||
if (variant.size() < kHeaderSize + offset_size) {
|
||||
return Status::InvalidArgument("Variant too short to contain dict_size");
|
||||
}
|
||||
|
||||
uint32_t dict_size = VariantUtil::readLittleEndianUnsigned(variant.data() + 1, offset_size);
|
||||
uint32_t offset_list_offset = kHeaderSize + offset_size;
|
||||
|
||||
// Check for potential overflow in offset list size calculation
|
||||
if (dict_size > (kMaxVariantSize - offset_list_offset) / offset_size - 1) {
|
||||
return Status::InvalidArgument("Dict size too large: " + std::to_string(dict_size));
|
||||
}
|
||||
|
||||
uint32_t required_offset_list_size = (1 + dict_size) * offset_size;
|
||||
uint32_t data_offset = offset_list_offset + required_offset_list_size;
|
||||
uint32_t last_offset_pos = offset_list_offset + dict_size * offset_size;
|
||||
if (last_offset_pos + offset_size > variant.size()) {
|
||||
return Status::InvalidArgument("Variant too short to contain all offsets");
|
||||
}
|
||||
|
||||
uint32_t last_data_size = VariantUtil::readLittleEndianUnsigned(variant.data() + last_offset_pos, offset_size);
|
||||
uint32_t end_offset = data_offset + last_data_size;
|
||||
|
||||
if (end_offset > variant.size()) {
|
||||
return Status::CapacityLimitExceed("Variant metadata end offset exceeds variant size: " +
|
||||
std::to_string(end_offset) + " > " + std::to_string(variant.size()));
|
||||
}
|
||||
|
||||
return std::string_view(variant.data(), end_offset);
|
||||
}
|
||||
|
||||
size_t VariantValue::serialize(uint8_t* dst) const {
|
||||
size_t offset = 0;
|
||||
|
||||
// The first 4 bytes are the total size of the variant
|
||||
uint32_t total_size = static_cast<uint32_t>(_metadata.size() + _value.size());
|
||||
memcpy(dst + offset, &total_size, sizeof(uint32_t));
|
||||
offset += sizeof(uint32_t);
|
||||
|
||||
// metadata
|
||||
memcpy(dst + offset, _metadata.data(), _metadata.size());
|
||||
offset += _metadata.size();
|
||||
|
||||
// value
|
||||
memcpy(dst + offset, _value.data(), _value.size());
|
||||
offset += _value.size();
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
uint32_t VariantValue::serialize_size() const {
|
||||
return sizeof(uint32_t) + _metadata.size() + _value.size();
|
||||
}
|
||||
|
||||
StatusOr<std::string> VariantValue::to_json(cctz::time_zone timezone) const {
|
||||
std::stringstream json_str;
|
||||
auto status = VariantUtil::variant_to_json(_metadata, _value, json_str, timezone);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
|
||||
return json_str.str();
|
||||
}
|
||||
|
||||
std::string VariantValue::to_string() const {
|
||||
auto json_result = to_json();
|
||||
if (!json_result.ok()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
return json_result.value();
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const VariantValue& value) {
|
||||
return os << value.to_string();
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cctz/time_zone.h>
|
||||
|
||||
#include <string_view>
|
||||
|
||||
#include "common/statusor.h"
|
||||
#include "fmt/format.h"
|
||||
#include "util/slice.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
class VariantValue {
|
||||
public:
|
||||
VariantValue(const std::string_view metadata, const std::string_view value) : _metadata(metadata), _value(value) {}
|
||||
VariantValue(std::string metadata, std::string value) : _metadata(std::move(metadata)), _value(std::move(value)) {}
|
||||
VariantValue() = default;
|
||||
|
||||
/**
|
||||
* Static factory method to create a VariantValue from a Slice.
|
||||
* @param slice The Slice must contain the full variant binary including size header.
|
||||
* The first 4 bytes of the Slice are expected to be the size of the variant.
|
||||
* The memory layout is: [total size (4 bytes)][metadata][value].
|
||||
* @return The created VariantValue or an error status.
|
||||
*/
|
||||
static StatusOr<VariantValue> create(const Slice& slice);
|
||||
|
||||
VariantValue(const VariantValue& rhs) = default;
|
||||
|
||||
VariantValue(VariantValue&& rhs) noexcept = default;
|
||||
|
||||
static Status validate_metadata(const std::string_view metadata);
|
||||
|
||||
VariantValue& operator=(const VariantValue& rhs) = default;
|
||||
|
||||
VariantValue& operator=(VariantValue&& rhs) noexcept = default;
|
||||
|
||||
static VariantValue of_null();
|
||||
|
||||
// Load metadata from the variant binary.
|
||||
// will slice the variant binary to extract metadata
|
||||
static StatusOr<std::string_view> load_metadata(std::string_view variant);
|
||||
|
||||
// Serialize the VariantValue to a byte array.
|
||||
// return the number of bytes written
|
||||
size_t serialize(uint8_t* dst) const;
|
||||
|
||||
// Calculate the size of the serialized VariantValue.
|
||||
// 4 bytes for value size + metadata size + value size
|
||||
uint32_t serialize_size() const;
|
||||
|
||||
uint64_t mem_usage() const { return serialize_size(); }
|
||||
|
||||
// Convert to a JSON string
|
||||
StatusOr<std::string> to_json(cctz::time_zone timezone = cctz::local_time_zone()) const;
|
||||
std::string to_string() const;
|
||||
|
||||
std::string get_metadata() const { return _metadata; }
|
||||
std::string get_value() const { return _value; }
|
||||
|
||||
// Variant value has a maximum size limit of 16MB to prevent excessive memory usage.
|
||||
static constexpr uint32_t kMaxVariantSize = 16 * 1024 * 1024;
|
||||
|
||||
private:
|
||||
static constexpr uint8_t kVersionMask = 0b1111;
|
||||
static constexpr uint8_t kSortedStrings = 0b10000;
|
||||
static constexpr uint8_t kOffsetSizeMask = 0b11000000;
|
||||
static constexpr uint8_t kOffsetSizeShift = 6;
|
||||
static constexpr uint8_t kHeaderSize = 1;
|
||||
static constexpr size_t kMinMetadataSize = 3;
|
||||
|
||||
// Now directly store strings instead of string_views
|
||||
std::string _metadata;
|
||||
std::string _value;
|
||||
};
|
||||
|
||||
// append json string to the stream
|
||||
std::ostream& operator<<(std::ostream& os, const VariantValue& json);
|
||||
|
||||
} // namespace starrocks
|
||||
|
||||
// fmt::format
|
||||
template <>
|
||||
struct fmt::formatter<starrocks::VariantValue> : formatter<std::string> {
|
||||
template <typename FormatContext>
|
||||
auto format(const starrocks::VariantValue& p, FormatContext& ctx) -> decltype(ctx.out()) {
|
||||
return formatter<std::string>::format(p.to_string(), ctx);
|
||||
}
|
||||
}; // namespace fmt
|
||||
|
|
@ -114,6 +114,7 @@ set(UTIL_FILES
|
|||
internal_service_recoverable_stub.cpp
|
||||
lake_service_recoverable_stub.cpp
|
||||
byte_buffer.cpp
|
||||
variant_util.cpp
|
||||
)
|
||||
|
||||
add_library(Util STATIC
|
||||
|
|
|
|||
|
|
@ -0,0 +1,288 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "variant_util.h"
|
||||
|
||||
#include <arrow/util/endian.h>
|
||||
|
||||
#include <boost/uuid/uuid.hpp>
|
||||
#include <boost/uuid/uuid_io.hpp>
|
||||
|
||||
#include "exprs/cast_expr.h"
|
||||
#include "formats/parquet/variant.h"
|
||||
#include "runtime/decimalv3.h"
|
||||
#include "types/timestamp_value.h"
|
||||
#include "url_coding.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
uint32_t VariantUtil::readLittleEndianUnsigned(const void* from, uint8_t size) {
|
||||
uint32_t result = 0;
|
||||
memcpy(&result, from, size);
|
||||
return arrow::bit_util::FromLittleEndian(result);
|
||||
}
|
||||
|
||||
std::string VariantUtil::type_to_string(VariantType type) {
|
||||
switch (type) {
|
||||
case VariantType::OBJECT:
|
||||
return "Object";
|
||||
case VariantType::ARRAY:
|
||||
return "Array";
|
||||
case VariantType::NULL_TYPE:
|
||||
return "Null";
|
||||
case VariantType::BOOLEAN:
|
||||
return "Boolean";
|
||||
case VariantType::INT8:
|
||||
return "Int8";
|
||||
case VariantType::INT16:
|
||||
return "Int16";
|
||||
case VariantType::INT32:
|
||||
return "Int32";
|
||||
case VariantType::INT64:
|
||||
return "Int64";
|
||||
case VariantType::DOUBLE:
|
||||
return "Double";
|
||||
case VariantType::DECIMAL4:
|
||||
return "Decimal4";
|
||||
case VariantType::DECIMAL8:
|
||||
return "Decimal8";
|
||||
case VariantType::DECIMAL16:
|
||||
return "Decimal16";
|
||||
case VariantType::DATE:
|
||||
return "Date";
|
||||
case VariantType::TIMESTAMP_TZ:
|
||||
return "TimestampTZ";
|
||||
case VariantType::TIMESTAMP_NTZ:
|
||||
return "TimestampNTZ";
|
||||
case VariantType::FLOAT:
|
||||
return "Float";
|
||||
case VariantType::BINARY:
|
||||
return "Binary";
|
||||
case VariantType::STRING:
|
||||
return "String";
|
||||
case VariantType::TIME_NTZ:
|
||||
return "TimeNTZ";
|
||||
case VariantType::TIMESTAMP_TZ_NANOS:
|
||||
return "TimestampTZNanos";
|
||||
case VariantType::TIMESTAMP_NTZ_NANOS:
|
||||
return "TimestampNTZNanos";
|
||||
case VariantType::UUID:
|
||||
return "UUID";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
}
|
||||
|
||||
std::string epoch_day_to_date(int32_t epoch_days) {
|
||||
std::time_t raw_time = epoch_days * 86400; // to seconds
|
||||
std::tm* ptm = std::gmtime(&raw_time); // to UTC
|
||||
char buffer[11];
|
||||
std::strftime(buffer, sizeof(buffer), "%Y-%m-%d", ptm);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
std::string VariantUtil::decimal4_to_string(DecimalValue<int32_t> decimal) {
|
||||
return DecimalV3Cast::to_string<int32_t>(decimal.value, decimal_precision_limit<int32_t>, decimal.scale);
|
||||
}
|
||||
|
||||
std::string VariantUtil::decimal8_to_string(DecimalValue<int64_t> decimal) {
|
||||
return DecimalV3Cast::to_string<int64_t>(decimal.value, decimal_precision_limit<int64_t>, decimal.scale);
|
||||
}
|
||||
|
||||
std::string VariantUtil::decimal16_to_string(DecimalValue<int128_t> decimal) {
|
||||
return DecimalV3Cast::to_string<int128_t>(decimal.value, decimal_precision_limit<int128_t>, decimal.scale);
|
||||
}
|
||||
|
||||
void append_quoted_string(std::stringstream& ss, const std::string& str) {
|
||||
ss << '"' << str << '"';
|
||||
}
|
||||
|
||||
Status VariantUtil::variant_to_json(std::string_view metadata, std::string_view value, std::stringstream& json_str,
|
||||
cctz::time_zone timezone) {
|
||||
Variant variant{metadata, value};
|
||||
switch (variant.type()) {
|
||||
case VariantType::NULL_TYPE:
|
||||
json_str << "null";
|
||||
break;
|
||||
case VariantType::BOOLEAN: {
|
||||
bool res = *variant.get_bool();
|
||||
json_str << (res ? "true" : "false");
|
||||
break;
|
||||
}
|
||||
case VariantType::INT8:
|
||||
json_str << std::to_string(*variant.get_int8());
|
||||
break;
|
||||
case VariantType::INT16:
|
||||
json_str << std::to_string(*variant.get_int16());
|
||||
break;
|
||||
case VariantType::INT32:
|
||||
json_str << std::to_string(*variant.get_int32());
|
||||
break;
|
||||
case VariantType::INT64:
|
||||
json_str << std::to_string(*variant.get_int64());
|
||||
break;
|
||||
case VariantType::FLOAT: {
|
||||
const float f = *variant.get_float();
|
||||
if (std::isfinite(f)) {
|
||||
json_str << std::to_string(f);
|
||||
} else {
|
||||
append_quoted_string(json_str, std::to_string(f));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VariantType::DOUBLE: {
|
||||
const double d = *variant.get_double();
|
||||
if (std::isfinite(d)) {
|
||||
json_str << std::to_string(d);
|
||||
} else {
|
||||
append_quoted_string(json_str, std::to_string(d));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VariantType::DECIMAL4: {
|
||||
DecimalValue<int32_t> decimal = *variant.get_decimal4();
|
||||
json_str << decimal4_to_string(decimal);
|
||||
break;
|
||||
}
|
||||
case VariantType::DECIMAL8: {
|
||||
DecimalValue<int64_t> decimal = *variant.get_decimal8();
|
||||
json_str << decimal8_to_string(decimal);
|
||||
break;
|
||||
}
|
||||
case VariantType::DECIMAL16: {
|
||||
DecimalValue<int128_t> decimal = *variant.get_decimal16();
|
||||
json_str << decimal16_to_string(decimal);
|
||||
break;
|
||||
}
|
||||
case VariantType::STRING: {
|
||||
json_str << *variant.get_string();
|
||||
break;
|
||||
}
|
||||
case VariantType::BINARY: {
|
||||
const std::string_view binary = *variant.get_binary();
|
||||
const std::string binary_str(binary.data(), binary.size());
|
||||
std::string encoded;
|
||||
base64_encode(binary_str, &encoded);
|
||||
append_quoted_string(json_str, encoded);
|
||||
break;
|
||||
}
|
||||
case VariantType::UUID: {
|
||||
const auto uuid_arr = *variant.get_uuid();
|
||||
boost::uuids::uuid uuid{};
|
||||
for (size_t i = 0; i < uuid.size(); ++i) {
|
||||
uuid.data[i] = uuid_arr[i];
|
||||
}
|
||||
append_quoted_string(json_str, boost::uuids::to_string(uuid));
|
||||
break;
|
||||
}
|
||||
case VariantType::DATE: {
|
||||
int32_t date = *variant.get_date();
|
||||
std::string date_str = epoch_day_to_date(date);
|
||||
append_quoted_string(json_str, date_str);
|
||||
break;
|
||||
}
|
||||
case VariantType::TIMESTAMP_TZ: {
|
||||
const int64_t timestamp_micros = *variant.get_timestamp_micros();
|
||||
TimestampValue tsv{};
|
||||
tsv.from_unix_second(timestamp_micros / 1000000, timestamp_micros % 1000000);
|
||||
std::string timestamp_str = timestamp::to_string_with_timezone<false, false>(tsv.timestamp(), timezone);
|
||||
append_quoted_string(json_str, timestamp_str);
|
||||
break;
|
||||
}
|
||||
case VariantType::TIMESTAMP_NTZ: {
|
||||
const int64_t timestamp_micros = *variant.get_timestamp_micros_ntz();
|
||||
TimestampValue tsv{};
|
||||
tsv.from_unix_second(timestamp_micros / 1000000, timestamp_micros % 1000000);
|
||||
std::string timestamp_str = tsv.to_string(false);
|
||||
append_quoted_string(json_str, timestamp_str);
|
||||
break;
|
||||
}
|
||||
case VariantType::OBJECT: {
|
||||
auto info = get_object_info(value);
|
||||
if (!info.ok()) {
|
||||
return info.status();
|
||||
}
|
||||
const auto& [num_elements, id_start_offset, id_size, offset_start_offset, offset_size, data_start_offset] =
|
||||
info.value();
|
||||
json_str << "{";
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
if (i > 0) {
|
||||
json_str << ",";
|
||||
}
|
||||
|
||||
uint32_t id = readLittleEndianUnsigned(value.data() + id_start_offset + i * id_size, id_size);
|
||||
uint32_t offset =
|
||||
readLittleEndianUnsigned(value.data() + offset_start_offset + i * offset_size, offset_size);
|
||||
auto key = variant.metadata().get_key(id);
|
||||
if (!key.ok()) {
|
||||
return key.status();
|
||||
}
|
||||
|
||||
json_str << *key << ":";
|
||||
|
||||
if (uint32_t next_pos = data_start_offset + offset; next_pos < value.size()) {
|
||||
std::string_view next_value = value.substr(next_pos, value.size() - next_pos);
|
||||
// Recursively convert the next value to JSON
|
||||
auto status = variant_to_json(metadata, next_value, json_str, timezone);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
} else {
|
||||
return Status::InternalError("Invalid offset in object: " + std::to_string(offset));
|
||||
}
|
||||
}
|
||||
json_str << "}";
|
||||
break;
|
||||
}
|
||||
case VariantType::ARRAY: {
|
||||
auto info = get_array_info(value);
|
||||
if (!info.ok()) {
|
||||
return info.status();
|
||||
}
|
||||
|
||||
const auto& [num_elements, offset_size, offset_start_offset, data_start_offset] = info.value();
|
||||
json_str << "[";
|
||||
for (size_t i = 0; i < num_elements; ++i) {
|
||||
if (i > 0) {
|
||||
json_str << ",";
|
||||
}
|
||||
|
||||
uint32_t offset =
|
||||
readLittleEndianUnsigned(value.data() + offset_start_offset + i * offset_size, offset_size);
|
||||
if (uint32_t next_pos = data_start_offset + offset; next_pos < value.size()) {
|
||||
std::string_view next_value = value.substr(next_pos, value.size() - next_pos);
|
||||
// Recursively convert the next value to JSON
|
||||
auto status = variant_to_json(metadata, next_value, json_str, timezone);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
} else {
|
||||
return Status::InternalError("Invalid offset in array: " + std::to_string(offset));
|
||||
}
|
||||
}
|
||||
json_str << "]";
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return Status::NotSupported("Unsupported variant type: " + type_to_string(variant.type()));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
uint8_t VariantUtil::primitiveHeader(VariantPrimitiveType primitive) {
|
||||
return static_cast<uint8_t>(primitive) << 2;
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
#include <cctz/time_zone.h>
|
||||
|
||||
#include "formats/parquet/variant.h"
|
||||
#include "types/variant_value.h"
|
||||
|
||||
namespace starrocks {
|
||||
struct VariantUtil {
|
||||
static std::string type_to_string(VariantType type);
|
||||
|
||||
static Status variant_to_json(std::string_view metadata, std::string_view value, std::stringstream& json_str,
|
||||
cctz::time_zone timezone = cctz::local_time_zone());
|
||||
static uint32_t readLittleEndianUnsigned(const void* from, uint8_t size);
|
||||
|
||||
static std::string decimal4_to_string(DecimalValue<int32_t> decimal);
|
||||
static std::string decimal8_to_string(DecimalValue<int64_t> decimal);
|
||||
static std::string decimal16_to_string(DecimalValue<int128_t> decimal);
|
||||
|
||||
static uint8_t primitiveHeader(VariantPrimitiveType primitive);
|
||||
};
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
@ -15,6 +15,7 @@ set(EXEC_FILES
|
|||
./column/field_test.cpp
|
||||
./column/fixed_length_column_test.cpp
|
||||
./column/json_column_test.cpp
|
||||
./column/variant_column_test.cpp
|
||||
./column/map_column_test.cpp
|
||||
./column/struct_column_test.cpp
|
||||
./column/nullable_column_test.cpp
|
||||
|
|
@ -460,6 +461,7 @@ set(EXEC_FILES
|
|||
./types/int256_test.cpp
|
||||
./types/int256_arithmetic_test.cpp
|
||||
./types/type_checker_test.cpp
|
||||
./types/variant_value_test.cpp
|
||||
./simd/batch_run_counter_test.cpp
|
||||
./simd/simd_test.cpp
|
||||
./simd/simd_selector_test.cpp
|
||||
|
|
|
|||
|
|
@ -139,6 +139,16 @@ PARALLEL_TEST(ColumnViewTest, test_create_json_column_view) {
|
|||
DCHECK(json_column_view->is_json_view());
|
||||
}
|
||||
}
|
||||
PARALLEL_TEST(ColumnViewTest, test_create_variant_column_view) {
|
||||
TypeDescriptor type_desc = TypeDescriptor(LogicalType::TYPE_VARIANT);
|
||||
for (auto nullable : {true, false}) {
|
||||
auto opt_variant_column_view = ColumnViewHelper::create_column_view(type_desc, nullable, 0, 0);
|
||||
DCHECK(opt_variant_column_view.has_value());
|
||||
auto variant_column_view = std::move(opt_variant_column_view.value());
|
||||
DCHECK(variant_column_view->is_variant_view());
|
||||
EXPECT_TRUE(variant_column_view->is_variant_view());
|
||||
}
|
||||
}
|
||||
PARALLEL_TEST(ColumnViewTest, test_create_binary_column_view) {
|
||||
for (auto ltype : {LogicalType::TYPE_VARBINARY, LogicalType::TYPE_VARCHAR, LogicalType::TYPE_CHAR}) {
|
||||
for (auto nullable : {true, false}) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,241 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "column/column_builder.h"
|
||||
#include "column/type_traits.h"
|
||||
#include "formats/parquet/variant.h"
|
||||
#include "testutil/parallel_test.h"
|
||||
#include "types/logical_type.h"
|
||||
#include "types/variant_value.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
uint8_t primitiveHeader(VariantPrimitiveType primitive) {
|
||||
return static_cast<uint8_t>(primitive) << 2;
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
PARALLEL_TEST(VariantColumnTest, test_build_column) {
|
||||
// create from type traits
|
||||
{
|
||||
const uint8_t int_chars[] = {primitiveHeader(VariantPrimitiveType::INT32), 0xD2, 0x02, 0x96, 0x49};
|
||||
std::string_view int_string(reinterpret_cast<const char*>(int_chars), sizeof(int_chars));
|
||||
VariantValue variant{VariantMetadata::kEmptyMetadata, int_string};
|
||||
auto column = RunTimeColumnType<TYPE_VARIANT>::create();
|
||||
EXPECT_EQ("variant", column->get_name());
|
||||
EXPECT_TRUE(column->is_variant());
|
||||
column->append(&variant);
|
||||
auto res = column->get_object(0);
|
||||
ASSERT_EQ(res->serialize_size(), variant.serialize_size());
|
||||
ASSERT_EQ(res->get_metadata(), variant.get_metadata());
|
||||
ASSERT_EQ(res->get_value(), variant.get_value());
|
||||
EXPECT_EQ(res->to_string(), variant.to_string());
|
||||
EXPECT_EQ("1234567890", res->to_string());
|
||||
}
|
||||
// create from builder
|
||||
{
|
||||
ColumnBuilder<TYPE_VARIANT> builder(1);
|
||||
const uint8_t int_chars[] = {primitiveHeader(VariantPrimitiveType::INT32), 0xD2, 0x02, 0x96, 0x49};
|
||||
std::string_view int_string(reinterpret_cast<const char*>(int_chars), sizeof(int_chars));
|
||||
VariantValue variant{VariantMetadata::kEmptyMetadata, int_string};
|
||||
builder.append(&variant);
|
||||
auto column = builder.build(false);
|
||||
EXPECT_EQ("variant", column->get_name());
|
||||
EXPECT_TRUE(column->is_variant());
|
||||
|
||||
VariantColumn::Ptr column_ptr = ColumnHelper::cast_to<TYPE_VARIANT>(column);
|
||||
ASSERT_EQ(column_ptr->size(), 1);
|
||||
auto res = column_ptr->get_object(0);
|
||||
ASSERT_EQ(res->serialize_size(), variant.serialize_size());
|
||||
ASSERT_EQ(res->get_metadata(), variant.get_metadata());
|
||||
ASSERT_EQ(res->get_value(), variant.get_value());
|
||||
EXPECT_EQ(res->to_string(), variant.to_string());
|
||||
EXPECT_EQ("1234567890", res->to_string());
|
||||
}
|
||||
// clone
|
||||
{
|
||||
VariantColumn::Ptr column = VariantColumn::create();
|
||||
const uint8_t int_chars[] = {primitiveHeader(VariantPrimitiveType::INT32), 0xD2, 0x02, 0x96, 0x49};
|
||||
std::string_view int_string(reinterpret_cast<const char*>(int_chars), sizeof(int_chars));
|
||||
VariantValue variant{VariantMetadata::kEmptyMetadata, int_string};
|
||||
column->append(&variant);
|
||||
|
||||
{
|
||||
auto copy = column->clone();
|
||||
ASSERT_EQ(copy->size(), 1);
|
||||
auto res = copy->get(0).get_variant();
|
||||
ASSERT_EQ(res->serialize_size(), variant.serialize_size());
|
||||
ASSERT_EQ(res->get_metadata(), variant.get_metadata());
|
||||
ASSERT_EQ(res->get_value(), variant.get_value());
|
||||
EXPECT_EQ(res->to_string(), variant.to_string());
|
||||
EXPECT_EQ("1234567890", res->to_string());
|
||||
}
|
||||
// clone nullable by helper
|
||||
{
|
||||
TypeDescriptor desc = TypeDescriptor::create_variant_type();
|
||||
auto copy = ColumnHelper::clone_column(desc, true, column, column->size());
|
||||
ASSERT_EQ(copy->size(), 1);
|
||||
ASSERT_TRUE(copy->is_nullable());
|
||||
|
||||
// unwrap nullable column
|
||||
Column* unwrapped = ColumnHelper::get_data_column(copy.get());
|
||||
|
||||
VariantColumn* variant_column_ptr = down_cast<VariantColumn*>(unwrapped);
|
||||
ASSERT_EQ(variant_column_ptr->size(), 1);
|
||||
auto res = variant_column_ptr->get(0).get_variant();
|
||||
ASSERT_EQ(res->serialize_size(), variant.serialize_size());
|
||||
ASSERT_EQ(res->get_metadata(), variant.get_metadata());
|
||||
ASSERT_EQ(res->get_value(), variant.get_value());
|
||||
EXPECT_EQ(res->to_string(), variant.to_string());
|
||||
EXPECT_EQ("1234567890", res->to_string());
|
||||
}
|
||||
// clone variant_column by helper
|
||||
{
|
||||
TypeDescriptor desc = TypeDescriptor::create_variant_type();
|
||||
ColumnPtr copy = ColumnHelper::clone_column(desc, false, column, column->size());
|
||||
ASSERT_EQ(copy->size(), 1);
|
||||
ASSERT_FALSE(copy->is_nullable());
|
||||
|
||||
VariantColumn::Ptr variant_column_ptr = ColumnHelper::cast_to<TYPE_VARIANT>(copy);
|
||||
ASSERT_EQ(variant_column_ptr->size(), 1);
|
||||
auto res = variant_column_ptr->get(0).get_variant();
|
||||
ASSERT_EQ(res->serialize_size(), variant.serialize_size());
|
||||
ASSERT_EQ(res->get_metadata(), variant.get_metadata());
|
||||
ASSERT_EQ(res->get_value(), variant.get_value());
|
||||
EXPECT_EQ(res->to_string(), variant.to_string());
|
||||
EXPECT_EQ("1234567890", res->to_string());
|
||||
|
||||
VariantColumn* variant_column = ColumnHelper::cast_to_raw<TYPE_VARIANT>(copy.get());
|
||||
ASSERT_EQ(variant_column->size(), 1);
|
||||
auto raw_res = variant_column->get(0).get_variant();
|
||||
ASSERT_EQ(res->serialize_size(), variant.serialize_size());
|
||||
ASSERT_EQ(raw_res->get_metadata(), variant.get_metadata());
|
||||
ASSERT_EQ(raw_res->get_value(), variant.get_value());
|
||||
EXPECT_EQ(raw_res->to_string(), variant.to_string());
|
||||
EXPECT_EQ("1234567890", raw_res->to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
PARALLEL_TEST(VariantColumnTest, test_serialize) {
|
||||
std::string_view empty_metadata = VariantMetadata::kEmptyMetadata;
|
||||
const uint8_t uuid_chars[] = {primitiveHeader(VariantPrimitiveType::UUID),
|
||||
0xf2,
|
||||
0x4f,
|
||||
0x9b,
|
||||
0x64,
|
||||
0x81,
|
||||
0xfa,
|
||||
0x49,
|
||||
0xd1,
|
||||
0xb7,
|
||||
0x4e,
|
||||
0x8c,
|
||||
0x09,
|
||||
0xa6,
|
||||
0xe3,
|
||||
0x1c,
|
||||
0x56};
|
||||
|
||||
std::string_view uuid_string(reinterpret_cast<const char*>(uuid_chars), sizeof(uuid_chars));
|
||||
VariantValue variant{empty_metadata, uuid_string};
|
||||
|
||||
auto column = RunTimeColumnType<TYPE_VARIANT>::create();
|
||||
EXPECT_EQ("variant", column->get_name());
|
||||
EXPECT_TRUE(column->is_variant());
|
||||
column->append(&variant);
|
||||
EXPECT_EQ(variant.serialize_size(), column->serialize_size(0));
|
||||
|
||||
// deserialize
|
||||
std::vector<uint8_t> buffer;
|
||||
buffer.resize(variant.serialize_size());
|
||||
column->serialize(0, buffer.data());
|
||||
auto new_column = column->clone_empty();
|
||||
new_column->deserialize_and_append(buffer.data());
|
||||
const VariantValue* deserialized_variant = new_column->get(0).get_variant();
|
||||
ASSERT_TRUE(deserialized_variant != nullptr);
|
||||
EXPECT_EQ(variant.serialize_size(), deserialized_variant->serialize_size());
|
||||
EXPECT_EQ(variant.to_string(), deserialized_variant->to_string());
|
||||
EXPECT_EQ("\"f24f9b64-81fa-49d1-b74e-8c09a6e31c56\"", deserialized_variant->to_json().value());
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
PARALLEL_TEST(VariantColumnTest, put_mysql_row_buffer) {
|
||||
const uint8_t int_chars[] = {primitiveHeader(VariantPrimitiveType::INT32), 0xD2, 0x02, 0x96, 0x49};
|
||||
std::string_view int_string(reinterpret_cast<const char*>(int_chars), sizeof(int_chars));
|
||||
VariantValue variant{VariantMetadata::kEmptyMetadata, int_string};
|
||||
|
||||
auto column = VariantColumn::create();
|
||||
column->append(&variant);
|
||||
|
||||
MysqlRowBuffer buf;
|
||||
column->put_mysql_row_buffer(&buf, 0);
|
||||
EXPECT_EQ("\n1234567890", buf.data());
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
PARALLEL_TEST(VariantColumnTest, test_create_variant_column) {
|
||||
auto variant_column = VariantColumn::create();
|
||||
|
||||
// Test basic column operations that exercise visitor patterns
|
||||
EXPECT_EQ(0, variant_column->size());
|
||||
EXPECT_TRUE(variant_column->empty());
|
||||
EXPECT_FALSE(variant_column->is_nullable());
|
||||
EXPECT_FALSE(variant_column->is_constant());
|
||||
|
||||
// Test column cloning which uses visitor patterns internally
|
||||
auto cloned = variant_column->clone();
|
||||
EXPECT_EQ(0, cloned->size());
|
||||
|
||||
// Test memory operations
|
||||
size_t memory_usage = variant_column->memory_usage();
|
||||
EXPECT_GE(memory_usage, 0);
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
PARALLEL_TEST(VariantColumnTest, test_append_strings) {
|
||||
const auto variant_column = VariantColumn::create();
|
||||
const uint8_t int1_value[] = {primitiveHeader(VariantPrimitiveType::INT8), 0x01};
|
||||
const std::string_view int1_value_str(reinterpret_cast<const char*>(int1_value), sizeof(int1_value));
|
||||
constexpr uint32_t int1_total_size = sizeof(int1_value) + VariantMetadata::kEmptyMetadata.size();
|
||||
std::string variant_string;
|
||||
variant_string.resize(int1_total_size + sizeof(uint32_t));
|
||||
memcpy(variant_string.data(), &int1_total_size, sizeof(uint32_t));
|
||||
memcpy(variant_string.data() + sizeof(uint32_t), VariantMetadata::kEmptyMetadata.data(),
|
||||
VariantMetadata::kEmptyMetadata.size());
|
||||
memcpy(variant_string.data() + sizeof(uint32_t) + VariantMetadata::kEmptyMetadata.size(), int1_value_str.data(),
|
||||
int1_value_str.size());
|
||||
const Slice slice(variant_string.data(), variant_string.size());
|
||||
variant_column->append_strings(&slice, 1);
|
||||
|
||||
ASSERT_EQ(1, variant_column->size());
|
||||
auto expected = VariantValue::create(slice);
|
||||
ASSERT_TRUE(expected.ok());
|
||||
const VariantValue* actual = variant_column->get_object(0);
|
||||
ASSERT_EQ(expected->serialize_size(), actual->serialize_size());
|
||||
ASSERT_EQ(expected->get_metadata(), actual->get_metadata());
|
||||
ASSERT_EQ(expected->get_value(), actual->get_value());
|
||||
EXPECT_EQ(expected->to_string(), actual->to_string());
|
||||
EXPECT_EQ("1", actual->to_string());
|
||||
|
||||
// Append bad data
|
||||
const Slice bad_slice("");
|
||||
const bool result = variant_column->append_strings(&bad_slice, 1);
|
||||
ASSERT_FALSE(result) << "Appending empty slice should fail";
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
@ -747,4 +747,98 @@ TEST_F(TypeDescriptorTest, test_promote_types) {
|
|||
}
|
||||
}
|
||||
|
||||
// params: [LogicalType, expected length, logical_type_string, is_scalar_logical_type, is_huge_type]
|
||||
class FromLogicalTypeTest : public ::testing::TestWithParam<std::tuple<LogicalType, int, std::string, bool, bool>> {
|
||||
public:
|
||||
};
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
TEST_P(FromLogicalTypeTest, test_from_logical_type) {
|
||||
auto param = GetParam();
|
||||
LogicalType logical_type = std::get<0>(param);
|
||||
int expected_len = std::get<1>(param);
|
||||
std::string expected_lts = std::get<2>(param);
|
||||
bool is_slt = std::get<3>(param);
|
||||
bool is_ht = std::get<4>(param);
|
||||
|
||||
ASSERT_EQ(expected_lts, logical_type_to_string(logical_type));
|
||||
ASSERT_EQ(is_slt, is_scalar_logical_type(logical_type));
|
||||
ASSERT_EQ(logical_type, string_to_logical_type(expected_lts));
|
||||
|
||||
auto type_desc = TypeDescriptor::from_logical_type(logical_type);
|
||||
ASSERT_EQ(logical_type, type_desc.type);
|
||||
ASSERT_EQ(expected_len, type_desc.len);
|
||||
|
||||
if (logical_type == TYPE_DECIMAL || logical_type == TYPE_DECIMALV2 || logical_type == TYPE_DECIMAL32 ||
|
||||
logical_type == TYPE_DECIMAL64 || logical_type == TYPE_DECIMAL128) {
|
||||
ASSERT_EQ(type_desc.precision, 27); // default precision for decimal types
|
||||
ASSERT_EQ(type_desc.scale, 9); // default scale for decimal types
|
||||
} else {
|
||||
ASSERT_EQ(-1, type_desc.precision);
|
||||
ASSERT_EQ(-1, type_desc.scale);
|
||||
}
|
||||
|
||||
ASSERT_EQ(is_ht, type_desc.is_huge_type());
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
LogicalTypes,
|
||||
FromLogicalTypeTest,
|
||||
::testing::Values(
|
||||
// Basic types
|
||||
std::make_tuple(LogicalType::TYPE_BOOLEAN, -1, "BOOLEAN", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_TINYINT, -1, "TINYINT", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_SMALLINT, -1, "SMALLINT", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_INT, -1, "INT", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_BIGINT, -1, "BIGINT", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_LARGEINT, -1, "LARGEINT", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_FLOAT, -1, "FLOAT", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_DOUBLE, -1, "DOUBLE", true, false),
|
||||
|
||||
// string/binary types
|
||||
std::make_tuple(LogicalType::TYPE_CHAR, TypeDescriptor::MAX_CHAR_LENGTH, "CHAR", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_VARCHAR, TypeDescriptor::MAX_VARCHAR_LENGTH, "VARCHAR", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_BINARY, -1, "BINARY", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_VARBINARY, -1, "VARBINARY", true, false),
|
||||
|
||||
// decimal types
|
||||
std::make_tuple(LogicalType::TYPE_DECIMAL, -1, "DECIMAL", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_DECIMALV2, -1, "DECIMAL_V2", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_DECIMAL32, -1, "DECIMAL32", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_DECIMAL64, -1, "DECIMAL64", true, false),
|
||||
std::make_tuple(LogicalType::TYPE_DECIMAL128, -1, "DECIMAL128", true, false),
|
||||
|
||||
std::make_tuple(LogicalType::TYPE_HLL, HLL_COLUMN_DEFAULT_LEN, "HLL", false, true),
|
||||
std::make_tuple(LogicalType::TYPE_JSON, kJsonDefaultSize, "JSON", true, true),
|
||||
std::make_tuple(LogicalType::TYPE_VARIANT, 128, "VARIANT", true, true),
|
||||
std::make_tuple(LogicalType::TYPE_OBJECT, TypeDescriptor::DEFAULT_BITMAP_LENGTH, "OBJECT", false, true)
|
||||
)
|
||||
);
|
||||
// clang-format on
|
||||
|
||||
TEST_F(TypeDescriptorTest, test_create_variant_type) {
|
||||
// Test create_variant_type() static method
|
||||
TypeDescriptor variant_desc = TypeDescriptor::create_variant_type();
|
||||
|
||||
ASSERT_EQ(LogicalType::TYPE_VARIANT, variant_desc.type);
|
||||
|
||||
ASSERT_EQ(128, variant_desc.len);
|
||||
|
||||
// Verify other default fields are properly initialized
|
||||
ASSERT_EQ(-1, variant_desc.precision);
|
||||
ASSERT_EQ(-1, variant_desc.scale);
|
||||
ASSERT_TRUE(variant_desc.children.empty());
|
||||
|
||||
// Verify the type descriptor is valid
|
||||
ASSERT_TRUE(variant_desc.is_huge_type());
|
||||
ASSERT_FALSE(variant_desc.is_complex_type());
|
||||
ASSERT_EQ("VARIANT", variant_desc.debug_string());
|
||||
|
||||
// Test that multiple calls return equivalent objects
|
||||
TypeDescriptor variant_desc2 = TypeDescriptor::create_variant_type();
|
||||
ASSERT_EQ(variant_desc.type, variant_desc2.type);
|
||||
ASSERT_EQ(variant_desc.len, variant_desc2.len);
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
|
|||
|
|
@ -24,9 +24,11 @@
|
|||
#include "column/fixed_length_column.h"
|
||||
#include "column/json_column.h"
|
||||
#include "column/nullable_column.h"
|
||||
#include "column/variant_column.h"
|
||||
#include "gutil/strings/substitute.h"
|
||||
#include "testutil/parallel_test.h"
|
||||
#include "util/json.h"
|
||||
#include "util/variant_util.h"
|
||||
|
||||
namespace starrocks::serde {
|
||||
|
||||
|
|
@ -84,6 +86,88 @@ PARALLEL_TEST(ColumnArraySerdeTest, json_column) {
|
|||
}
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
PARALLEL_TEST(ColumnArraySerdeTest, variant_column) {
|
||||
auto c1 = VariantColumn::create();
|
||||
ASSERT_EQ(4, ColumnArraySerde::max_serialized_size(*c1));
|
||||
|
||||
// Prepare 5 int8 variant values
|
||||
const uint8_t int8_values[][2] = {
|
||||
{VariantUtil::primitiveHeader(VariantPrimitiveType::INT8), 0x01}, // 1
|
||||
{VariantUtil::primitiveHeader(VariantPrimitiveType::INT8), 0x02}, // 2
|
||||
{VariantUtil::primitiveHeader(VariantPrimitiveType::INT8), 0x03}, // 3
|
||||
{VariantUtil::primitiveHeader(VariantPrimitiveType::INT8), 0x04}, // 4
|
||||
{VariantUtil::primitiveHeader(VariantPrimitiveType::INT8), 0x05}, // 5
|
||||
};
|
||||
size_t expected_max_size = sizeof(uint32_t);
|
||||
for (size_t i = 0; i < std::size(int8_values); ++i) {
|
||||
std::string_view value(reinterpret_cast<const char*>(int8_values[i]), sizeof(int8_values[i]));
|
||||
VariantValue variant(VariantMetadata::kEmptyMetadata, value);
|
||||
c1->append(&variant);
|
||||
expected_max_size += sizeof(uint64_t) + variant.serialize_size();
|
||||
}
|
||||
ASSERT_EQ(expected_max_size, ColumnArraySerde::max_serialized_size(*c1));
|
||||
|
||||
auto c2 = VariantColumn::create();
|
||||
std::vector<uint8_t> buffer;
|
||||
buffer.resize(ColumnArraySerde::max_serialized_size(*c1));
|
||||
auto p1 = ColumnArraySerde::serialize(*c1, buffer.data());
|
||||
auto p2 = ColumnArraySerde::deserialize(buffer.data(), c2.get());
|
||||
ASSERT_EQ(buffer.data() + buffer.size(), p1);
|
||||
ASSERT_EQ(buffer.data() + buffer.size(), p2);
|
||||
|
||||
ASSERT_EQ(5, c2->size());
|
||||
for (size_t i = 0; i < c1->size(); i++) {
|
||||
const VariantValue* datum1 = c1->get(i).get_variant();
|
||||
const VariantValue* datum2 = c2->get(i).get_variant();
|
||||
ASSERT_EQ(datum1->serialize_size(), datum2->serialize_size());
|
||||
ASSERT_EQ(datum1->get_metadata(), datum2->get_metadata());
|
||||
ASSERT_EQ(datum1->get_value(), datum2->get_value());
|
||||
EXPECT_EQ(datum1->to_string(), datum2->to_string());
|
||||
}
|
||||
|
||||
// no effect
|
||||
for (auto level = -1; level < 8; ++level) {
|
||||
buffer.resize(ColumnArraySerde::max_serialized_size(*c1), level);
|
||||
p1 = ColumnArraySerde::serialize(*c1, buffer.data(), false, level);
|
||||
p2 = ColumnArraySerde::deserialize(buffer.data(), c2.get(), false, level);
|
||||
ASSERT_EQ(buffer.data() + buffer.size(), p1);
|
||||
ASSERT_EQ(buffer.data() + buffer.size(), p2);
|
||||
|
||||
ASSERT_EQ(5, c2->size());
|
||||
for (size_t i = 0; i < c1->size(); i++) {
|
||||
const VariantValue* datum1 = c1->get(i).get_variant();
|
||||
const VariantValue* datum2 = c2->get(i).get_variant();
|
||||
ASSERT_EQ(datum1->serialize_size(), datum2->serialize_size());
|
||||
ASSERT_EQ(datum1->get_metadata(), datum2->get_metadata());
|
||||
ASSERT_EQ(datum1->get_value(), datum2->get_value());
|
||||
EXPECT_EQ(datum1->to_string(), datum2->to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
PARALLEL_TEST(ColumnArraySerdeTest, variant_column_failed_deserialize) {
|
||||
auto c1 = VariantColumn::create();
|
||||
ASSERT_EQ(4, ColumnArraySerde::max_serialized_size(*c1));
|
||||
|
||||
// Prepare a variant value with an unsupported version
|
||||
constexpr uint8_t v2_metadata_charts[] = {0x02, 0x00, 0x00};
|
||||
const std::string_view v2_metadata(reinterpret_cast<const char*>(v2_metadata_charts), sizeof(v2_metadata_charts));
|
||||
const VariantValue variant(v2_metadata, "");
|
||||
c1->append(&variant);
|
||||
ASSERT_EQ(1, c1->size());
|
||||
|
||||
std::vector<uint8_t> buffer;
|
||||
buffer.resize(ColumnArraySerde::max_serialized_size(*c1));
|
||||
ColumnArraySerde::serialize(*c1, buffer.data());
|
||||
|
||||
auto c2 = VariantColumn::create();
|
||||
auto p2 = ColumnArraySerde::deserialize(buffer.data(), c2.get());
|
||||
ASSERT_TRUE(p2 == nullptr); // Deserialization should fail due to empty value
|
||||
ASSERT_EQ(0, c2->size()); // Deserialization should fail, resulting in an empty column
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
PARALLEL_TEST(ColumnArraySerdeTest, decimal_column) {
|
||||
auto c1 = DecimalColumn::create();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,437 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "types/variant_value.h"
|
||||
|
||||
#include <fs/fs.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <boost/uuid/uuid_io.hpp>
|
||||
|
||||
#include "cctz/time_zone.h"
|
||||
#include "formats/parquet/variant.h"
|
||||
#include "runtime/decimalv3.h"
|
||||
#include "types/timestamp_value.h"
|
||||
#include "util/timezone_utils.h"
|
||||
#include "util/variant_util.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
class VariantValueTest : public testing::Test {
|
||||
public:
|
||||
VariantValueTest() = default;
|
||||
~VariantValueTest() override = default;
|
||||
|
||||
protected:
|
||||
void SetUp() override {
|
||||
std::string starrocks_home = getenv("STARROCKS_HOME");
|
||||
test_exec_dir = starrocks_home + "/be/test/formats/parquet/test_data/variant";
|
||||
|
||||
_primitive_metadata_file_names = {
|
||||
"primitive_null.metadata", "primitive_boolean_true.metadata", "primitive_boolean_false.metadata",
|
||||
"primitive_date.metadata", "primitive_decimal4.metadata", "primitive_decimal8.metadata",
|
||||
"primitive_decimal16.metadata", "primitive_float.metadata", "primitive_double.metadata",
|
||||
"primitive_int8.metadata", "primitive_int16.metadata", "primitive_int32.metadata",
|
||||
"primitive_int64.metadata", "primitive_binary.metadata", "primitive_string.metadata",
|
||||
"array_primitive.metadata",
|
||||
};
|
||||
|
||||
_boolean_file_names = {
|
||||
{"primitive_boolean_true.metadata", "primitive_boolean_true.value"},
|
||||
{"primitive_boolean_false.metadata", "primitive_boolean_false.value"},
|
||||
};
|
||||
}
|
||||
|
||||
std::string read_file_content(const std::string& file_path) {
|
||||
FileSystem* fs = FileSystem::Default();
|
||||
auto random_access_file = *fs->new_random_access_file(file_path);
|
||||
return *random_access_file->read_all();
|
||||
}
|
||||
|
||||
std::pair<std::string, std::string> load_variant_data(const std::string& metadata_file,
|
||||
const std::string& value_file) {
|
||||
std::string metadata_content = read_file_content(test_exec_dir + "/" + metadata_file);
|
||||
std::string value_content = read_file_content(test_exec_dir + "/" + value_file);
|
||||
|
||||
return {std::move(metadata_content), std::move(value_content)};
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string test_exec_dir;
|
||||
std::vector<std::string> _primitive_metadata_file_names;
|
||||
std::vector<std::pair<std::string, std::string>> _boolean_file_names;
|
||||
};
|
||||
|
||||
TEST_F(VariantValueTest, NullToJson) {
|
||||
uint8_t null_chars[] = {static_cast<uint8_t>(VariantPrimitiveType::NULL_TYPE) << 2};
|
||||
std::string_view null_value(reinterpret_cast<const char*>(null_chars), 1);
|
||||
VariantValue v(VariantMetadata::kEmptyMetadata, null_value);
|
||||
auto json = v.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("null", *json);
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, BooleanToJson) {
|
||||
auto [t_fst, t_snd] = _boolean_file_names[0];
|
||||
auto [t_metadata, t_value] = load_variant_data(t_fst, t_snd);
|
||||
VariantValue variant_true{std::string_view(t_metadata), std::string_view(t_value)};
|
||||
auto json_true = variant_true.to_json();
|
||||
ASSERT_TRUE(json_true.ok());
|
||||
EXPECT_EQ("true", *json_true);
|
||||
|
||||
auto [f_fst, f_snd] = _boolean_file_names[1];
|
||||
auto [f_metadata, f_value] = load_variant_data(f_fst, f_snd);
|
||||
VariantValue variant_false{std::string_view(f_metadata), std::string_view(f_value)};
|
||||
auto json_false = variant_false.to_json();
|
||||
ASSERT_TRUE(json_false.ok());
|
||||
EXPECT_EQ("false", *json_false);
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, IntegerToJson) {
|
||||
// Test int8
|
||||
{
|
||||
auto [int8_metadata, int8_value] = load_variant_data("primitive_int8.metadata", "primitive_int8.value");
|
||||
VariantValue variant{std::string_view(int8_metadata), std::string_view(int8_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("42", *json);
|
||||
}
|
||||
|
||||
// Test int16
|
||||
{
|
||||
auto [int16_metadata, int16_value] = load_variant_data("primitive_int16.metadata", "primitive_int16.value");
|
||||
VariantValue variant{std::string_view(int16_metadata), std::string_view(int16_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("1234", *json);
|
||||
}
|
||||
|
||||
// Test int32
|
||||
{
|
||||
auto [int32_metadata, int32_value] = load_variant_data("primitive_int32.metadata", "primitive_int32.value");
|
||||
VariantValue variant{std::string_view(int32_metadata), std::string_view(int32_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("123456", *json);
|
||||
}
|
||||
|
||||
// Test int64
|
||||
{
|
||||
auto [int64_metadata, int64_value] = load_variant_data("primitive_int64.metadata", "primitive_int64.value");
|
||||
VariantValue variant{std::string_view(int64_metadata), std::string_view(int64_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("1234567890123456789", *json);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, FloatToJson) {
|
||||
// Test float
|
||||
{
|
||||
auto [float_metadata, float_value] = load_variant_data("primitive_float.metadata", "primitive_float.value");
|
||||
VariantValue variant{std::string_view(float_metadata), std::string_view(float_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("1234567936.000000", *json);
|
||||
}
|
||||
|
||||
// Test double
|
||||
{
|
||||
auto [double_metadata, double_value] = load_variant_data("primitive_double.metadata", "primitive_double.value");
|
||||
VariantValue variant{std::string_view(double_metadata), std::string_view(double_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("1234567890.123400", *json);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, StringToJson) {
|
||||
// Test long string
|
||||
{
|
||||
auto [string_metadata, string_value] = load_variant_data("primitive_string.metadata", "primitive_string.value");
|
||||
VariantValue variant{std::string_view(string_metadata), std::string_view(string_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ(
|
||||
"This string is longer than 64 bytes and therefore does not fit in a short_string and it also includes "
|
||||
"several non ascii characters such as 🐢, 💖, ♥️, 🎣 and 🤦!!",
|
||||
*json);
|
||||
}
|
||||
|
||||
// Test short string
|
||||
{
|
||||
auto [short_string_metadata, short_string_value] =
|
||||
load_variant_data("short_string.metadata", "short_string.value");
|
||||
VariantValue variant{std::string_view(short_string_metadata), std::string_view(short_string_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("Less than 64 bytes (❤️ with utf8)", *json);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, BinaryToJson) {
|
||||
auto [binary_metadata, binary_value] = load_variant_data("primitive_binary.metadata", "primitive_binary.value");
|
||||
VariantValue variant{std::string_view(binary_metadata), std::string_view(binary_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
ASSERT_EQ("\"AxM33q2+78r+\"", *json);
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, DecimalToJson) {
|
||||
// Test decimal4
|
||||
{
|
||||
auto [decimal4_metadata, decimal4_value] =
|
||||
load_variant_data("primitive_decimal4.metadata", "primitive_decimal4.value");
|
||||
VariantValue variant{std::string_view(decimal4_metadata), std::string_view(decimal4_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("12.34", *json);
|
||||
}
|
||||
|
||||
// Test decimal8
|
||||
{
|
||||
auto [decimal8_metadata, decimal8_value] =
|
||||
load_variant_data("primitive_decimal8.metadata", "primitive_decimal8.value");
|
||||
VariantValue variant{std::string_view(decimal8_metadata), std::string_view(decimal8_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("12345678.90", *json);
|
||||
}
|
||||
|
||||
// Test decimal16
|
||||
{
|
||||
auto [decimal16_metadata, decimal16_value] =
|
||||
load_variant_data("primitive_decimal16.metadata", "primitive_decimal16.value");
|
||||
VariantValue variant{std::string_view(decimal16_metadata), std::string_view(decimal16_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("12345678912345678.90", *json);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, UUIDToJson) {
|
||||
std::string_view empty_metadata = VariantMetadata::kEmptyMetadata;
|
||||
const uint8_t uuid_chars[] = {VariantUtil::primitiveHeader(VariantPrimitiveType::UUID),
|
||||
0xf2,
|
||||
0x4f,
|
||||
0x9b,
|
||||
0x64,
|
||||
0x81,
|
||||
0xfa,
|
||||
0x49,
|
||||
0xd1,
|
||||
0xb7,
|
||||
0x4e,
|
||||
0x8c,
|
||||
0x09,
|
||||
0xa6,
|
||||
0xe3,
|
||||
0x1c,
|
||||
0x56};
|
||||
|
||||
std::string_view uuid_string(reinterpret_cast<const char*>(uuid_chars), sizeof(uuid_chars));
|
||||
VariantValue variant{empty_metadata, uuid_string};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("\"f24f9b64-81fa-49d1-b74e-8c09a6e31c56\"", *json);
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, TimestampToJson) {
|
||||
// Test timestamp with timezone
|
||||
{
|
||||
auto [ts_metadata, ts_value] = load_variant_data("primitive_timestamp.metadata", "primitive_timestamp.value");
|
||||
VariantValue variant{std::string_view(ts_metadata), std::string_view(ts_value)};
|
||||
|
||||
// Set timezone to -04:00
|
||||
cctz::time_zone tz;
|
||||
ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("-04:00", tz));
|
||||
|
||||
auto json = variant.to_json(tz);
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("\"2025-04-16 12:34:56.78-04:00\"", *json);
|
||||
}
|
||||
|
||||
// Test timestamp without timezone
|
||||
{
|
||||
auto [ts_ntz_metadata, ts_ntz_value] =
|
||||
load_variant_data("primitive_timestampntz.metadata", "primitive_timestampntz.value");
|
||||
VariantValue variant{std::string_view(ts_ntz_metadata), std::string_view(ts_ntz_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("\"2025-04-16 12:34:56.780000\"", *json);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, DateToJson) {
|
||||
auto [date_metadata, date_value] = load_variant_data("primitive_date.metadata", "primitive_date.value");
|
||||
VariantValue variant{std::string_view(date_metadata), std::string_view(date_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("\"2025-04-16\"", *json);
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, ObjectToJson) {
|
||||
// Test simple object
|
||||
{
|
||||
auto [object_metadata, object_value] = load_variant_data("object_primitive.metadata", "object_primitive.value");
|
||||
VariantValue variant{std::string_view(object_metadata), std::string_view(object_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
|
||||
// Should be a valid JSON object
|
||||
EXPECT_TRUE(json->front() == '{');
|
||||
EXPECT_TRUE(json->back() == '}');
|
||||
EXPECT_EQ(
|
||||
"{boolean_false_field:false,boolean_true_field:true,double_field:1.23456789,int_field:1,null_field:"
|
||||
"null,string_field:Apache Parquet,timestamp_field:2025-04-16T12:34:56.78}",
|
||||
*json);
|
||||
}
|
||||
|
||||
// Test empty object
|
||||
{
|
||||
auto [object_empty_metadata, object_empty_value] =
|
||||
load_variant_data("object_empty.metadata", "object_empty.value");
|
||||
VariantValue variant{std::string_view(object_empty_metadata), std::string_view(object_empty_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("{}", *json);
|
||||
}
|
||||
|
||||
// Test nested object
|
||||
{
|
||||
auto [object_nested_metadata, object_nested_value] =
|
||||
load_variant_data("object_nested.metadata", "object_nested.value");
|
||||
VariantValue variant{std::string_view(object_nested_metadata), std::string_view(object_nested_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
|
||||
// Should be a valid JSON object
|
||||
EXPECT_TRUE(json->front() == '{');
|
||||
EXPECT_TRUE(json->back() == '}');
|
||||
EXPECT_EQ(
|
||||
"{id:1,observation:{location:In the "
|
||||
"Volcano,time:12:34:56,value:{humidity:456,temperature:123}},species:{name:lava "
|
||||
"monster,population:6789}}",
|
||||
*json);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, ArrayToJson) {
|
||||
// Test primitive array
|
||||
{
|
||||
auto [array_metadata, array_value] = load_variant_data("array_primitive.metadata", "array_primitive.value");
|
||||
VariantValue variant{std::string_view(array_metadata), std::string_view(array_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
|
||||
// Should be a valid JSON array
|
||||
EXPECT_TRUE(json->front() == '[');
|
||||
EXPECT_TRUE(json->back() == ']');
|
||||
EXPECT_EQ("[2,1,5,9]", *json);
|
||||
}
|
||||
|
||||
// Test empty array
|
||||
{
|
||||
auto [array_empty_metadata, array_empty_value] = load_variant_data("array_empty.metadata", "array_empty.value");
|
||||
VariantValue variant{std::string_view(array_empty_metadata), std::string_view(array_empty_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
EXPECT_EQ("[]", *json);
|
||||
}
|
||||
|
||||
// Test nested array
|
||||
{
|
||||
auto [array_nested_metadata, array_nested_value] =
|
||||
load_variant_data("array_nested.metadata", "array_nested.value");
|
||||
VariantValue variant{std::string_view(array_nested_metadata), std::string_view(array_nested_value)};
|
||||
auto json = variant.to_json();
|
||||
ASSERT_TRUE(json.ok());
|
||||
|
||||
// Should be a valid JSON array
|
||||
EXPECT_TRUE(json->front() == '[');
|
||||
EXPECT_TRUE(json->back() == ']');
|
||||
EXPECT_EQ("[{id:1,thing:{names:[Contrarian,Spider]}},null,{id:2,names:[Apple,Ray,null],type:if}]", *json);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(VariantValueTest, InvalidVariant) {
|
||||
// Test invalid variant with empty metadata
|
||||
{
|
||||
Slice empty_slice(""); // Empty slice
|
||||
auto empty_variant = VariantValue::create(empty_slice);
|
||||
ASSERT_FALSE(empty_variant.ok());
|
||||
EXPECT_EQ("Invalid argument: Invalid variant slice: too small to contain size header",
|
||||
empty_variant.status().to_string());
|
||||
}
|
||||
// Test invalid variant with unsupported version
|
||||
{
|
||||
// Create proper format: 4-byte size header + variant data
|
||||
constexpr char v2_metadata_char[] = {0x2, 0x0, 0x0}; // version 2 metadata
|
||||
constexpr uint32_t data_size = sizeof(v2_metadata_char);
|
||||
|
||||
// Construct proper slice with size header
|
||||
std::string variant_data;
|
||||
variant_data.resize(sizeof(uint32_t) + data_size);
|
||||
|
||||
// Write size header (little endian)
|
||||
std::memcpy(variant_data.data(), &data_size, sizeof(uint32_t));
|
||||
// Write variant data
|
||||
std::memcpy(variant_data.data() + sizeof(uint32_t), v2_metadata_char, data_size);
|
||||
|
||||
Slice variant_slice(variant_data);
|
||||
auto unsupported_variant = VariantValue::create(variant_slice);
|
||||
ASSERT_FALSE(unsupported_variant.ok());
|
||||
EXPECT_EQ("Not supported: Unsupported variant version: 2", unsupported_variant.status().to_string());
|
||||
}
|
||||
// Test exceed maximum variant size
|
||||
{
|
||||
// Create size header that indicates data larger than max size
|
||||
constexpr uint32_t oversized_data_size = VariantValue::kMaxVariantSize + 1;
|
||||
std::string variant_data;
|
||||
variant_data.resize(sizeof(uint32_t));
|
||||
|
||||
// Write oversized size in header
|
||||
std::memcpy(variant_data.data(), &oversized_data_size, sizeof(uint32_t));
|
||||
|
||||
Slice variant_slice(variant_data);
|
||||
auto large_variant = VariantValue::create(variant_slice);
|
||||
ASSERT_FALSE(large_variant.ok());
|
||||
EXPECT_EQ("Invalid argument: Variant size exceeds maximum limit: " +
|
||||
std::to_string(VariantValue::kMaxVariantSize + 1) + " > " +
|
||||
std::to_string(VariantValue::kMaxVariantSize),
|
||||
large_variant.status().to_string());
|
||||
}
|
||||
|
||||
// Test variant with size header but insufficient actual data
|
||||
{
|
||||
constexpr uint32_t claimed_size = 100; // Claim 100 bytes
|
||||
constexpr uint32_t actual_data_size = 10; // But only provide 10 bytes
|
||||
|
||||
std::string variant_data;
|
||||
variant_data.resize(sizeof(uint32_t) + actual_data_size);
|
||||
|
||||
// Write size header claiming more data than available
|
||||
std::memcpy(variant_data.data(), &claimed_size, sizeof(uint32_t));
|
||||
// Fill with some dummy data
|
||||
std::memset(variant_data.data() + sizeof(uint32_t), 0x42, actual_data_size);
|
||||
|
||||
Slice variant_slice(variant_data);
|
||||
auto insufficient_variant = VariantValue::create(variant_slice);
|
||||
ASSERT_FALSE(insufficient_variant.ok());
|
||||
EXPECT_EQ("Invalid argument: Invalid variant size: 100 exceeds available data: 10",
|
||||
insufficient_variant.status().to_string());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
@ -102,7 +102,8 @@ enum TPrimitiveType {
|
|||
FUNCTION,
|
||||
VARBINARY,
|
||||
DECIMAL256,
|
||||
INT256
|
||||
INT256,
|
||||
VARIANT
|
||||
}
|
||||
|
||||
enum TTypeNodeType {
|
||||
|
|
|
|||
Loading…
Reference in New Issue