[BugFix] fix exception-safety of json extraction (#63575)
This commit is contained in:
parent
5f951b8c82
commit
cd4aaee776
|
|
@ -113,6 +113,11 @@ Status FlatJsonColumnWriter::_flat_column(Columns& json_datas) {
|
|||
flattener.flatten(json_data);
|
||||
_flat_columns = flattener.mutable_result();
|
||||
|
||||
// IMPORTANT: Check flattener result integrity to prevent inconsistency
|
||||
for (const auto& flat_col : _flat_columns) {
|
||||
flat_col->check_or_die();
|
||||
}
|
||||
|
||||
// recode null column in 1st
|
||||
if (_json_meta->is_nullable()) {
|
||||
auto nulls = NullColumn::create();
|
||||
|
|
@ -222,6 +227,12 @@ Status FlatJsonColumnWriter::_init_flat_writers() {
|
|||
Status FlatJsonColumnWriter::_write_flat_column() {
|
||||
DCHECK(!_flat_columns.empty());
|
||||
DCHECK_EQ(_flat_columns.size(), _flat_writers.size());
|
||||
|
||||
// IMPORTANT: Final integrity check before writing to prevent inconsistency
|
||||
for (const auto& flat_col : _flat_columns) {
|
||||
flat_col->check_or_die();
|
||||
}
|
||||
|
||||
// flat datas
|
||||
for (size_t i = 0; i < _flat_columns.size(); i++) {
|
||||
RETURN_IF_ERROR(_flat_writers[i]->append(*_flat_columns[i]));
|
||||
|
|
|
|||
|
|
@ -96,6 +96,9 @@ static StatusOr<RunTimeCppType<ResultType>> get_number_from_vpjson(const vpack::
|
|||
return Status::JsonFormatError("not a number");
|
||||
}
|
||||
|
||||
// Converts a JSON value to the specified type and appends it to the result column.
|
||||
// Performs type conversion as needed to match the target type.
|
||||
// TODO(murphy): it's duplicated with extract_number/extract_bool/extract_string in json_flattener.cpp
|
||||
template <LogicalType ResultType, bool AllowThrowException>
|
||||
static Status cast_vpjson_to(const vpack::Slice& slice, ColumnBuilder<ResultType>& result) {
|
||||
if constexpr (!lt_is_arithmetic<ResultType> && !lt_is_string<ResultType> && ResultType != TYPE_JSON) {
|
||||
|
|
@ -183,6 +186,7 @@ static Status cast_vpjson_to(const vpack::Slice& slice, ColumnBuilder<ResultType
|
|||
if constexpr (AllowThrowException) {
|
||||
return Status::JsonFormatError(fmt::format("cast from JSON to {} failed", type_to_string(ResultType)));
|
||||
}
|
||||
LOG(INFO) << "vpack::Exception in cast_vpjson_to: " << e.what();
|
||||
result.append_null();
|
||||
}
|
||||
return Status::OK();
|
||||
|
|
|
|||
|
|
@ -45,7 +45,6 @@
|
|||
#include "exprs/column_ref.h"
|
||||
#include "exprs/expr_context.h"
|
||||
#include "gutil/casts.h"
|
||||
#include "gutil/strings/split.h"
|
||||
#include "runtime/types.h"
|
||||
#include "storage/rowset/column_reader.h"
|
||||
#include "types/logical_type.h"
|
||||
|
|
@ -60,35 +59,46 @@ namespace starrocks {
|
|||
namespace flat_json {
|
||||
template <LogicalType TYPE>
|
||||
void extract_number(const vpack::Slice* json, NullableColumn* result) {
|
||||
// NOTE: this function catch all vpack exceptions, so it must be exception-safe, instead of modifying the result into
|
||||
// an inconsistence state where null_column and data_column have different sizes.
|
||||
using CppType = RunTimeCppType<TYPE>;
|
||||
Datum datum;
|
||||
try {
|
||||
if (LIKELY(json->isNumber() || json->isString())) {
|
||||
auto st = get_number_from_vpjson<TYPE>(*json);
|
||||
if (st.ok()) {
|
||||
result->null_column()->append(0);
|
||||
down_cast<RunTimeColumnType<TYPE>*>(result->data_column().get())->append(st.value());
|
||||
} else {
|
||||
result->append_nulls(1);
|
||||
datum.set<CppType>(st.value());
|
||||
}
|
||||
} else if (json->isNone() || json->isNull()) {
|
||||
result->append_nulls(1);
|
||||
datum.set_null();
|
||||
} else if (json->isBool()) {
|
||||
result->null_column()->append(0);
|
||||
down_cast<RunTimeColumnType<TYPE>*>(result->data_column().get())->append(json->getBool());
|
||||
datum.set<CppType>(static_cast<CppType>(json->getBool()));
|
||||
} else {
|
||||
result->append_nulls(1);
|
||||
datum.set_null();
|
||||
}
|
||||
} catch (const vpack::Exception& e) {
|
||||
LOG(INFO) << "vpack::Exception in extract_number: " << e.what();
|
||||
datum.set_null();
|
||||
}
|
||||
|
||||
if (datum.is_null()) {
|
||||
result->append_nulls(1);
|
||||
} else {
|
||||
result->append_datum(datum);
|
||||
}
|
||||
}
|
||||
|
||||
void extract_bool(const vpack::Slice* json, NullableColumn* result) {
|
||||
// NOTE: this function catch all vpack exceptions, so it must be exception-safe, instead of modifying the result into
|
||||
// an inconsistence state where null_column and data_column have different sizes.
|
||||
using CppType = RunTimeCppType<TYPE_BOOLEAN>;
|
||||
Datum datum;
|
||||
try {
|
||||
if (json->isNone() || json->isNull()) {
|
||||
result->append_nulls(1);
|
||||
datum.set_null();
|
||||
} else if (json->isBool()) {
|
||||
auto res = json->getBool();
|
||||
result->append_datum(res);
|
||||
datum.set<CppType>(res);
|
||||
} else if (json->isString()) {
|
||||
vpack::ValueLength len;
|
||||
const char* str = json->getStringUnchecked(len);
|
||||
|
|
@ -97,42 +107,50 @@ void extract_bool(const vpack::Slice* json, NullableColumn* result) {
|
|||
if (parseResult != StringParser::PARSE_SUCCESS || std::isnan(r) || std::isinf(r)) {
|
||||
bool b = StringParser::string_to_bool(str, len, &parseResult);
|
||||
if (parseResult != StringParser::PARSE_SUCCESS) {
|
||||
result->append_nulls(1);
|
||||
datum.set_null();
|
||||
} else {
|
||||
result->append_datum(b);
|
||||
datum.set<CppType>(b);
|
||||
}
|
||||
} else {
|
||||
result->append_datum(r != 0);
|
||||
datum.set<CppType>(r != 0);
|
||||
}
|
||||
} else if (json->isNumber()) {
|
||||
auto res = json->getNumber<double>();
|
||||
result->append_datum(res != 0);
|
||||
datum.set<CppType>(res != 0);
|
||||
} else {
|
||||
result->append_nulls(1);
|
||||
datum.set_null();
|
||||
}
|
||||
} catch (const vpack::Exception& e) {
|
||||
LOG(INFO) << "vpack::Exception in extract_bool: " << e.what();
|
||||
datum.set_null();
|
||||
}
|
||||
|
||||
if (datum.is_null()) {
|
||||
result->append_nulls(1);
|
||||
} else {
|
||||
result->append_datum(datum);
|
||||
}
|
||||
}
|
||||
|
||||
void extract_string(const vpack::Slice* json, NullableColumn* result) {
|
||||
// NOTE: this function catch all vpack exceptions, so it must be exception-safe, instead of modifying the result into
|
||||
// an inconsistence state where null_column and data_column have different sizes.
|
||||
try {
|
||||
if (json->isNone() || json->isNull()) {
|
||||
result->append_nulls(1);
|
||||
} else if (json->isString()) {
|
||||
result->null_column()->append(0);
|
||||
vpack::ValueLength len;
|
||||
const char* str = json->getStringUnchecked(len);
|
||||
down_cast<BinaryColumn*>(result->data_column().get())->append(Slice(str, len));
|
||||
result->append_datum(Datum(Slice(str, len)));
|
||||
} else {
|
||||
result->null_column()->append(0);
|
||||
vpack::Options options = vpack::Options::Defaults;
|
||||
options.singleLinePrettyPrint = true;
|
||||
options.dumpAttributesInIndexOrder = false;
|
||||
std::string str = json->toJson(&options);
|
||||
down_cast<BinaryColumn*>(result->data_column().get())->append(Slice(str));
|
||||
result->append_datum(Datum(Slice(str)));
|
||||
}
|
||||
} catch (const vpack::Exception& e) {
|
||||
LOG(INFO) << "vpack::Exception in extract_string: " << e.what();
|
||||
result->append_nulls(1);
|
||||
}
|
||||
}
|
||||
|
|
@ -141,8 +159,8 @@ void extract_json(const vpack::Slice* json, NullableColumn* result) {
|
|||
if (json->isNone()) {
|
||||
result->append_nulls(1);
|
||||
} else {
|
||||
result->null_column()->append(0);
|
||||
down_cast<JsonColumn*>(result->data_column().get())->append(JsonValue(*json));
|
||||
result->null_column()->append(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -793,6 +811,11 @@ void JsonFlattener::flatten(const Column* json_column) {
|
|||
for (auto& col : _flat_columns) {
|
||||
DCHECK_EQ(col->size(), json_column->size());
|
||||
}
|
||||
|
||||
// IMPORTANT: Check column integrity to prevent NullableColumn inconsistency
|
||||
for (auto& col : _flat_columns) {
|
||||
col->check_or_die();
|
||||
}
|
||||
}
|
||||
|
||||
template <bool REMAIN>
|
||||
|
|
@ -1003,9 +1026,14 @@ ColumnPtr JsonMerger::merge(const Columns& columns) {
|
|||
_src_columns.clear();
|
||||
if (_output_nullable) {
|
||||
down_cast<NullableColumn*>(_result.get())->update_has_null();
|
||||
// IMPORTANT: Check column integrity to prevent NullableColumn inconsistency
|
||||
_result->check_or_die();
|
||||
return _result;
|
||||
} else {
|
||||
return down_cast<NullableColumn*>(_result.get())->data_column();
|
||||
auto data_column = down_cast<NullableColumn*>(_result.get())->data_column();
|
||||
// IMPORTANT: Check column integrity to prevent NullableColumn inconsistency
|
||||
data_column->check_or_die();
|
||||
return data_column;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1426,6 +1454,12 @@ Status HyperJsonTransformer::trans(const Columns& columns) {
|
|||
DCHECK_EQ(rows, _dst_columns[i]->size());
|
||||
}
|
||||
}
|
||||
|
||||
// IMPORTANT: Check column integrity to prevent NullableColumn inconsistency
|
||||
for (auto& col : _dst_columns) {
|
||||
col->check_or_die();
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue