[BugFix] fix exception-safety of json extraction (#63575)

This commit is contained in:
Murphy 2025-09-28 10:15:41 +08:00 committed by GitHub
parent 5f951b8c82
commit cd4aaee776
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 71 additions and 22 deletions

View File

@ -113,6 +113,11 @@ Status FlatJsonColumnWriter::_flat_column(Columns& json_datas) {
flattener.flatten(json_data);
_flat_columns = flattener.mutable_result();
// IMPORTANT: Check flattener result integrity to prevent inconsistency
for (const auto& flat_col : _flat_columns) {
flat_col->check_or_die();
}
// recode null column in 1st
if (_json_meta->is_nullable()) {
auto nulls = NullColumn::create();
@ -222,6 +227,12 @@ Status FlatJsonColumnWriter::_init_flat_writers() {
Status FlatJsonColumnWriter::_write_flat_column() {
DCHECK(!_flat_columns.empty());
DCHECK_EQ(_flat_columns.size(), _flat_writers.size());
// IMPORTANT: Final integrity check before writing to prevent inconsistency
for (const auto& flat_col : _flat_columns) {
flat_col->check_or_die();
}
// flat datas
for (size_t i = 0; i < _flat_columns.size(); i++) {
RETURN_IF_ERROR(_flat_writers[i]->append(*_flat_columns[i]));

View File

@ -96,6 +96,9 @@ static StatusOr<RunTimeCppType<ResultType>> get_number_from_vpjson(const vpack::
return Status::JsonFormatError("not a number");
}
// Converts a JSON value to the specified type and appends it to the result column.
// Performs type conversion as needed to match the target type.
// TODO(murphy): it's duplicated with extract_number/extract_bool/extract_string in json_flattener.cpp
template <LogicalType ResultType, bool AllowThrowException>
static Status cast_vpjson_to(const vpack::Slice& slice, ColumnBuilder<ResultType>& result) {
if constexpr (!lt_is_arithmetic<ResultType> && !lt_is_string<ResultType> && ResultType != TYPE_JSON) {
@ -183,6 +186,7 @@ static Status cast_vpjson_to(const vpack::Slice& slice, ColumnBuilder<ResultType
if constexpr (AllowThrowException) {
return Status::JsonFormatError(fmt::format("cast from JSON to {} failed", type_to_string(ResultType)));
}
LOG(INFO) << "vpack::Exception in cast_vpjson_to: " << e.what();
result.append_null();
}
return Status::OK();

View File

@ -45,7 +45,6 @@
#include "exprs/column_ref.h"
#include "exprs/expr_context.h"
#include "gutil/casts.h"
#include "gutil/strings/split.h"
#include "runtime/types.h"
#include "storage/rowset/column_reader.h"
#include "types/logical_type.h"
@ -60,35 +59,46 @@ namespace starrocks {
namespace flat_json {
template <LogicalType TYPE>
void extract_number(const vpack::Slice* json, NullableColumn* result) {
// NOTE: this function catch all vpack exceptions, so it must be exception-safe, instead of modifying the result into
// an inconsistence state where null_column and data_column have different sizes.
using CppType = RunTimeCppType<TYPE>;
Datum datum;
try {
if (LIKELY(json->isNumber() || json->isString())) {
auto st = get_number_from_vpjson<TYPE>(*json);
if (st.ok()) {
result->null_column()->append(0);
down_cast<RunTimeColumnType<TYPE>*>(result->data_column().get())->append(st.value());
} else {
result->append_nulls(1);
datum.set<CppType>(st.value());
}
} else if (json->isNone() || json->isNull()) {
result->append_nulls(1);
datum.set_null();
} else if (json->isBool()) {
result->null_column()->append(0);
down_cast<RunTimeColumnType<TYPE>*>(result->data_column().get())->append(json->getBool());
datum.set<CppType>(static_cast<CppType>(json->getBool()));
} else {
result->append_nulls(1);
datum.set_null();
}
} catch (const vpack::Exception& e) {
LOG(INFO) << "vpack::Exception in extract_number: " << e.what();
datum.set_null();
}
if (datum.is_null()) {
result->append_nulls(1);
} else {
result->append_datum(datum);
}
}
void extract_bool(const vpack::Slice* json, NullableColumn* result) {
// NOTE: this function catch all vpack exceptions, so it must be exception-safe, instead of modifying the result into
// an inconsistence state where null_column and data_column have different sizes.
using CppType = RunTimeCppType<TYPE_BOOLEAN>;
Datum datum;
try {
if (json->isNone() || json->isNull()) {
result->append_nulls(1);
datum.set_null();
} else if (json->isBool()) {
auto res = json->getBool();
result->append_datum(res);
datum.set<CppType>(res);
} else if (json->isString()) {
vpack::ValueLength len;
const char* str = json->getStringUnchecked(len);
@ -97,42 +107,50 @@ void extract_bool(const vpack::Slice* json, NullableColumn* result) {
if (parseResult != StringParser::PARSE_SUCCESS || std::isnan(r) || std::isinf(r)) {
bool b = StringParser::string_to_bool(str, len, &parseResult);
if (parseResult != StringParser::PARSE_SUCCESS) {
result->append_nulls(1);
datum.set_null();
} else {
result->append_datum(b);
datum.set<CppType>(b);
}
} else {
result->append_datum(r != 0);
datum.set<CppType>(r != 0);
}
} else if (json->isNumber()) {
auto res = json->getNumber<double>();
result->append_datum(res != 0);
datum.set<CppType>(res != 0);
} else {
result->append_nulls(1);
datum.set_null();
}
} catch (const vpack::Exception& e) {
LOG(INFO) << "vpack::Exception in extract_bool: " << e.what();
datum.set_null();
}
if (datum.is_null()) {
result->append_nulls(1);
} else {
result->append_datum(datum);
}
}
void extract_string(const vpack::Slice* json, NullableColumn* result) {
// NOTE: this function catch all vpack exceptions, so it must be exception-safe, instead of modifying the result into
// an inconsistence state where null_column and data_column have different sizes.
try {
if (json->isNone() || json->isNull()) {
result->append_nulls(1);
} else if (json->isString()) {
result->null_column()->append(0);
vpack::ValueLength len;
const char* str = json->getStringUnchecked(len);
down_cast<BinaryColumn*>(result->data_column().get())->append(Slice(str, len));
result->append_datum(Datum(Slice(str, len)));
} else {
result->null_column()->append(0);
vpack::Options options = vpack::Options::Defaults;
options.singleLinePrettyPrint = true;
options.dumpAttributesInIndexOrder = false;
std::string str = json->toJson(&options);
down_cast<BinaryColumn*>(result->data_column().get())->append(Slice(str));
result->append_datum(Datum(Slice(str)));
}
} catch (const vpack::Exception& e) {
LOG(INFO) << "vpack::Exception in extract_string: " << e.what();
result->append_nulls(1);
}
}
@ -141,8 +159,8 @@ void extract_json(const vpack::Slice* json, NullableColumn* result) {
if (json->isNone()) {
result->append_nulls(1);
} else {
result->null_column()->append(0);
down_cast<JsonColumn*>(result->data_column().get())->append(JsonValue(*json));
result->null_column()->append(0);
}
}
@ -793,6 +811,11 @@ void JsonFlattener::flatten(const Column* json_column) {
for (auto& col : _flat_columns) {
DCHECK_EQ(col->size(), json_column->size());
}
// IMPORTANT: Check column integrity to prevent NullableColumn inconsistency
for (auto& col : _flat_columns) {
col->check_or_die();
}
}
template <bool REMAIN>
@ -1003,9 +1026,14 @@ ColumnPtr JsonMerger::merge(const Columns& columns) {
_src_columns.clear();
if (_output_nullable) {
down_cast<NullableColumn*>(_result.get())->update_has_null();
// IMPORTANT: Check column integrity to prevent NullableColumn inconsistency
_result->check_or_die();
return _result;
} else {
return down_cast<NullableColumn*>(_result.get())->data_column();
auto data_column = down_cast<NullableColumn*>(_result.get())->data_column();
// IMPORTANT: Check column integrity to prevent NullableColumn inconsistency
data_column->check_or_die();
return data_column;
}
}
@ -1426,6 +1454,12 @@ Status HyperJsonTransformer::trans(const Columns& columns) {
DCHECK_EQ(rows, _dst_columns[i]->size());
}
}
// IMPORTANT: Check column integrity to prevent NullableColumn inconsistency
for (auto& col : _dst_columns) {
col->check_or_die();
}
return Status::OK();
}