[BugFix] Fix the crash issue caused by using an uninitialized column evaluator in iceberg partition writer. (#63782)
Signed-off-by: GavinMar <yangguansuo@starrocks.com>
This commit is contained in:
parent
647ed56800
commit
3b454d6ec9
|
|
@ -56,8 +56,9 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> FileChunkSinkProvider::create_chun
|
|||
std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
|
||||
if (boost::iequals(ctx->format, formats::PARQUET)) {
|
||||
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
|
||||
fs, ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators), std::nullopt,
|
||||
ctx->executor, runtime_state);
|
||||
fs, ctx->compression_type, ctx->options, ctx->column_names,
|
||||
std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(std::move(column_evaluators)),
|
||||
std::nullopt, ctx->executor, runtime_state);
|
||||
} else if (boost::iequals(ctx->format, formats::ORC)) {
|
||||
file_writer_factory = std::make_shared<formats::ORCFileWriterFactory>(
|
||||
fs, ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators), ctx->executor,
|
||||
|
|
|
|||
|
|
@ -66,7 +66,8 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> HiveChunkSinkProvider::create_chun
|
|||
ctx->options[formats::ParquetWriterOptions::USE_LEGACY_DECIMAL_ENCODING] = "true";
|
||||
ctx->options[formats::ParquetWriterOptions::USE_INT96_TIMESTAMP_ENCODING] = "true";
|
||||
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
|
||||
fs, ctx->compression_type, ctx->options, ctx->data_column_names, std::move(data_column_evaluators),
|
||||
fs, ctx->compression_type, ctx->options, ctx->data_column_names,
|
||||
std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(std::move(data_column_evaluators)),
|
||||
std::nullopt, ctx->executor, runtime_state);
|
||||
} else if (boost::iequals(ctx->format, formats::ORC)) {
|
||||
file_writer_factory = std::make_shared<formats::ORCFileWriterFactory>(
|
||||
|
|
|
|||
|
|
@ -82,7 +82,8 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
|
|||
auto ctx = std::dynamic_pointer_cast<IcebergChunkSinkContext>(context);
|
||||
auto runtime_state = ctx->fragment_context->runtime_state();
|
||||
std::shared_ptr<FileSystem> fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value();
|
||||
auto column_evaluators = ColumnEvaluator::clone(ctx->column_evaluators);
|
||||
auto column_evaluators = std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(
|
||||
ColumnEvaluator::clone(ctx->column_evaluators));
|
||||
auto location_provider = std::make_shared<connector::LocationProvider>(
|
||||
ctx->path, print_id(ctx->fragment_context->query_id()), runtime_state->be_number(), driver_id,
|
||||
boost::to_lower_copy(ctx->format));
|
||||
|
|
@ -93,8 +94,8 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
|
|||
std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
|
||||
if (boost::iequals(ctx->format, formats::PARQUET)) {
|
||||
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
|
||||
fs, ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
|
||||
ctx->parquet_field_ids, ctx->executor, runtime_state);
|
||||
fs, ctx->compression_type, ctx->options, ctx->column_names, column_evaluators, ctx->parquet_field_ids,
|
||||
ctx->executor, runtime_state);
|
||||
} else {
|
||||
file_writer_factory = std::make_shared<formats::UnknownFileWriterFactory>(ctx->format);
|
||||
}
|
||||
|
|
@ -107,7 +108,7 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
|
|||
fs,
|
||||
ctx->fragment_context,
|
||||
runtime_state->desc_tbl().get_tuple_descriptor(ctx->tuple_desc_id),
|
||||
&ctx->column_evaluators,
|
||||
column_evaluators,
|
||||
ctx->sort_ordering});
|
||||
partition_chunk_writer_factory = std::make_unique<SpillPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ struct SpillPartitionChunkWriterContext : public PartitionChunkWriterContext {
|
|||
std::shared_ptr<FileSystem> fs;
|
||||
pipeline::FragmentContext* fragment_context = nullptr;
|
||||
TupleDescriptor* tuple_desc = nullptr;
|
||||
std::vector<std::unique_ptr<ColumnEvaluator>>* column_evaluators;
|
||||
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> column_evaluators;
|
||||
std::shared_ptr<SortOrdering> sort_ordering;
|
||||
};
|
||||
|
||||
|
|
@ -185,13 +185,12 @@ private:
|
|||
std::shared_ptr<FileSystem> _fs = nullptr;
|
||||
pipeline::FragmentContext* _fragment_context = nullptr;
|
||||
TupleDescriptor* _tuple_desc = nullptr;
|
||||
std::vector<std::unique_ptr<ColumnEvaluator>>* _column_evaluators;
|
||||
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> _column_evaluators;
|
||||
std::shared_ptr<SortOrdering> _sort_ordering;
|
||||
std::unique_ptr<ThreadPoolToken> _chunk_spill_token;
|
||||
std::unique_ptr<ThreadPoolToken> _block_merge_token;
|
||||
std::unique_ptr<LoadSpillBlockManager> _load_spill_block_mgr;
|
||||
std::shared_ptr<LoadChunkSpiller> _load_chunk_spiller;
|
||||
//std::function<StatusOr<ColumnPtr>(Chunk*, size_t)> _column_eval_func;
|
||||
TUniqueId _writer_id;
|
||||
|
||||
std::list<ChunkPtr> _chunks;
|
||||
|
|
|
|||
|
|
@ -455,13 +455,12 @@ Status ParquetFileWriter::init() {
|
|||
|
||||
ParquetFileWriter::~ParquetFileWriter() = default;
|
||||
|
||||
ParquetFileWriterFactory::ParquetFileWriterFactory(std::shared_ptr<FileSystem> fs,
|
||||
TCompressionType::type compression_type,
|
||||
std::map<std::string, std::string> options,
|
||||
std::vector<std::string> column_names,
|
||||
std::vector<std::unique_ptr<ColumnEvaluator>>&& column_evaluators,
|
||||
std::optional<std::vector<formats::FileColumnId>> field_ids,
|
||||
PriorityThreadPool* executors, RuntimeState* runtime_state)
|
||||
ParquetFileWriterFactory::ParquetFileWriterFactory(
|
||||
std::shared_ptr<FileSystem> fs, TCompressionType::type compression_type,
|
||||
std::map<std::string, std::string> options, std::vector<std::string> column_names,
|
||||
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> column_evaluators,
|
||||
std::optional<std::vector<formats::FileColumnId>> field_ids, PriorityThreadPool* executors,
|
||||
RuntimeState* runtime_state)
|
||||
: _fs(std::move(fs)),
|
||||
_compression_type(compression_type),
|
||||
_field_ids(std::move(field_ids)),
|
||||
|
|
@ -472,7 +471,7 @@ ParquetFileWriterFactory::ParquetFileWriterFactory(std::shared_ptr<FileSystem> f
|
|||
_runtime_state(runtime_state) {}
|
||||
|
||||
Status ParquetFileWriterFactory::init() {
|
||||
RETURN_IF_ERROR(ColumnEvaluator::init(_column_evaluators));
|
||||
RETURN_IF_ERROR(ColumnEvaluator::init(*_column_evaluators));
|
||||
_parsed_options = std::make_shared<ParquetWriterOptions>();
|
||||
_parsed_options->column_ids = _field_ids;
|
||||
if (_options.contains(ParquetWriterOptions::USE_LEGACY_DECIMAL_ENCODING)) {
|
||||
|
|
@ -506,8 +505,8 @@ StatusOr<WriterAndStream> ParquetFileWriterFactory::create(const std::string& pa
|
|||
auto rollback_action = [fs = _fs, path = path]() {
|
||||
WARN_IF_ERROR(ignore_not_found(fs->delete_file(path)), "fail to delete file");
|
||||
};
|
||||
auto column_evaluators = ColumnEvaluator::clone(_column_evaluators);
|
||||
auto types = ColumnEvaluator::types(_column_evaluators);
|
||||
auto column_evaluators = ColumnEvaluator::clone(*_column_evaluators);
|
||||
auto types = ColumnEvaluator::types(*_column_evaluators);
|
||||
auto async_output_stream =
|
||||
std::make_unique<io::AsyncFlushOutputStream>(std::move(file), _executors, _runtime_state);
|
||||
auto parquet_output_stream = std::make_shared<parquet::AsyncParquetOutputStream>(async_output_stream.get());
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ class ParquetFileWriterFactory : public FileWriterFactory {
|
|||
public:
|
||||
ParquetFileWriterFactory(std::shared_ptr<FileSystem> fs, TCompressionType::type compression_type,
|
||||
std::map<std::string, std::string> options, std::vector<std::string> column_names,
|
||||
std::vector<std::unique_ptr<ColumnEvaluator>>&& column_evaluators,
|
||||
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> column_evaluators,
|
||||
std::optional<std::vector<formats::FileColumnId>> field_ids, PriorityThreadPool* executors,
|
||||
RuntimeState* runtime_state);
|
||||
|
||||
|
|
@ -178,7 +178,7 @@ private:
|
|||
std::shared_ptr<ParquetWriterOptions> _parsed_options;
|
||||
|
||||
std::vector<std::string> _column_names;
|
||||
std::vector<std::unique_ptr<ColumnEvaluator>> _column_evaluators;
|
||||
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> _column_evaluators;
|
||||
PriorityThreadPool* _executors = nullptr;
|
||||
RuntimeState* _runtime_state = nullptr;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1123,10 +1123,11 @@ TEST_F(ParquetFileWriterTest, TestFactory) {
|
|||
std::vector<TypeDescriptor> type_descs{type_bool};
|
||||
|
||||
auto column_names = _make_type_names(type_descs);
|
||||
auto column_evaluators = ColumnSlotIdEvaluator::from_types(type_descs);
|
||||
auto column_evaluators = std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(
|
||||
ColumnSlotIdEvaluator::from_types(type_descs));
|
||||
auto fs = std::make_shared<MemoryFileSystem>();
|
||||
auto factory = formats::ParquetFileWriterFactory(fs, TCompressionType::NO_COMPRESSION, {}, column_names,
|
||||
std::move(column_evaluators), std::nullopt, nullptr, nullptr);
|
||||
column_evaluators, std::nullopt, nullptr, nullptr);
|
||||
ASSERT_OK(factory.init());
|
||||
auto maybe_writer = factory.create(_file_path);
|
||||
ASSERT_OK(maybe_writer.status());
|
||||
|
|
|
|||
Loading…
Reference in New Issue