[BugFix] Fix async delta writer crash due to null pointer (backport #62626) (#62651)

Signed-off-by: xiangguangyxg <xiangguangyxg@gmail.com>
Co-authored-by: xiangguangyxg <110401425+xiangguangyxg@users.noreply.github.com>
This commit is contained in:
mergify[bot] 2025-09-02 10:28:34 +00:00 committed by GitHub
parent 4a333abbd2
commit ebf06bbe4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 59 additions and 8 deletions

View File

@ -452,8 +452,8 @@ void LakeTabletsChannel::add_chunk(Chunk* chunk, const PTabletWriterAddChunkRequ
}
total_row_num += size;
int64_t tablet_id = tablet_ids[row_indexes[from]];
auto& dw = _delta_writers[tablet_id];
if (dw == nullptr) {
auto delta_writer_iter = _delta_writers.find(tablet_id);
if (delta_writer_iter == _delta_writers.end()) {
LOG(WARNING) << "LakeTabletsChannel txn_id: " << _txn_id << " load_id: " << print_id(request.id())
<< " not found tablet_id: " << tablet_id;
response->mutable_status()->set_status_code(TStatusCode::INTERNAL_ERROR);
@ -464,6 +464,7 @@ void LakeTabletsChannel::add_chunk(Chunk* chunk, const PTabletWriterAddChunkRequ
}
// back pressure OlapTableSink since there are too many memtables need to flush
auto& dw = delta_writer_iter->second;
while (dw->queueing_memtable_num() >= config::max_queueing_memtable_per_tablet) {
if (watch.elapsed_time() / 1000000 > request.timeout_ms()) {
LOG(INFO) << "LakeTabletsChannel txn_id: " << _txn_id << " load_id: " << print_id(request.id())
@ -549,7 +550,18 @@ void LakeTabletsChannel::add_chunk(Chunk* chunk, const PTabletWriterAddChunkRequ
std::set<long> immutable_tablet_ids;
for (auto tablet_id : request.tablet_ids()) {
auto& writer = _delta_writers[tablet_id];
auto writer_iter = _delta_writers.find(tablet_id);
if (writer_iter == _delta_writers.end()) {
LOG(WARNING) << "LakeTabletsChannel txn_id: " << _txn_id << " load_id: " << print_id(request.id())
<< " not found tablet_id: " << tablet_id;
response->mutable_status()->set_status_code(TStatusCode::INTERNAL_ERROR);
response->mutable_status()->add_error_msgs(
fmt::format("Failed to add_chunk since tablet_id {} not exists, txn_id: {}, load_id: {}", tablet_id,
_txn_id, print_id(request.id())));
return;
}
auto& writer = writer_iter->second;
if (writer->is_immutable() && immutable_tablet_ids.count(tablet_id) == 0) {
response->add_immutable_tablet_ids(tablet_id);
response->add_immutable_partition_ids(writer->partition_id());

View File

@ -44,7 +44,7 @@ TEST(AggCompressedKey, could_bound) {
used_bytes.resize(1);
auto type1 = TypeDescriptor(TYPE_INT);
groupby.emplace_back(type1, false);
groupby.emplace_back(ColumnType{type1, false});
std::vector<std::optional<std::pair<VectorizedLiteral*, VectorizedLiteral*>>> ranges;
auto* min = pool.add(new VectorizedLiteral(ColumnHelper::create_const_column<TYPE_INT>(0, 1), type1));
auto* max = pool.add(new VectorizedLiteral(ColumnHelper::create_const_column<TYPE_INT>(100, 1), type1));
@ -70,8 +70,8 @@ TEST(AggCompressedKey, could_bound) {
used_bytes.resize(2);
auto type1 = TypeDescriptor(TYPE_INT);
groupby.emplace_back(type1, false);
groupby.emplace_back(type1, true);
groupby.emplace_back(ColumnType{type1, false});
groupby.emplace_back(ColumnType{type1, true});
std::vector<std::optional<std::pair<VectorizedLiteral*, VectorizedLiteral*>>> ranges;
auto* min = pool.add(new VectorizedLiteral(ColumnHelper::create_const_column<TYPE_INT>(0, 1), type1));
auto* max = pool.add(new VectorizedLiteral(ColumnHelper::create_const_column<TYPE_INT>(100, 1), type1));
@ -98,8 +98,8 @@ TEST(AggCompressedKey, could_bound) {
used_bytes.resize(2);
auto type1 = TypeDescriptor::create_decimalv3_type(TYPE_DECIMAL128, 8, 4);
groupby.emplace_back(type1, false);
groupby.emplace_back(type1, true);
groupby.emplace_back(ColumnType{type1, false});
groupby.emplace_back(ColumnType{type1, true});
std::vector<std::optional<std::pair<VectorizedLiteral*, VectorizedLiteral*>>> ranges;
auto* min = pool.add(
new VectorizedLiteral(ColumnHelper::create_const_decimal_column<TYPE_DECIMAL128>(0, 8, 4, 1), type1));

View File

@ -690,6 +690,45 @@ TEST_F(LakeTabletsChannelTest, test_write_failed) {
ASSERT_FALSE(fs::path_exist(_tablet_manager->txn_log_location(10088, kTxnId)));
}
TEST_F(LakeTabletsChannelTest, test_tablet_not_existed) {
auto open_request = _open_request;
open_request.set_num_senders(1);
ASSERT_OK(_tablets_channel->open(open_request, &_open_response, _schema_param, false));
constexpr int kChunkSize = 128;
constexpr int kChunkSizePerTablet = kChunkSize / 4;
auto chunk = generate_data(kChunkSize);
PTabletWriterAddChunkRequest add_chunk_request;
PTabletWriterAddBatchResult add_chunk_response;
add_chunk_request.set_index_id(kIndexId);
add_chunk_request.set_sender_id(0);
add_chunk_request.set_eos(false);
add_chunk_request.set_packet_seq(0);
for (int i = 0; i < kChunkSize; i++) {
int64_t tablet_id = 10086 + (i / kChunkSizePerTablet);
add_chunk_request.add_tablet_ids(tablet_id);
add_chunk_request.add_partition_ids(tablet_id < 10088 ? 10 : 11);
}
ASSIGN_OR_ABORT(auto chunk_pb, serde::ProtobufChunkSerde::serialize(chunk));
add_chunk_request.mutable_chunk()->Swap(&chunk_pb);
add_chunk_request.add_tablet_ids(10000); // Not existed tablet id
bool close_channel;
_tablets_channel->add_chunk(&chunk, add_chunk_request, &add_chunk_response, &close_channel);
ASSERT_NE(TStatusCode::INTERNAL_ERROR, add_chunk_response.status().status_code());
ASSERT_FALSE(close_channel);
_tablets_channel->abort();
ASSERT_FALSE(fs::path_exist(_tablet_manager->txn_log_location(10086, kTxnId)));
ASSERT_FALSE(fs::path_exist(_tablet_manager->txn_log_location(10087, kTxnId)));
ASSERT_FALSE(fs::path_exist(_tablet_manager->txn_log_location(10088, kTxnId)));
}
TEST_F(LakeTabletsChannelTest, test_empty_tablet) {
auto open_request = _open_request;
open_request.set_num_senders(1);