[BugFix] Fix pk index cumulative compaction strategy when max_rss_rowid is same (backport #63277) (#63359)

Signed-off-by: luohaha <18810541851@163.com>
Co-authored-by: Yixin Luo <18810541851@163.com>
This commit is contained in:
mergify[bot] 2025-09-22 14:49:20 +08:00 committed by GitHub
parent e82a6491a5
commit bf1bd75627
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 94 additions and 0 deletions

View File

@ -402,6 +402,22 @@ void LakePersistentIndex::pick_sstables_for_merge(const PersistentIndexSstableMe
if (sstables->size() > max_limit) {
sstables->resize(max_limit);
}
if (!*merge_base_level && sstables->size() > 0 &&
sstable_meta.sstables(0).max_rss_rowid() == sstables->back().max_rss_rowid()) {
// If base sstable's max_rss_rowid is same as cumulative sstable's max_rss_rowid,
// we should force to do base merge.
// That's because in `LakePersistentIndex::apply_opcompaction`, we will sort sstable
// by max_rss_rowid, and if they are same, the base sstable will be after cumulative sstable,
// which is not what we want. E.g.
// t1 : (base sst: max_rss_rowid = 4, cumulative sst: max_rss_rowid = 4, cumulative sst: max_rss_rowid = 4)
// t2 : do cumulative compaction, merge these two cumulative sst, and get new (cumulative sst: max_rss_rowid = 4).
// t3 : apply new cumulative sst, now the order is:
// (cumulative sst: max_rss_rowid = 4, base sst: max_rss_rowid = 4)
// which is wrong, because base sst should be before cumulative sst.
// so we force to do base merge here.
*merge_base_level = true;
sstables->insert(sstables->begin(), sstable_meta.sstables(0));
}
}
Status LakePersistentIndex::prepare_merging_iterator(

View File

@ -262,9 +262,11 @@ TEST_F(LakePersistentIndexTest, test_compaction_strategy) {
auto* sstable_pb = sstable_meta.add_sstables();
sstable_pb->set_filesize(1000000);
sstable_pb->set_filename("aaa.sst");
sstable_pb->set_max_rss_rowid(0);
for (int i = 0; i < N; i++) {
sstable_pb = sstable_meta.add_sstables();
sstable_pb->set_filesize(sub_size);
sstable_pb->set_max_rss_rowid(i + 1);
}
LakePersistentIndex::pick_sstables_for_merge(sstable_meta, &sstables, &merge_base_level);
if (is_base) {
@ -379,6 +381,82 @@ TEST_F(LakePersistentIndexTest, test_memtable_full) {
config::l0_max_mem_usage = old_l0_max_mem_usage;
}
TEST_F(LakePersistentIndexTest, test_compaction_strategy_same_max_rss_rowid) {
// Test case for the fix: when base sstable's max_rss_rowid is same as cumulative sstable's max_rss_rowid,
// we should force to do base merge instead of cumulative merge.
PersistentIndexSstableMetaPB sstable_meta;
std::vector<PersistentIndexSstablePB> sstables;
bool merge_base_level = false;
// Setup: create a scenario where cumulative merge would normally be preferred
// but base and cumulative sstables have the same max_rss_rowid
sstable_meta.Clear();
sstables.clear();
// Add base sstable (index 0) with large size
auto* base_sstable = sstable_meta.add_sstables();
base_sstable->set_filesize(1000000); // 1MB
base_sstable->set_filename("base.sst");
base_sstable->set_max_rss_rowid(100); // Same max_rss_rowid
// Add cumulative sstables with small total size (would trigger cumulative merge normally)
auto* cumulative_sstable = sstable_meta.add_sstables();
cumulative_sstable->set_filesize(50000); // 50KB - much smaller than base
cumulative_sstable->set_filename("cumulative1.sst");
cumulative_sstable->set_max_rss_rowid(100); // Same max_rss_rowid as base
// Without the fix, this would choose cumulative merge because:
// base_level_bytes * ratio (1000000 * 0.1 = 100000) > cumulative_level_bytes (50000)
// But with the fix, it should choose base merge due to same max_rss_rowid
LakePersistentIndex::pick_sstables_for_merge(sstable_meta, &sstables, &merge_base_level);
// Verify that base merge is chosen (merge_base_level = true)
ASSERT_TRUE(merge_base_level) << "Should force base merge when max_rss_rowid is same";
ASSERT_EQ(2, sstables.size()) << "Should include both base and cumulative sstables";
ASSERT_EQ("base.sst", sstables[0].filename()) << "Base sstable should be first";
ASSERT_EQ("cumulative1.sst", sstables[1].filename()) << "Cumulative sstable should be second";
// Test the normal case where max_rss_rowid is different
sstable_meta.Clear();
sstables.clear();
base_sstable = sstable_meta.add_sstables();
base_sstable->set_filesize(1000000);
base_sstable->set_filename("base2.sst");
base_sstable->set_max_rss_rowid(100); // Different max_rss_rowid
cumulative_sstable = sstable_meta.add_sstables();
cumulative_sstable->set_filesize(50000);
cumulative_sstable->set_filename("cumulative2.sst");
cumulative_sstable->set_max_rss_rowid(200); // Different max_rss_rowid
LakePersistentIndex::pick_sstables_for_merge(sstable_meta, &sstables, &merge_base_level);
// This should choose cumulative merge since max_rss_rowid is different
ASSERT_FALSE(merge_base_level) << "Should choose cumulative merge when max_rss_rowid is different";
ASSERT_EQ(1, sstables.size()) << "Should only include cumulative sstables";
ASSERT_EQ("cumulative2.sst", sstables[0].filename()) << "Only cumulative sstable should be included";
// Test edge case: empty cumulative sstables
sstable_meta.Clear();
sstables.clear();
base_sstable = sstable_meta.add_sstables();
base_sstable->set_filesize(1000000);
base_sstable->set_filename("base3.sst");
base_sstable->set_max_rss_rowid(100);
// No cumulative sstables added
LakePersistentIndex::pick_sstables_for_merge(sstable_meta, &sstables, &merge_base_level);
// Should choose base merge since there are no cumulative sstables
ASSERT_TRUE(!merge_base_level) << "Should choose cumulative merge when no cumulative sstables exist";
ASSERT_EQ(0, sstables.size()) << "Should be empty since no cumulative sstables exist";
}
TEST_F(LakePersistentIndexTest, test_major_compaction_with_predicate) {
auto l0_max_mem_usage = config::l0_max_mem_usage;
auto lake_pk_index_cumulative_base_compaction_ratio = config::lake_pk_index_cumulative_base_compaction_ratio;