[BugFix] Fix pk index cumulative compaction strategy when max_rss_rowid is same (backport #63277) (#63359)
Signed-off-by: luohaha <18810541851@163.com> Co-authored-by: Yixin Luo <18810541851@163.com>
This commit is contained in:
parent
e82a6491a5
commit
bf1bd75627
|
|
@ -402,6 +402,22 @@ void LakePersistentIndex::pick_sstables_for_merge(const PersistentIndexSstableMe
|
|||
if (sstables->size() > max_limit) {
|
||||
sstables->resize(max_limit);
|
||||
}
|
||||
if (!*merge_base_level && sstables->size() > 0 &&
|
||||
sstable_meta.sstables(0).max_rss_rowid() == sstables->back().max_rss_rowid()) {
|
||||
// If base sstable's max_rss_rowid is same as cumulative sstable's max_rss_rowid,
|
||||
// we should force to do base merge.
|
||||
// That's because in `LakePersistentIndex::apply_opcompaction`, we will sort sstable
|
||||
// by max_rss_rowid, and if they are same, the base sstable will be after cumulative sstable,
|
||||
// which is not what we want. E.g.
|
||||
// t1 : (base sst: max_rss_rowid = 4, cumulative sst: max_rss_rowid = 4, cumulative sst: max_rss_rowid = 4)
|
||||
// t2 : do cumulative compaction, merge these two cumulative sst, and get new (cumulative sst: max_rss_rowid = 4).
|
||||
// t3 : apply new cumulative sst, now the order is:
|
||||
// (cumulative sst: max_rss_rowid = 4, base sst: max_rss_rowid = 4)
|
||||
// which is wrong, because base sst should be before cumulative sst.
|
||||
// so we force to do base merge here.
|
||||
*merge_base_level = true;
|
||||
sstables->insert(sstables->begin(), sstable_meta.sstables(0));
|
||||
}
|
||||
}
|
||||
|
||||
Status LakePersistentIndex::prepare_merging_iterator(
|
||||
|
|
|
|||
|
|
@ -262,9 +262,11 @@ TEST_F(LakePersistentIndexTest, test_compaction_strategy) {
|
|||
auto* sstable_pb = sstable_meta.add_sstables();
|
||||
sstable_pb->set_filesize(1000000);
|
||||
sstable_pb->set_filename("aaa.sst");
|
||||
sstable_pb->set_max_rss_rowid(0);
|
||||
for (int i = 0; i < N; i++) {
|
||||
sstable_pb = sstable_meta.add_sstables();
|
||||
sstable_pb->set_filesize(sub_size);
|
||||
sstable_pb->set_max_rss_rowid(i + 1);
|
||||
}
|
||||
LakePersistentIndex::pick_sstables_for_merge(sstable_meta, &sstables, &merge_base_level);
|
||||
if (is_base) {
|
||||
|
|
@ -379,6 +381,82 @@ TEST_F(LakePersistentIndexTest, test_memtable_full) {
|
|||
config::l0_max_mem_usage = old_l0_max_mem_usage;
|
||||
}
|
||||
|
||||
TEST_F(LakePersistentIndexTest, test_compaction_strategy_same_max_rss_rowid) {
|
||||
// Test case for the fix: when base sstable's max_rss_rowid is same as cumulative sstable's max_rss_rowid,
|
||||
// we should force to do base merge instead of cumulative merge.
|
||||
|
||||
PersistentIndexSstableMetaPB sstable_meta;
|
||||
std::vector<PersistentIndexSstablePB> sstables;
|
||||
bool merge_base_level = false;
|
||||
|
||||
// Setup: create a scenario where cumulative merge would normally be preferred
|
||||
// but base and cumulative sstables have the same max_rss_rowid
|
||||
sstable_meta.Clear();
|
||||
sstables.clear();
|
||||
|
||||
// Add base sstable (index 0) with large size
|
||||
auto* base_sstable = sstable_meta.add_sstables();
|
||||
base_sstable->set_filesize(1000000); // 1MB
|
||||
base_sstable->set_filename("base.sst");
|
||||
base_sstable->set_max_rss_rowid(100); // Same max_rss_rowid
|
||||
|
||||
// Add cumulative sstables with small total size (would trigger cumulative merge normally)
|
||||
auto* cumulative_sstable = sstable_meta.add_sstables();
|
||||
cumulative_sstable->set_filesize(50000); // 50KB - much smaller than base
|
||||
cumulative_sstable->set_filename("cumulative1.sst");
|
||||
cumulative_sstable->set_max_rss_rowid(100); // Same max_rss_rowid as base
|
||||
|
||||
// Without the fix, this would choose cumulative merge because:
|
||||
// base_level_bytes * ratio (1000000 * 0.1 = 100000) > cumulative_level_bytes (50000)
|
||||
// But with the fix, it should choose base merge due to same max_rss_rowid
|
||||
|
||||
LakePersistentIndex::pick_sstables_for_merge(sstable_meta, &sstables, &merge_base_level);
|
||||
|
||||
// Verify that base merge is chosen (merge_base_level = true)
|
||||
ASSERT_TRUE(merge_base_level) << "Should force base merge when max_rss_rowid is same";
|
||||
ASSERT_EQ(2, sstables.size()) << "Should include both base and cumulative sstables";
|
||||
ASSERT_EQ("base.sst", sstables[0].filename()) << "Base sstable should be first";
|
||||
ASSERT_EQ("cumulative1.sst", sstables[1].filename()) << "Cumulative sstable should be second";
|
||||
|
||||
// Test the normal case where max_rss_rowid is different
|
||||
sstable_meta.Clear();
|
||||
sstables.clear();
|
||||
|
||||
base_sstable = sstable_meta.add_sstables();
|
||||
base_sstable->set_filesize(1000000);
|
||||
base_sstable->set_filename("base2.sst");
|
||||
base_sstable->set_max_rss_rowid(100); // Different max_rss_rowid
|
||||
|
||||
cumulative_sstable = sstable_meta.add_sstables();
|
||||
cumulative_sstable->set_filesize(50000);
|
||||
cumulative_sstable->set_filename("cumulative2.sst");
|
||||
cumulative_sstable->set_max_rss_rowid(200); // Different max_rss_rowid
|
||||
|
||||
LakePersistentIndex::pick_sstables_for_merge(sstable_meta, &sstables, &merge_base_level);
|
||||
|
||||
// This should choose cumulative merge since max_rss_rowid is different
|
||||
ASSERT_FALSE(merge_base_level) << "Should choose cumulative merge when max_rss_rowid is different";
|
||||
ASSERT_EQ(1, sstables.size()) << "Should only include cumulative sstables";
|
||||
ASSERT_EQ("cumulative2.sst", sstables[0].filename()) << "Only cumulative sstable should be included";
|
||||
|
||||
// Test edge case: empty cumulative sstables
|
||||
sstable_meta.Clear();
|
||||
sstables.clear();
|
||||
|
||||
base_sstable = sstable_meta.add_sstables();
|
||||
base_sstable->set_filesize(1000000);
|
||||
base_sstable->set_filename("base3.sst");
|
||||
base_sstable->set_max_rss_rowid(100);
|
||||
|
||||
// No cumulative sstables added
|
||||
|
||||
LakePersistentIndex::pick_sstables_for_merge(sstable_meta, &sstables, &merge_base_level);
|
||||
|
||||
// Should choose base merge since there are no cumulative sstables
|
||||
ASSERT_TRUE(!merge_base_level) << "Should choose cumulative merge when no cumulative sstables exist";
|
||||
ASSERT_EQ(0, sstables.size()) << "Should be empty since no cumulative sstables exist";
|
||||
}
|
||||
|
||||
TEST_F(LakePersistentIndexTest, test_major_compaction_with_predicate) {
|
||||
auto l0_max_mem_usage = config::l0_max_mem_usage;
|
||||
auto lake_pk_index_cumulative_base_compaction_ratio = config::lake_pk_index_cumulative_base_compaction_ratio;
|
||||
|
|
|
|||
Loading…
Reference in New Issue