[Enhancement] create string column zonemap with prefix truncation (backport #61975) (#62317)

Co-authored-by: Murphy <96611012+murphyatwork@users.noreply.github.com>
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
This commit is contained in:
mergify[bot] 2025-08-26 06:33:09 +00:00 committed by GitHub
parent 6ca0ee936b
commit 98032f2b0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 139 additions and 15 deletions

View File

@ -323,6 +323,11 @@ CONF_mBool(enable_zonemap_index_memory_page_cache, "true");
// whether to enable the ordinal index memory cache
CONF_mBool(enable_ordinal_index_memory_page_cache, "true");
// Enable ZoneMap for string (CHAR/VARCHAR) columns using prefix-based min/max.
// When true, the segment writer and the flat JSON column writer request a zone map
// for string-typed columns; the segment writer truncates min/max only for non-key columns.
CONF_mBool(enable_string_prefix_zonemap, "true");
// Prefix length (in bytes) used for string ZoneMap min/max when enabled.
// The zone map writer clamps values below 8 up to 8. A truncated max is terminated
// with a 0xFF byte, so it is stored slightly longer than the prefix itself.
CONF_mInt32(string_prefix_zonemap_prefix_len, "16");
CONF_mInt32(base_compaction_check_interval_seconds, "60");
CONF_mInt64(min_base_compaction_num_singleton_deltas, "5");
CONF_mInt64(max_base_compaction_num_singleton_deltas, "100");

View File

@ -394,6 +394,9 @@ Status ScalarColumnWriter::init() {
if (_opts.need_zone_map) {
_has_index_builder = true;
_zone_map_index_builder = ZoneMapIndexWriter::create(type_info());
if (_opts.zone_map_truncate_string) {
_zone_map_index_builder->enable_truncate_string();
}
}
if (_opts.need_bitmap_index) {
_has_index_builder = true;

View File

@ -73,6 +73,7 @@ struct ColumnWriterOptions {
// space saving = 1 - compressed_size / uncompressed_size
double compression_min_space_saving = 0.1;
bool need_zone_map = false;
bool zone_map_truncate_string = false; // truncate string at write time to reduce comparison/metadata overhead.
bool need_bitmap_index = false;
bool need_bloom_filter = false;
bool need_vector_index = false;

View File

@ -194,6 +194,8 @@ Status FlatJsonColumnWriter::_init_flat_writers() {
opts.meta->set_name(_flat_paths[i]);
opts.need_flat = false;
opts.need_zone_map = config::json_flat_create_zonemap && is_zone_map_key_type(_flat_types[i]);
opts.need_zone_map |= config::enable_string_prefix_zonemap && is_string_type(_flat_types[i]);
opts.zone_map_truncate_string = config::enable_string_prefix_zonemap && is_string_type(_flat_types[i]);
// Set global dict for sub-columns that support it
if (is_string_type(_flat_types[i])) {

View File

@ -169,6 +169,10 @@ Status SegmentWriter::init(const std::vector<uint32_t>& column_indexes, bool has
const bool enable_dup_zone_map =
_tablet_schema->keys_type() == KeysType::DUP_KEYS && is_zone_map_key_type(column.type());
opts.need_zone_map = column.is_key() || enable_pk_zone_map || enable_dup_zone_map || column.is_sort_key();
// Create prefix zonemap for string type, but only truncate it for non-key columns
opts.need_zone_map |= config::enable_string_prefix_zonemap && is_string_type(column.type());
opts.zone_map_truncate_string =
config::enable_string_prefix_zonemap && is_string_type(column.type()) && !column.is_key();
if (column.type() == LogicalType::TYPE_ARRAY) {
opts.need_zone_map = false;
}

View File

@ -38,6 +38,7 @@
#include "column/column_helper.h"
#include "column/column_viewer.h"
#include "common/config.h"
#include "storage/chunk_helper.h"
#include "storage/decimal_type_info.h"
#include "storage/olap_define.h"
@ -169,6 +170,8 @@ public:
// length is only used for CHAR/VARCHAR, and used to allocate enough memory for min/max value.
explicit ZoneMapIndexWriterImpl(TypeInfo* type_info);
// Turns on write-time prefix truncation of string min/max values for this writer.
// Once set, the flag is never cleared by this class.
void enable_truncate_string() override { _truncate_string = true; }
void add_values(const void* values, size_t count) override;
// Marks the current page zone map as containing NULLs when `count` is non-zero;
// a zero count leaves the flag unchanged.
void add_nulls(uint32_t count) override { _page_zone_map.has_null |= count > 0; }
@ -181,6 +184,8 @@ public:
// Returns the running size estimate tracked in `_estimated_size`.
uint64_t size() const override { return _estimated_size; }
private:
// Shortens the min/max values of `zm` in place to the configured prefix length.
// Does nothing unless truncation was enabled through enable_truncate_string().
void _truncate_string_minmax_if_needed(ZoneMap<type>* zm);
void _reset_zone_map(ZoneMap<type>* zone_map) {
// we should allocate max varchar length and set to max for min value
zone_map->min_value.reset(_type_info);
@ -197,6 +202,9 @@ private:
// serialized ZoneMapPB for each data page
std::vector<std::string> _values;
uint64_t _estimated_size = 0;
// Whether to truncate string min/max values to `config::string_prefix_zonemap_prefix_len`
// bytes (clamped to at least 8) before they are stored. Off by default; switched on via
// enable_truncate_string().
bool _truncate_string = false;
};
template <LogicalType type>
@ -205,6 +213,28 @@ ZoneMapIndexWriterImpl<type>::ZoneMapIndexWriterImpl(TypeInfo* type_info) : _typ
_reset_zone_map(&_segment_zone_map);
}
// Truncate string min/max values at write time to reduce comparison/metadata overhead.
//
// No-op unless truncation was enabled via enable_truncate_string() and LT is a
// string or binary type. The prefix length is `config::string_prefix_zonemap_prefix_len`,
// clamped to at least 8 bytes.
//
//  - min: cut to the first kPrefixLen bytes. Assuming Slice ordering is bytewise
//    lexicographic, a prefix never compares greater than the full value, so it is
//    still a lower bound.
//  - max: the first byte at index >= kPrefixLen that is not 0xFF is overwritten with
//    0xFF and the value is cut right after it. Blindly writing 0xFF at index
//    kPrefixLen is NOT enough: if that byte is already 0xFF and more bytes follow,
//    `prefix + 0xFF` compares *less* than the original and pages could be pruned
//    incorrectly. If every byte from kPrefixLen onwards is 0xFF there is no shorter
//    upper bound of this shape, so the value is left untouched.
//
// The operation is idempotent, which matters because callers invoke it repeatedly on
// the same zone map (after each min or max update).
template <LogicalType LT>
void ZoneMapIndexWriterImpl<LT>::_truncate_string_minmax_if_needed(ZoneMap<LT>* zm) {
    if (!_truncate_string) {
        return;
    }
    if constexpr (is_string_type(LT) || is_binary_type(LT)) {
        const size_t kPrefixLen =
                static_cast<size_t>(std::max<int32_t>(8, config::string_prefix_zonemap_prefix_len));
        constexpr char kMaxByte = static_cast<char>(0xFF);

        auto& min_slice = zm->min_value.value;
        auto& max_slice = zm->max_value.value;

        if (min_slice.size > kPrefixLen) {
            min_slice.size = kPrefixLen;
        }

        if (max_slice.size > kPrefixLen) {
            // Skip over bytes that are already 0xFF: they cannot be raised any further.
            size_t pos = kPrefixLen;
            while (pos < max_slice.size && max_slice.data[pos] == kMaxByte) {
                ++pos;
            }
            if (pos < max_slice.size) {
                // In bounds: pos < size, so the buffer already holds a byte at `pos`.
                max_slice.data[pos] = kMaxByte;
                max_slice.size = pos + 1;
            }
            // else: the tail is all 0xFF; keep the original value as the upper bound.
        }
    }
}
template <LogicalType type>
void ZoneMapIndexWriterImpl<type>::add_values(const void* values, size_t count) {
if (count > 0) {
@ -215,10 +245,12 @@ void ZoneMapIndexWriterImpl<type>::add_values(const void* values, size_t count)
if (unaligned_load<CppType>(pmin) < _page_zone_map.min_value.value) {
_page_zone_map.min_value.resize_container_for_fit(_type_info, pmin);
_type_info->direct_copy(&_page_zone_map.min_value.value, pmin);
_truncate_string_minmax_if_needed(&_page_zone_map);
}
if (unaligned_load<CppType>(pmax) > _page_zone_map.max_value.value) {
_page_zone_map.max_value.resize_container_for_fit(_type_info, pmax);
_type_info->direct_copy(&_page_zone_map.max_value.value, pmax);
_truncate_string_minmax_if_needed(&_page_zone_map);
}
} else {
_page_zone_map.min_value.resize_container_for_fit(_type_info, pmin);
@ -226,6 +258,7 @@ void ZoneMapIndexWriterImpl<type>::add_values(const void* values, size_t count)
_page_zone_map.max_value.resize_container_for_fit(_type_info, pmax);
_type_info->direct_copy(&_page_zone_map.max_value.value, pmax);
_truncate_string_minmax_if_needed(&_page_zone_map);
}
_page_zone_map.has_not_null = true;
}
@ -239,10 +272,12 @@ Status ZoneMapIndexWriterImpl<type>::flush() {
if (_page_zone_map.min_value.value < _segment_zone_map.min_value.value) {
_segment_zone_map.min_value.resize_container_for_fit(_type_info, &_page_zone_map.min_value.value);
_type_info->direct_copy(&_segment_zone_map.min_value.value, &_page_zone_map.min_value.value);
_truncate_string_minmax_if_needed(&_segment_zone_map);
}
if (_page_zone_map.max_value.value > _segment_zone_map.max_value.value) {
_segment_zone_map.max_value.resize_container_for_fit(_type_info, &_page_zone_map.max_value.value);
_type_info->direct_copy(&_segment_zone_map.max_value.value, &_page_zone_map.max_value.value);
_truncate_string_minmax_if_needed(&_segment_zone_map);
}
} else {
_segment_zone_map.min_value.resize_container_for_fit(_type_info, &_page_zone_map.min_value.value);
@ -250,6 +285,7 @@ Status ZoneMapIndexWriterImpl<type>::flush() {
_segment_zone_map.max_value.resize_container_for_fit(_type_info, &_page_zone_map.max_value.value);
_type_info->direct_copy(&_segment_zone_map.max_value.value, &_page_zone_map.max_value.value);
_truncate_string_minmax_if_needed(&_segment_zone_map);
}
_segment_zone_map.has_not_null = true;
}

View File

@ -62,6 +62,8 @@ public:
virtual ~ZoneMapIndexWriter() = default;
// Requests that string min/max values be truncated to a prefix before they are stored.
// ScalarColumnWriter::init() calls this when ColumnWriterOptions::zone_map_truncate_string is set.
virtual void enable_truncate_string() = 0;
virtual void add_values(const void* values, size_t count) = 0;
virtual void add_nulls(uint32_t count) = 0;

View File

@ -143,14 +143,8 @@ inline bool is_decimalv3_field_type(LogicalType type) {
LogicalType string_to_logical_type(const std::string& type_str);
const char* logical_type_to_string(LogicalType type);
inline bool is_binary_type(LogicalType type) {
switch (type) {
case TYPE_BINARY:
case TYPE_VARBINARY:
return true;
default:
return false;
}
// Tells whether `type` is one of the raw-byte logical types (BINARY / VARBINARY).
// Declared constexpr so it can be evaluated inside `if constexpr` conditions.
constexpr bool is_binary_type(LogicalType type) {
    switch (type) {
    case TYPE_BINARY:
    case TYPE_VARBINARY:
        return true;
    default:
        return false;
    }
}
inline bool is_scalar_field_type(LogicalType type) {

View File

@ -40,6 +40,7 @@
#include <string>
#include "cache/object_cache/page_cache.h"
#include "common/config.h"
#include "fs/fs_memory.h"
#include "storage/tablet_schema_helper.h"
#include "testutil/assert.h"
@ -100,13 +101,12 @@ protected:
ASSERT_EQ(3, column_zone_map.num_pages());
const std::vector<ZoneMapPB>& zone_maps = column_zone_map.page_zone_maps();
ASSERT_EQ(3, zone_maps.size());
ASSERT_EQ("aaaa", zone_maps[0].min());
ASSERT_EQ("ffff", zone_maps[0].max());
size_t pfx = config::enable_string_prefix_zonemap ? (size_t)config::string_prefix_zonemap_prefix_len : 64;
check_result_prefix(zone_maps[0], true, true, "aaaa", "ffff", false, true, pfx);
ASSERT_EQ(false, zone_maps[0].has_null());
ASSERT_EQ(true, zone_maps[0].has_not_null());
ASSERT_EQ("aaaaa", zone_maps[1].min());
ASSERT_EQ("fffff", zone_maps[1].max());
check_result_prefix(zone_maps[1], true, true, "aaaaa", "fffff", true, true, pfx);
ASSERT_EQ(true, zone_maps[1].has_null());
ASSERT_EQ(true, zone_maps[1].has_not_null());
@ -119,6 +119,23 @@ protected:
void check_result(const ZoneMapPB& zone_map, bool has_min, bool has_max, const std::string& min,
const std::string& max, bool has_null, bool has_not_null);
// Validates a string zone map entry that may have been written with prefix truncation.
//  - stored min must be a prefix of `min` (or equal its first `prefix_len` bytes) and must not
//    compare greater than `min`;
//  - stored max only has to be an upper bound of `max` (a truncated max may end with 0xFF);
//  - presence flags and null flags must match exactly.
void check_result_prefix(const ZoneMapPB& zone_map, bool has_min, bool has_max, const std::string& min,
                         const std::string& max, bool has_null, bool has_not_null, size_t prefix_len = 64) {
    ASSERT_EQ(has_min, zone_map.has_min());
    ASSERT_EQ(has_max, zone_map.has_max());

    if (has_min) {
        const std::string& stored_min = zone_map.min();
        const bool expected_starts_with_stored = min.compare(0, stored_min.size(), stored_min) == 0;
        const std::string expected_prefix = min.substr(0, std::min(prefix_len, min.size()));
        ASSERT_TRUE(expected_starts_with_stored || stored_min == expected_prefix);
        ASSERT_TRUE(stored_min <= min);
    }

    if (has_max) {
        const std::string& stored_max = zone_map.max();
        ASSERT_TRUE(stored_max >= max);
    }

    ASSERT_EQ(has_null, zone_map.has_null());
    ASSERT_EQ(has_not_null, zone_map.has_not_null());
}
std::shared_ptr<MemoryFileSystem> _fs = nullptr;
std::unique_ptr<MemTracker> _mem_tracker = nullptr;
};
@ -268,12 +285,13 @@ TEST_F(ColumnZoneMapTest, StringResize) {
const auto& zone_maps = reader.page_zone_maps();
ASSERT_EQ(2, zone_maps.size());
check_result(zone_maps[0], true, true, str1, str2, false, true);
check_result(zone_maps[1], true, true, str3, str4, false, true);
size_t pfx = config::enable_string_prefix_zonemap ? (size_t)config::string_prefix_zonemap_prefix_len : 64;
check_result_prefix(zone_maps[0], true, true, str1, str2, false, true, pfx);
check_result_prefix(zone_maps[1], true, true, str3, str4, false, true, pfx);
// segment zonemap
const auto& segment_zonemap = index_meta.zone_map_index().segment_zone_map();
check_result(segment_zonemap, true, true, str1, str4, false, true);
check_result_prefix(segment_zonemap, true, true, str1, str4, false, true, pfx);
}
TEST_F(ColumnZoneMapTest, AllNullPage) {
@ -349,6 +367,7 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) {
// Runs the shared string zone map scenario against a VARCHAR key column.
TEST_F(ColumnZoneMapTest, NormalTestVarcharPage) {
TabletColumn varchar_column = create_varchar_key(0);
TypeInfoPtr type_info = get_type_info(varchar_column);
// NOTE(review): test_string() presumably validates the page zone maps through
// check_result_prefix(), so both full and prefix-truncated min/max values are accepted — confirm.
test_string("NormalTestVarcharPage", type_info);
}
@ -438,4 +457,62 @@ TEST_F(ColumnZoneMapTest, VarbinaryWithBinaryData) {
true, true);
}
// Writes three pages (short strings, strings sharing a common prefix, strings longer than the
// prefix length) through a zone map writer with truncation enabled, then reads the index back
// and checks that short values are stored verbatim while long values are cut to the prefix.
TEST_F(ColumnZoneMapTest, StringPrefixZonemapVariants) {
    // Pin the config for this test and restore it on every exit path: a failing ASSERT_*
    // returns from the test body early, so a manual restore at the end would be skipped.
    struct ConfigGuard {
        const bool old_switch = config::enable_string_prefix_zonemap;
        const int old_len = config::string_prefix_zonemap_prefix_len;
        ~ConfigGuard() {
            config::enable_string_prefix_zonemap = old_switch;
            config::string_prefix_zonemap_prefix_len = old_len;
        }
    } config_guard;
    config::enable_string_prefix_zonemap = true;
    config::string_prefix_zonemap_prefix_len = 16;

    // Build a segment with various string lengths and patterns
    std::string filename = kTestDir + "/StringPrefixZonemapVariants";
    TabletColumn varchar_column = create_varchar_key(0);
    TypeInfoPtr type_info = get_type_info(varchar_column);
    auto writer = ZoneMapIndexWriter::create(type_info.get());
    // Truncation is opt-in per writer; without this call the test would only ever see
    // untruncated values and pass without exercising the feature.
    writer->enable_truncate_string();

    // Short strings
    std::vector<Slice> shorts = {{"a", 1}, {"b", 1}, {"c", 1}};
    writer->add_values(shorts.data(), shorts.size());
    writer->flush();

    // Common prefix strings
    std::vector<std::string> cp = {"prefix_0001", "prefix_0002", "prefix_9999"};
    std::vector<Slice> cp_slices;
    for (auto& s : cp) cp_slices.push_back({s.data(), s.size()});
    writer->add_values(cp_slices.data(), cp_slices.size());
    writer->flush();

    // Long strings (> 64 to ensure truncation even if config changes)
    std::string long1(80, 'X');
    std::string long2(120, 'Y');
    std::vector<Slice> longs = {{long1.data(), long1.size()}, {long2.data(), long2.size()}};
    writer->add_values(longs.data(), longs.size());
    writer->flush();

    // Write index out
    ColumnIndexMetaPB index_meta;
    write_file(*writer, index_meta, filename);

    // Read back
    ZoneMapIndexReader reader;
    load_zone_map(reader, index_meta, filename);
    ASSERT_EQ(3, reader.num_pages());
    const auto& zone_maps = reader.page_zone_maps();
    ASSERT_EQ(3, zone_maps.size());

    size_t pfx = static_cast<size_t>(config::string_prefix_zonemap_prefix_len);

    // Page 0: shorts -- shorter than the prefix, stored verbatim
    check_result_prefix(zone_maps[0], true, true, "a", "c", false, true, pfx);
    ASSERT_EQ("a", zone_maps[0].min());
    ASSERT_EQ("c", zone_maps[0].max());

    // Page 1: common prefix -- still shorter than the prefix length, stored verbatim
    check_result_prefix(zone_maps[1], true, true, cp.front(), cp.back(), false, true, pfx);
    ASSERT_EQ(cp.front(), zone_maps[1].min());
    ASSERT_EQ(cp.back(), zone_maps[1].max());

    // Page 2: long strings -- min is cut to the prefix, max is the prefix plus a 0xFF byte
    check_result_prefix(zone_maps[2], true, true, long1, long2, false, true, pfx);
    ASSERT_EQ(std::string(pfx, 'X'), zone_maps[2].min());
    ASSERT_EQ(std::string(pfx, 'Y') + '\xFF', zone_maps[2].max());
}
} // namespace starrocks