Compare commits

95 commits: main...branch-4.0
| Author | SHA1 | Date |
|---|---|---|
| | 124213e5e1 | |
| | 7b398707ee | |
| | 6c87b5ffbf | |
| | 790a87c95a | |
| | 3bfca8ac5c | |
| | 8344528886 | |
| | 87cef74ab8 | |
| | 58ac488b7b | |
| | c26ce675bb | |
| | 120776d9b6 | |
| | 77ce82456f | |
| | 9274edcdb6 | |
| | 5bbbde9039 | |
| | 158e566b3f | |
| | 76aa383452 | |
| | 6a8b9c50fe | |
| | 7b5be809a4 | |
| | 04fb732754 | |
| | 11046c4d76 | |
| | 20ef5b6e26 | |
| | 70778b7d6f | |
| | e5a58f3bfe | |
| | 3eae6e3e81 | |
| | 9c2dd0c1c5 | |
| | 15c68e1001 | |
| | e791e524b4 | |
| | aa776de6b4 | |
| | 26d9d4bcfb | |
| | 20bb38dba9 | |
| | 1808e43355 | |
| | d2db3d5b4d | |
| | f68d3b62a6 | |
| | 94043d9c01 | |
| | f4d472f906 | |
| | f5f20afaf2 | |
| | e98f32834b | |
| | 0e39d339cb | |
| | 94bd28bbd8 | |
| | ec4c0ecd2b | |
| | d90d3bc5b6 | |
| | 8d11089dcb | |
| | ed1d4cc111 | |
| | c42eaf88df | |
| | 843806e61e | |
| | 85b141ca97 | |
| | f93eadcee6 | |
| | 7fb868e211 | |
| | b68721abdc | |
| | 4217260158 | |
| | 945d51a80b | |
| | 7b3f2789b9 | |
| | 5b41a92084 | |
| | 0df4fb0522 | |
| | ae28c45368 | |
| | 90f1f3be58 | |
| | cf4a3df21c | |
| | 7e26ff974e | |
| | 24d26c33ac | |
| | 3e09498f8f | |
| | 89bc4ff068 | |
| | 04bb4e3f1b | |
| | e0fe6d4e72 | |
| | 8413284035 | |
| | 8dd56fd7ad | |
| | d989b56d51 | |
| | ea8c32a0d8 | |
| | dbb3e1d5f8 | |
| | 982f2ebd3e | |
| | f5fac98bdb | |
| | 960c351557 | |
| | 17f92859be | |
| | a670068304 | |
| | 492586e993 | |
| | 9df260eee1 | |
| | f5a74aa16d | |
| | 3708c97461 | |
| | f571bb1ac0 | |
| | 3635b317d8 | |
| | f96b93e208 | |
| | 7082f55ab0 | |
| | c7f97d8f46 | |
| | a65a4e2eb9 | |
| | 6b4f0cbef5 | |
| | c6da99c2bb | |
| | 6bebdbac4d | |
| | 1cf54d7670 | |
| | 9837153661 | |
| | 109deb7a80 | |
| | c8e77680d7 | |
| | e371915c8c | |
| | 459a5fc3f0 | |
| | 288b12572d | |
| | 898d7a400e | |
| | e70b5139dd | |
| | 1c0ffd7f4c | |
|
```diff
@@ -1,137 +1,2 @@
 # committer will be the owner of all codes
 * @StarRocks/starrocks-committer
-
-# cpp miscellaneous
-/be/src/common/ @StarRocks/cpp-misc-maintainer
-/be/src/gen_cpp/ @StarRocks/cpp-misc-maintainer
-/be/src/gutil/ @StarRocks/cpp-misc-maintainer
-/be/src/simd/ @StarRocks/cpp-misc-maintainer
-/be/src/testutil/ @StarRocks/cpp-misc-maintainer
-/be/src/util/ @StarRocks/cpp-misc-maintainer
-
-# execution engine
-/be/src/column/ @StarRocks/execution-maintainer
-/be/src/exec/ @StarRocks/execution-maintainer
-/be/src/exprs/ @StarRocks/execution-maintainer
-/be/src/runtime/ @StarRocks/execution-maintainer
-/be/src/types/ @StarRocks/execution-maintainer
-/be/src/udf/ @StarRocks/execution-maintainer
-
-# open formats
-/be/src/formats/ @StarRocks/open-format-maintainer
-
-# storage engine
-/be/src/fs/ @StarRocks/storage-maintainer
-/be/src/io/ @StarRocks/storage-maintainer
-/be/src/storage/ @StarRocks/storage-maintainer
-
-# /docs/ belong to docs-maintainer
-/docs/ @StarRocks/docs-maintainer
-
-# /docker
-/docker/ @StarRocks/docker-maintainer
-
-# metadata
-/fe/fe-core/src/main/java/com/starrocks/authentication/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/privilege/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/common/util/concurrent/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/mysql/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/healthchecker/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/clone/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/consistency/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/ha/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/journal/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/leader/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/meta/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/persist/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/alter/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/backup/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/catalog/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/metric/ @StarRocks/metadata-maintainer
-/fe/fe-core/src/main/java/com/starrocks/system/ @StarRocks/metadata-maintainer
-
-# connector
-/fe/fe-core/src/main/java/com/starrocks/connector/ @StarRocks/connector-maintainer
-/fe/fe-core/src/main/java/com/starrocks/credential/ @StarRocks/connector-maintainer
-
-# parser
-/fe/fe-core/src/main/java/com/starrocks/sql/ast/ @StarRocks/parser
-/fe/fe-core/src/main/java/com/starrocks/sql/parser/ @StarRocks/parser
-
-# analyzer
-/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/ @StarRocks/analyzer
-/fe/fe-core/src/main/java/com/starrocks/analysis/ @StarRocks/analyzer
-
-# optimizer
-/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/ @StarRocks/optimizer
-/fe/fe-core/src/main/java/com/starrocks/statistic/ @StarRocks/optimizer
-
-# scheduler
-/fe/fe-core/src/main/java/com/starrocks/qe/scheduler/ @StarRocks/scheduler-maintainer
-
-# sql/parser/StarRocksLex.g4 sql/parser/StarRocks.g4 belong to syntax-committer
-/fe/fe-core/src/main/java/com/starrocks/sql/parser/StarRocksLex.g4 @StarRocks/syntax-committer
-/fe/fe-core/src/main/java/com/starrocks/sql/parser/StarRocks.g4 @StarRocks/syntax-committer
-/gensrc/script/functions.py @StarRocks/syntax-committer
-
-# /thirdparty/ /docker/dockerfiles/dev-env/dev-env.Dockerfile belong to thirdparty-maintainer
-/be/src/thirdparty/ @StarRocks/thirdparty-maintainer
-/thirdparty/ @StarRocks/thirdparty-maintainer
-/docker/dockerfiles/dev-env/dev-env.Dockerfile @StarRocks/thirdparty-maintainer
-
-# cloud native
-/be/src/storage/lake/ @StarRocks/cloud-native-maintainer
-/be/src/runtime/lake_tablets_channel.h @StarRocks/cloud-native-maintainer
-/be/src/runtime/lake_tablets_channel.cpp @StarRocks/cloud-native-maintainer
-
-# error message
-/fe/fe-core/src/main/java/com/starrocks/common/ErrorCode.java @StarRocks/msg-reviewer
-
-# StorageEngine/ExecEnv/GlobalEnv
-/be/src/runtime/exec_env.h @StarRocks/thread-committer
-/be/src/runtime/exec_env.cpp @StarRocks/thread-committer
-/be/src/storage/olap_server.cpp @StarRocks/thread-committer
-/be/src/storage/storage_engine.h @StarRocks/thread-committer
-/be/src/storage/storage_engine.cpp @StarRocks/thread-committer
-/be/src/service/starrocks_main.cpp @StarRocks/thread-committer
-/be/src/service/service_be/starrocks_be.cpp @StarRocks/thread-committer
-
-# restful
-/fe/fe-core/src/main/java/com/starrocks/http @StarRocks/restful-maintainer
-/be/src/http @StarRocks/restful-maintainer
-
-
-# load and unload
-/fe/fe-core/src/main/java/com/starrocks/load/* @StarRocks/load-unload-maintainer
-/fe/fe-core/src/main/java/com/starrocks/plan/StreamLoad* @StarRocks/load-unload-maintainer
-/fe/fe-core/src/main/java/com/starrocks/plan/*Sink.java @StarRocks/load-unload-maintainer
-/fe/fe-core/src/main/java/com/starrocks/sql/InsertPlanner.java @StarRocks/load-unload-maintainer
-/fe/fe-core/src/main/java/com/starrocks/sql/LoadPlanner.java @StarRocks/load-unload-maintainer
-/fe/fe-core/src/main/java/com/starrocks/backup/* @StarRocks/load-unload-maintainer
-/fe/fe-core/src/main/java/com/starrocks/alter/Optimize* @StarRocks/load-unload-maintainer
-/fe/fe-core/src/main/java/com/starrocks/alter/Compaction* @StarRocks/load-unload-maintainer
-/fe/fe-core/src/main/java/com/starrocks/catalog/*Partition* @StarRocks/load-unload-maintainer
-
-/be/src/storage/* @StarRocks/load-unload-maintainer
-/be/src/exec/tablet_sink* @StarRocks/load-unload-maintainer
-/be/src/exec/csv_scanner.cpp @StarRocks/load-unload-maintainer
-/be/src/exec/json_scanner.cpp @StarRocks/load-unload-maintainer
-/be/src/exec/pipeline/olap_table_sink_operator.cpp @StarRocks/load-unload-maintainer
-/be/src/formats/avro/* @StarRocks/load-unload-maintainer
-/be/src/formats/csv/* @StarRocks/load-unload-maintainer
-/be/src/formats/json/* @StarRocks/load-unload-maintainer
-/be/src/http/action/compaction_action.cpp @StarRocks/load-unload-maintainer
-/be/src/http/action/*stream_load.cpp @StarRocks/load-unload-maintainer
-/be/src/http/action/restore* @StarRocks/load-unload-maintainer
-/be/src/runtime/batch_write/* @StarRocks/load-unload-maintainer
-/be/src/runtime/routine_load/* @StarRocks/load-unload-maintainer
-/be/src/runtime/stream_load/* @StarRocks/load-unload-maintainer
-/be/src/runtime/load* @StarRocks/load-unload-maintainer
-/be/src/runtime/tablets_channel.cpp @StarRocks/load-unload-maintainer
-/be/src/runtime/local_tablets_channel* @StarRocks/load-unload-maintainer
-/be/src/runtime/export_sink.cpp @StarRocks/load-unload-maintainer
-
-# meta upgrade/downgrade compatibility
-/fe/fe-core/src/main/java/com/starrocks/persist/gson/GsonUtils.java @StarRocks/meta-compatibility-maintainer
```
```diff
@@ -412,6 +412,8 @@ void run_clone_task(const std::shared_ptr<CloneAgentTaskRequest>& agent_task_req
         LOG(INFO) << "clone success, set tablet infos. status:" << status
                   << ", signature:" << agent_task_req->signature;
         finish_task_request.__set_finish_tablet_infos(tablet_infos);
+        finish_task_request.__set_copy_size(engine_task.get_copy_size());
+        finish_task_request.__set_copy_time_ms(engine_task.get_copy_time_ms());
     }
   }
 }
```
```diff
@@ -49,6 +49,7 @@ struct TabletPublishVersionTask {
     // or 0 which means tablet not found or publish task cannot be submitted
     int64_t max_continuous_version{0};
     bool is_double_write{false};
+    bool is_shadow{false};
 };
 
 void run_publish_version_task(ThreadPoolToken* token, const TPublishVersionRequest& publish_version_req,
@@ -91,7 +92,7 @@ void run_publish_version_task(ThreadPoolToken* token, const TPublishVersionReque
            }
        }
    } else {
-        std::vector<std::map<TabletInfo, RowsetSharedPtr>> partitions(num_partition);
+        std::vector<std::map<TabletInfo, std::pair<RowsetSharedPtr, bool>>> partitions(num_partition);
         for (size_t i = 0; i < publish_version_req.partition_version_infos.size(); i++) {
             StorageEngine::instance()->txn_manager()->get_txn_related_tablets(
                     transaction_id, publish_version_req.partition_version_infos[i].partition_id, &partitions[i]);
@@ -108,7 +109,8 @@ void run_publish_version_task(ThreadPoolToken* token, const TPublishVersionReque
                 task.partition_id = publish_version_req.partition_version_infos[i].partition_id;
                 task.tablet_id = itr.first.tablet_id;
                 task.version = publish_version_req.partition_version_infos[i].version;
-                task.rowset = std::move(itr.second);
+                task.rowset = std::move(itr.second.first);
+                task.is_shadow = itr.second.second;
                 // rowset can be nullptr if it just prepared but not committed
                 if (task.rowset != nullptr) {
                     task.rowset->rowset_meta()->set_gtid(publish_version_req.gtid);
@@ -235,10 +237,13 @@ void run_publish_version_task(ThreadPoolToken* token, const TPublishVersionReque
             if (st.ok()) {
                 st = task.st;
             }
-        } else {
+        } else if (!task.is_shadow) {
             auto& pair = tablet_publish_versions.emplace_back();
             pair.__set_tablet_id(task.tablet_id);
             pair.__set_version(task.version);
+        } else {
+            VLOG(1) << "publish_version success tablet:" << task.tablet_id << " version:" << task.version
+                    << " is_shadow:" << task.is_shadow;
         }
     }
     // return tablet and its version which has already finished.
```
```diff
@@ -848,7 +848,8 @@ void* ReportDataCacheMetricsTaskWorkerPool::_worker_thread_callback(void* arg_th
     request.__set_report_version(g_report_version.load(std::memory_order_relaxed));
 
     TDataCacheMetrics t_metrics{};
-    const LocalCacheEngine* cache = DataCache::GetInstance()->local_cache();
+    // TODO: mem_metrics + disk_metrics
+    const LocalCacheEngine* cache = DataCache::GetInstance()->local_disk_cache();
     if (cache != nullptr && cache->is_initialized()) {
         const auto metrics = cache->cache_metrics();
         DataCacheUtils::set_metrics_from_thrift(t_metrics, metrics);
```
```diff
@@ -129,8 +129,6 @@ void ObjectCacheBench::init_cache(CacheType cache_type) {
         _page_cache = std::make_shared<StoragePageCache>();
         _page_cache->init(_lru_cache.get());
     } else {
-        opt.engine = "starcache";
-
         _star_cache = std::make_shared<StarCacheEngine>();
         Status st = _star_cache->init(opt);
         if (!st.ok()) {
```
```diff
@@ -36,7 +36,7 @@ BlockCache::~BlockCache() {
     (void)shutdown();
 }
 
-Status BlockCache::init(const CacheOptions& options, std::shared_ptr<LocalCacheEngine> local_cache,
+Status BlockCache::init(const BlockCacheOptions& options, std::shared_ptr<LocalCacheEngine> local_cache,
                         std::shared_ptr<RemoteCacheEngine> remote_cache) {
     _block_size = std::min(options.block_size, MAX_BLOCK_SIZE);
     _local_cache = std::move(local_cache);
```
```diff
@@ -33,7 +33,7 @@ public:
     ~BlockCache();
 
     // Init the block cache instance
-    Status init(const CacheOptions& options, std::shared_ptr<LocalCacheEngine> local_cache,
+    Status init(const BlockCacheOptions& options, std::shared_ptr<LocalCacheEngine> local_cache,
                 std::shared_ptr<RemoteCacheEngine> remote_cache);
 
     // Write data buffer to cache, the `offset` must be aligned by block size
```
```diff
@@ -42,7 +42,15 @@ struct DirSpace {
     size_t size;
 };
 
-struct CacheOptions {
+struct RemoteCacheOptions {
+    double skip_read_factor = 0;
+};
+
+struct MemCacheOptions {
+    size_t mem_space_size = 0;
+};
+
+struct DiskCacheOptions {
     // basic
     size_t mem_space_size = 0;
     std::vector<DirSpace> dir_spaces;
@@ -54,7 +62,6 @@ struct CacheOptions {
     bool enable_direct_io = false;
     bool enable_tiered_cache = true;
     bool enable_datacache_persistence = false;
-    std::string engine;
     size_t max_concurrent_inserts = 0;
     size_t max_flying_memory_mb = 0;
     double scheduler_threads_per_cpu = 0;
@@ -63,6 +70,10 @@ struct CacheOptions {
     std::string eviction_policy;
 };
 
+struct BlockCacheOptions {
+    size_t block_size = 0;
+};
+
 struct WriteCacheOptions {
     int8_t priority = 0;
     // If ttl_seconds=0 (default), no ttl restriction will be set. If an old one exists, remove it.
```
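The three hunks above split the former monolithic `CacheOptions` into per-layer option structs, so each cache engine only sees the knobs it actually uses. A minimal self-contained sketch of how the pieces fit together (struct fields come from the diff; the `path` member of `DirSpace` and all concrete values here are assumptions for illustration):

```cpp
#include <cstddef>
#include <string>
#include <vector>

// Simplified stand-ins for the structs introduced above.
struct DirSpace { std::string path; size_t size; };      // `path` field assumed
struct MemCacheOptions    { size_t mem_space_size = 0; };
struct DiskCacheOptions   { size_t mem_space_size = 0; std::vector<DirSpace> dir_spaces; };
struct RemoteCacheOptions { double skip_read_factor = 0; };
struct BlockCacheOptions  { size_t block_size = 0; };

int main() {
    MemCacheOptions mem{.mem_space_size = 4UL << 30};           // in-memory page cache quota
    DiskCacheOptions disk{.mem_space_size = 0,
                          .dir_spaces = {{"/data/cache", 100UL << 30}}}; // hypothetical dir
    RemoteCacheOptions remote{.skip_read_factor = 1.0};
    BlockCacheOptions block{.block_size = 1UL << 20};
    // Each engine now receives only the options relevant to it:
    //   lru_engine.init(mem);       starcache_engine.init(disk);
    //   peer_cache.init(remote);    block_cache.init(block, ...);
    (void)mem; (void)disk; (void)remote; (void)block;
    return 0;
}
```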
```diff
@@ -44,14 +44,9 @@ Status DataCache::init(const std::vector<StorePath>& store_paths) {
     _page_cache = std::make_shared<StoragePageCache>();
 
 #if defined(WITH_STARCACHE)
-    if (config::datacache_engine == "" || config::datacache_engine == "starcache") {
-        config::datacache_engine = "starcache";
-    } else {
-        config::datacache_engine = "lrucache";
-    }
-#else
-    config::datacache_engine = "lrucache";
+    _local_disk_cache_engine = "starcache";
 #endif
+    _local_mem_cache_engine = "lrucache";
 
     if (!config::datacache_enable) {
         config::disable_storage_page_cache = true;
@@ -59,22 +54,22 @@ Status DataCache::init(const std::vector<StorePath>& store_paths) {
         return Status::OK();
     }
 
-    ASSIGN_OR_RETURN(auto cache_options, _init_cache_options());
+    ASSIGN_OR_RETURN(auto mem_cache_options, _init_mem_cache_options());
 
-    if (config::datacache_engine == "starcache") {
 #if defined(WITH_STARCACHE)
-        RETURN_IF_ERROR(_init_starcache_engine(&cache_options));
-        RETURN_IF_ERROR(_init_peer_cache(cache_options));
+    ASSIGN_OR_RETURN(auto disk_cache_options, _init_disk_cache_options());
+    RETURN_IF_ERROR(_init_starcache_engine(&disk_cache_options));
 
-        if (config::block_cache_enable) {
-            RETURN_IF_ERROR(_block_cache->init(cache_options, _local_cache, _remote_cache));
-        }
-#else
-        return Status::InternalError("starcache engine is not supported");
-#endif
-    } else {
-        RETURN_IF_ERROR(_init_lrucache_engine(cache_options));
+    auto remote_cache_options = _init_remote_cache_options();
+    RETURN_IF_ERROR(_init_peer_cache(remote_cache_options));
+
+    if (config::block_cache_enable) {
+        auto block_cache_options = _init_block_cache_options();
+        RETURN_IF_ERROR(_block_cache->init(block_cache_options, _local_disk_cache, _remote_cache));
+    }
+#endif
+
+    RETURN_IF_ERROR(_init_lrucache_engine(mem_cache_options));
 
     RETURN_IF_ERROR(_init_page_cache());
 
@@ -100,14 +95,15 @@ void DataCache::destroy() {
     LOG(INFO) << "pagecache shutdown successfully";
 
     _block_cache.reset();
-    _local_cache.reset();
+    _local_mem_cache.reset();
+    _local_disk_cache.reset();
     _remote_cache.reset();
     LOG(INFO) << "datacache shutdown successfully";
 }
 
 bool DataCache::adjust_mem_capacity(int64_t delta, size_t min_capacity) {
-    if (_local_cache != nullptr) {
-        Status st = _local_cache->adjust_mem_quota(delta, min_capacity);
+    if (_local_mem_cache != nullptr) {
+        Status st = _local_mem_cache->adjust_mem_quota(delta, min_capacity);
         if (st.ok()) {
             return true;
         } else {
@@ -119,52 +115,67 @@ bool DataCache::adjust_mem_capacity(int64_t delta, size_t min_capacity) {
 }
 
 size_t DataCache::get_mem_capacity() const {
-    if (_local_cache != nullptr) {
-        return _local_cache->mem_quota();
+    if (_local_mem_cache != nullptr) {
+        return _local_mem_cache->mem_quota();
     } else {
         return 0;
     }
 }
 
-Status DataCache::_init_lrucache_engine(const CacheOptions& cache_options) {
-    _local_cache = std::make_shared<LRUCacheEngine>();
-    RETURN_IF_ERROR(_local_cache->init(cache_options));
+Status DataCache::_init_lrucache_engine(const MemCacheOptions& cache_options) {
+    _local_mem_cache = std::make_shared<LRUCacheEngine>();
+    RETURN_IF_ERROR(reinterpret_cast<LRUCacheEngine*>(_local_mem_cache.get())->init(cache_options));
     LOG(INFO) << "lrucache engine init successfully";
     return Status::OK();
 }
 
 Status DataCache::_init_page_cache() {
-    _page_cache->init(_local_cache.get());
+    _page_cache->init(_local_mem_cache.get());
     _page_cache->init_metrics();
     LOG(INFO) << "storage page cache init successfully";
     return Status::OK();
 }
 
 #if defined(WITH_STARCACHE)
-Status DataCache::_init_starcache_engine(CacheOptions* cache_options) {
+Status DataCache::_init_starcache_engine(DiskCacheOptions* cache_options) {
     // init starcache & disk monitor
-    _local_cache = std::make_shared<StarCacheEngine>();
-    _disk_space_monitor = std::make_shared<DiskSpaceMonitor>(_local_cache.get());
+    // TODO: DiskSpaceMonitor needs to be decoupled from StarCacheEngine.
+    _local_disk_cache = std::make_shared<StarCacheEngine>();
+    _disk_space_monitor = std::make_shared<DiskSpaceMonitor>(_local_disk_cache.get());
     RETURN_IF_ERROR(_disk_space_monitor->init(&cache_options->dir_spaces));
-    RETURN_IF_ERROR(_local_cache->init(*cache_options));
+    RETURN_IF_ERROR(reinterpret_cast<StarCacheEngine*>(_local_disk_cache.get())->init(*cache_options));
     _disk_space_monitor->start();
     return Status::OK();
 }
 
-Status DataCache::_init_peer_cache(const CacheOptions& cache_options) {
+Status DataCache::_init_peer_cache(const RemoteCacheOptions& cache_options) {
     _remote_cache = std::make_shared<PeerCacheEngine>();
     return _remote_cache->init(cache_options);
 }
 #endif
 
-StatusOr<CacheOptions> DataCache::_init_cache_options() {
-    CacheOptions cache_options;
+RemoteCacheOptions DataCache::_init_remote_cache_options() {
+    RemoteCacheOptions cache_options{.skip_read_factor = config::datacache_skip_read_factor};
+    return cache_options;
+}
+
+StatusOr<MemCacheOptions> DataCache::_init_mem_cache_options() {
+    MemCacheOptions cache_options;
     RETURN_IF_ERROR(DataCacheUtils::parse_conf_datacache_mem_size(
             config::datacache_mem_size, _global_env->process_mem_limit(), &cache_options.mem_space_size));
-    cache_options.engine = config::datacache_engine;
     return cache_options;
 }
 
-    if (config::datacache_engine == "starcache") {
+BlockCacheOptions DataCache::_init_block_cache_options() {
+    BlockCacheOptions cache_options;
+    cache_options.block_size = config::datacache_block_size;
+    return cache_options;
+}
+
+StatusOr<DiskCacheOptions> DataCache::_init_disk_cache_options() {
+    DiskCacheOptions cache_options;
+
+    if (_local_disk_cache_engine == "starcache") {
 #ifdef USE_STAROS
         std::vector<string> corresponding_starlet_dirs;
         if (config::datacache_unified_instance_enable && !config::starlet_cache_dir.empty()) {
@@ -276,8 +287,8 @@ void DataCache::try_release_resource_before_core_dump() {
         return release_all || modules.contains(name);
     };
 
-    if (_local_cache != nullptr && need_release("data_cache")) {
-        (void)_local_cache->update_mem_quota(0, false);
+    if (_local_mem_cache != nullptr && need_release("data_cache")) {
+        (void)_local_mem_cache->update_mem_quota(0, false);
     }
 }
```
```diff
@@ -23,7 +23,7 @@ namespace starrocks {
 class Status;
 class StorePath;
 class RemoteCacheEngine;
-class CacheOptions;
+class DiskCacheOptions;
 class GlobalEnv;
 class DiskSpaceMonitor;
 class MemSpaceMonitor;
@@ -39,10 +39,16 @@ public:
 
     void try_release_resource_before_core_dump();
 
-    void set_local_cache(std::shared_ptr<LocalCacheEngine> local_cache) { _local_cache = std::move(local_cache); }
+    void set_local_mem_cache(std::shared_ptr<LocalCacheEngine> local_mem_cache) {
+        _local_mem_cache = std::move(local_mem_cache);
+    }
+    void set_local_disk_cache(std::shared_ptr<LocalCacheEngine> local_disk_cache) {
+        _local_disk_cache = std::move(local_disk_cache);
+    }
     void set_page_cache(std::shared_ptr<StoragePageCache> page_cache) { _page_cache = std::move(page_cache); }
 
-    LocalCacheEngine* local_cache() { return _local_cache.get(); }
+    LocalCacheEngine* local_mem_cache() { return _local_mem_cache.get(); }
+    LocalCacheEngine* local_disk_cache() { return _local_disk_cache.get(); }
     BlockCache* block_cache() const { return _block_cache.get(); }
     void set_block_cache(std::shared_ptr<BlockCache> block_cache) { _block_cache = std::move(block_cache); }
     StoragePageCache* page_cache() const { return _page_cache.get(); }
@@ -56,19 +62,26 @@ public:
     size_t get_mem_capacity() const;
 
 private:
-    StatusOr<CacheOptions> _init_cache_options();
+    StatusOr<MemCacheOptions> _init_mem_cache_options();
+    StatusOr<DiskCacheOptions> _init_disk_cache_options();
+    RemoteCacheOptions _init_remote_cache_options();
+    BlockCacheOptions _init_block_cache_options();
 
 #if defined(WITH_STARCACHE)
-    Status _init_starcache_engine(CacheOptions* cache_options);
-    Status _init_peer_cache(const CacheOptions& cache_options);
+    Status _init_starcache_engine(DiskCacheOptions* cache_options);
+    Status _init_peer_cache(const RemoteCacheOptions& cache_options);
 #endif
-    Status _init_lrucache_engine(const CacheOptions& cache_options);
+    Status _init_lrucache_engine(const MemCacheOptions& cache_options);
     Status _init_page_cache();
 
     GlobalEnv* _global_env;
     std::vector<StorePath> _store_paths;
 
     // cache engine
-    std::shared_ptr<LocalCacheEngine> _local_cache;
+    std::string _local_mem_cache_engine;
+    std::string _local_disk_cache_engine;
+    std::shared_ptr<LocalCacheEngine> _local_mem_cache;
+    std::shared_ptr<LocalCacheEngine> _local_disk_cache;
     std::shared_ptr<RemoteCacheEngine> _remote_cache;
 
     std::shared_ptr<BlockCache> _block_cache;
```
```diff
@@ -27,7 +27,6 @@ class LocalCacheEngine {
 public:
     virtual ~LocalCacheEngine() = default;
 
-    virtual Status init(const CacheOptions& options) = 0;
     virtual bool is_initialized() const = 0;
 
     // Write data to cache
```
```diff
@@ -17,7 +17,7 @@
 #include <butil/fast_rand.h>
 
 namespace starrocks {
-Status LRUCacheEngine::init(const CacheOptions& options) {
+Status LRUCacheEngine::init(const MemCacheOptions& options) {
     _cache = std::make_unique<ShardedLRUCache>(options.mem_space_size);
     _initialized.store(true, std::memory_order_relaxed);
     return Status::OK();
```
```diff
@@ -25,7 +25,7 @@ public:
     LRUCacheEngine() = default;
     virtual ~LRUCacheEngine() override = default;
 
-    Status init(const CacheOptions& options) override;
+    Status init(const MemCacheOptions& options);
     bool is_initialized() const override { return _initialized.load(std::memory_order_relaxed); }
 
     Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) override;
```
```diff
@@ -23,7 +23,7 @@
 
 namespace starrocks {
 
-Status PeerCacheEngine::init(const CacheOptions& options) {
+Status PeerCacheEngine::init(const RemoteCacheOptions& options) {
     _cache_adaptor.reset(starcache::create_default_adaptor(options.skip_read_factor));
     return Status::OK();
 }
```
```diff
@@ -24,7 +24,7 @@ public:
     PeerCacheEngine() = default;
     ~PeerCacheEngine() override = default;
 
-    Status init(const CacheOptions& options) override;
+    Status init(const RemoteCacheOptions& options) override;
 
     Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer, ReadCacheOptions* options) override;
 
```
```diff
@@ -25,7 +25,7 @@ public:
     virtual ~RemoteCacheEngine() = default;
 
     // Init remote cache
-    virtual Status init(const CacheOptions& options) = 0;
+    virtual Status init(const RemoteCacheOptions& options) = 0;
 
     // Write data to remote cache
     virtual Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) = 0;
```
```diff
@@ -27,7 +27,7 @@
 
 namespace starrocks {
 
-Status StarCacheEngine::init(const CacheOptions& options) {
+Status StarCacheEngine::init(const DiskCacheOptions& options) {
     starcache::CacheOptions opt;
     opt.mem_quota_bytes = options.mem_space_size;
     for (auto& dir : options.dir_spaces) {
```
```diff
@@ -26,7 +26,7 @@ public:
     StarCacheEngine() = default;
     virtual ~StarCacheEngine() override = default;
 
-    Status init(const CacheOptions& options) override;
+    Status init(const DiskCacheOptions& options);
     bool is_initialized() const override { return _initialized.load(std::memory_order_relaxed); }
 
     Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) override;
```
```diff
@@ -83,35 +83,69 @@ void BinaryColumnBase<T>::append(const Column& src, size_t offset, size_t count)
 }
 
 template <typename T>
-void BinaryColumnBase<T>::append_selective(const Column& src, const uint32_t* indexes, uint32_t from, uint32_t size) {
+void BinaryColumnBase<T>::append_selective(const Column& src, const uint32_t* indexes, uint32_t from,
+                                           const uint32_t size) {
     if (src.is_binary_view()) {
         down_cast<const ColumnView*>(&src)->append_to(*this, indexes, from, size);
         return;
     }
 
+    indexes += from;
+
     const auto& src_column = down_cast<const BinaryColumnBase<T>&>(src);
-    const auto& src_offsets = src_column.get_offset();
-    const auto& src_bytes = src_column.get_bytes();
 
-    size_t cur_row_count = _offsets.size() - 1;
-    size_t cur_byte_size = _bytes.size();
+    const size_t prev_num_offsets = _offsets.size();
+    const size_t prev_num_rows = prev_num_offsets - 1;
 
-    _offsets.resize(cur_row_count + size + 1);
+    _offsets.resize(prev_num_offsets + size * 2);
+    auto* __restrict new_offsets = _offsets.data() + prev_num_offsets;
+    const auto* __restrict src_offsets = src_column.get_offset().data();
+
+    // Buffer i-th start offset and end offset in new_offsets[i * 2] and new_offsets[i * 2 + 1].
     for (size_t i = 0; i < size; i++) {
-        uint32_t row_idx = indexes[from + i];
-        T str_size = src_offsets[row_idx + 1] - src_offsets[row_idx];
-        _offsets[cur_row_count + i + 1] = _offsets[cur_row_count + i] + str_size;
-        cur_byte_size += str_size;
+        const uint32_t src_idx = indexes[i];
+        new_offsets[i * 2] = src_offsets[src_idx];
+        new_offsets[i * 2 + 1] = src_offsets[src_idx + 1];
     }
-    _bytes.resize(cur_byte_size);
 
-    auto* dest_bytes = _bytes.data();
-    for (size_t i = 0; i < size; i++) {
-        uint32_t row_idx = indexes[from + i];
-        T str_size = src_offsets[row_idx + 1] - src_offsets[row_idx];
-        strings::memcpy_inlined(dest_bytes + _offsets[cur_row_count + i], src_bytes.data() + src_offsets[row_idx],
-                                str_size);
+    // Write bytes
+    {
+        size_t num_bytes = _bytes.size();
+        for (size_t i = 0; i < size; i++) {
+            num_bytes += new_offsets[i * 2 + 1] - new_offsets[i * 2];
+        }
+        _bytes.resize(num_bytes);
+        const auto* __restrict src_bytes = src_column.get_bytes().data();
+        auto* __restrict dest_bytes = _bytes.data();
+        size_t cur_offset = _offsets[prev_num_rows];
+
+        if (src_column.get_bytes().size() > 32 * 1024 * 1024ull) {
+            for (size_t i = 0; i < size; i++) {
+                if (i + 16 < size) {
+                    // If the source column is large enough, use prefetch to speed up copying.
+                    __builtin_prefetch(src_bytes + new_offsets[i * 2 + 32]);
+                }
+                const T str_size = new_offsets[i * 2 + 1] - new_offsets[i * 2];
+                strings::memcpy_inlined(dest_bytes + cur_offset, src_bytes + new_offsets[i * 2], str_size);
+                cur_offset += str_size;
+            }
+        } else {
+            for (size_t i = 0; i < size; i++) {
+                const T str_size = new_offsets[i * 2 + 1] - new_offsets[i * 2];
+                // Only copy 16 bytes extra when src_column is small enough, because the overhead of copying 16 bytes
+                // will be large when src_column is large enough.
+                strings::memcpy_inlined_overflow16(dest_bytes + cur_offset, src_bytes + new_offsets[i * 2], str_size);
+                cur_offset += str_size;
+            }
+        }
     }
+
+    // Write offsets.
+    for (int64_t i = 0; i < size; i++) {
+        new_offsets[i] = new_offsets[i - 1] + (new_offsets[i * 2 + 1] - new_offsets[i * 2]);
+    }
+    _offsets.resize(prev_num_offsets + size);
 
     _slices_cache = false;
 }
```
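The rewritten `append_selective` above is a two-pass gather: pass one buffers each selected row's [start, end) offsets, pass two copies the bytes, prefetching ahead when the source column is large. A minimal self-contained sketch of the same pattern on a flat string column (plain `memcpy` stands in for StarRocks' `memcpy_inlined` helpers; names here are illustrative):

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Flat string column: row i lives in bytes[offsets[i], offsets[i + 1]), offsets[0] == 0.
struct FlatStrings {
    std::vector<uint8_t> bytes;
    std::vector<uint32_t> offsets{0};
};

// Gather rows indexes[0..n) from src into dst using the two-pass scheme.
void gather_strings(FlatStrings& dst, const FlatStrings& src, const uint32_t* indexes, size_t n) {
    // Pass 1: buffer [start, end) pairs so the copy loop no longer chases src.offsets.
    std::vector<uint32_t> pairs(n * 2);
    for (size_t i = 0; i < n; i++) {
        pairs[i * 2] = src.offsets[indexes[i]];
        pairs[i * 2 + 1] = src.offsets[indexes[i] + 1];
    }
    size_t total = 0;
    for (size_t i = 0; i < n; i++) total += pairs[i * 2 + 1] - pairs[i * 2];

    // Pass 2: copy, prefetching 16 rows ahead when the source is cache-unfriendly.
    size_t cur = dst.bytes.size();
    dst.bytes.resize(cur + total);
    const bool large_src = src.bytes.size() > 32u * 1024 * 1024; // threshold from the diff
    for (size_t i = 0; i < n; i++) {
        if (large_src && i + 16 < n) {
            __builtin_prefetch(src.bytes.data() + pairs[(i + 16) * 2]); // hide cache-miss latency
        }
        const uint32_t len = pairs[i * 2 + 1] - pairs[i * 2];
        std::memcpy(dst.bytes.data() + cur, src.bytes.data() + pairs[i * 2], len);
        cur += len;
        dst.offsets.push_back(static_cast<uint32_t>(cur));
    }
}
```

Buffering the offset pairs first keeps the hot loop free of dependent `offsets[idx + 1] - offsets[idx]` loads and lets the prefetch distance be expressed purely in terms of the buffered array.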
```diff
@@ -255,7 +255,7 @@ std::unique_ptr<Chunk> Chunk::clone_empty_with_slot(size_t size) const {
         columns[i] = _columns[i]->clone_empty();
         columns[i]->reserve(size);
     }
-    return std::make_unique<Chunk>(columns, _slot_id_to_index);
+    return std::make_unique<Chunk>(std::move(columns), _slot_id_to_index);
 }
 
 std::unique_ptr<Chunk> Chunk::clone_empty_with_schema() const {
```
```diff
@@ -18,6 +18,7 @@
 #include "runtime/mem_pool.h"
+#include "storage/olap_type_infra.h"
 #include "storage/type_traits.h"
 #include "types/logical_type.h"
 
 namespace starrocks {
 
@@ -51,6 +52,7 @@ Status datum_from_string(TypeInfo* type_info, Datum* dst, const std::string& str
         return Status::OK();
     }
     /* Type need memory allocated */
+    case TYPE_VARBINARY:
     case TYPE_CHAR:
     case TYPE_VARCHAR: {
         /* Type need memory allocated */
@@ -92,6 +94,7 @@ std::string datum_to_string(TypeInfo* type_info, const Datum& datum) {
     switch (type) {
     case TYPE_BOOLEAN:
         return datum_to_string<TYPE_TINYINT>(type_info, datum);
+    case TYPE_VARBINARY:
     case TYPE_CHAR:
     case TYPE_VARCHAR:
        return datum_to_string<TYPE_VARCHAR>(type_info, datum);
```
```diff
@@ -37,28 +37,36 @@ StatusOr<ColumnPtr> FixedLengthColumnBase<T>::upgrade_if_overflow() {
 
 template <typename T>
 void FixedLengthColumnBase<T>::append(const Column& src, size_t offset, size_t count) {
-    const auto& num_src = down_cast<const FixedLengthColumnBase<T>&>(src);
-    _data.insert(_data.end(), num_src._data.begin() + offset, num_src._data.begin() + offset + count);
+    DCHECK(this != &src);
+
+    const size_t orig_size = _data.size();
+    raw::stl_vector_resize_uninitialized(&_data, orig_size + count);
+
+    const T* src_data = reinterpret_cast<const T*>(src.raw_data());
+    strings::memcpy_inlined(_data.data() + orig_size, src_data + offset, count * sizeof(T));
 }
 
 template <typename T>
 void FixedLengthColumnBase<T>::append_selective(const Column& src, const uint32_t* indexes, uint32_t from,
                                                 uint32_t size) {
+    DCHECK(this != &src);
     indexes += from;
-    const T* src_data = reinterpret_cast<const T*>(src.raw_data());
 
     const size_t orig_size = _data.size();
-    _data.resize(orig_size + size);
+    raw::stl_vector_resize_uninitialized(&_data, orig_size + size);
     auto* dest_data = _data.data() + orig_size;
 
+    const T* src_data = reinterpret_cast<const T*>(src.raw_data());
     SIMDGather::gather(dest_data, src_data, indexes, size);
 }
 
 template <typename T>
 void FixedLengthColumnBase<T>::append_value_multiple_times(const Column& src, uint32_t index, uint32_t size) {
-    const T* src_data = reinterpret_cast<const T*>(src.raw_data());
+    DCHECK(this != &src);
     size_t orig_size = _data.size();
     _data.resize(orig_size + size);
 
+    const T* src_data = reinterpret_cast<const T*>(src.raw_data());
     for (size_t i = 0; i < size; ++i) {
         _data[orig_size + i] = src_data[index];
     }
```
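The common thread in the `append` variants above is growing the destination vector without zero-initialization and then overwriting every new slot in one pass. A small self-contained illustration of the gather half (a scalar loop stands in for the project's `SIMDGather::gather`; plain `resize` here still zero-fills, which is exactly the redundant write `stl_vector_resize_uninitialized` avoids):

```cpp
#include <cstdint>
#include <vector>

// Append src[indexes[0..n)] to dst. In the real code the new region is resized
// without zero-initialization and the loop is a SIMD gather.
template <typename T>
void append_selective(std::vector<T>& dst, const std::vector<T>& src,
                      const uint32_t* indexes, size_t n) {
    const size_t orig = dst.size();
    dst.resize(orig + n); // zero-fills; the optimized version skips this write
    T* out = dst.data() + orig;
    for (size_t i = 0; i < n; i++) {
        out[i] = src[indexes[i]]; // random-access gather
    }
}
```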
```diff
@@ -323,6 +323,20 @@ CONF_mBool(enable_zonemap_index_memory_page_cache, "true");
 // whether to enable the ordinal index memory cache
 CONF_mBool(enable_ordinal_index_memory_page_cache, "true");
 
+// ========================== ZONEMAP BEGIN ===================================
+// Enable ZoneMap for string (CHAR/VARCHAR) columns using prefix-based min/max
+CONF_mBool(enable_string_prefix_zonemap, "true");
+// Prefix length used for string ZoneMap min/max when enabled
+CONF_mInt32(string_prefix_zonemap_prefix_len, "16");
+// Adaptive creation of string zonemap index based on page overlap quality.
+// If the estimated overlap ratio across consecutive pages is greater than this threshold,
+// skip writing the page-level string zonemap index. Range: [0.0, 1.0].
+CONF_mDouble(string_zonemap_overlap_threshold, "0.8");
+// Minimum number of non-empty pages before applying the adaptive check.
+CONF_mInt32(string_zonemap_min_pages_for_adaptive_check, "16");
+
+// ========================== ZONEMAP END ===================================
+
 CONF_mInt32(base_compaction_check_interval_seconds, "60");
 CONF_mInt64(min_base_compaction_num_singleton_deltas, "5");
 CONF_mInt64(max_base_compaction_num_singleton_deltas, "100");
@@ -1511,8 +1525,10 @@ CONF_mBool(lake_enable_vertical_compaction_fill_data_cache, "true");
 
 CONF_mInt32(dictionary_cache_refresh_timeout_ms, "60000"); // 1 min
 CONF_mInt32(dictionary_cache_refresh_threadpool_size, "8");
+
+// ======================= FLAT JSON start ==============================================
 // json flat flag
-CONF_mBool(enable_json_flat, "false");
+CONF_mBool(enable_json_flat, "true");
 
 // enable compaction is base on flat json, not whole json
 CONF_mBool(enable_compaction_flat_json, "true");
@@ -1546,6 +1562,7 @@ CONF_mInt32(json_flat_column_max, "100");
 
 // for whitelist on flat json remain data, max set 1kb
 CONF_mInt32(json_flat_remain_filter_max_bytes, "1024");
+// ======================= FLAT JSON end ==============================================
 
 // Allowable intervals for continuous generation of pk dumps
 // Disable when pk_dump_interval_seconds <= 0
@@ -1589,6 +1606,8 @@ CONF_mBool(apply_del_vec_after_all_index_filter, "true");
 CONF_mDouble(connector_sink_mem_high_watermark_ratio, "0.3");
 CONF_mDouble(connector_sink_mem_low_watermark_ratio, "0.1");
 CONF_mDouble(connector_sink_mem_urgent_space_ratio, "0.1");
+// Whether enable spill intermediate data for connector sink.
+CONF_mBool(enable_connector_sink_spill, "false");
 
 // .crm file can be removed after 1day.
 CONF_mInt32(unused_crm_file_threshold_second, "86400" /** 1day **/);
@@ -1729,4 +1748,5 @@ CONF_mInt64(split_exchanger_buffer_chunk_num, "1000");
 
 // when to split hashmap/hashset into two level hashmap/hashset, negative number means use default value
+CONF_mInt64(two_level_memory_threshold, "-1");
 
 } // namespace starrocks::config
```
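The string-prefix ZoneMap settings above trade precision for space: each page records min/max over fixed-length prefixes instead of whole values. A self-contained sketch of why truncated bounds still prune correctly (illustrative only, not StarRocks' actual index code): if `lo` and `hi` are the min and max of the N-byte prefixes of all values in a page, then every value's prefix lies in `[lo, hi]`, so a point lookup for `x` can skip the page whenever `prefix(x, N)` falls outside that range.

```cpp
#include <algorithm>
#include <string>
#include <vector>

constexpr size_t kPrefixLen = 16; // mirrors string_prefix_zonemap_prefix_len

struct PrefixZoneMap {
    std::string lo, hi; // min/max of value prefixes within one page
};

// Assumes a non-empty page.
PrefixZoneMap build(const std::vector<std::string>& page) {
    PrefixZoneMap zm{page.front().substr(0, kPrefixLen), page.front().substr(0, kPrefixLen)};
    for (const auto& v : page) {
        std::string p = v.substr(0, kPrefixLen);
        zm.lo = std::min(zm.lo, p);
        zm.hi = std::max(zm.hi, p);
    }
    return zm;
}

// Page can be skipped for `col = x` if x's prefix cannot occur in the page:
// equal strings always have equal prefixes.
bool can_skip_eq(const PrefixZoneMap& zm, const std::string& x) {
    std::string p = x.substr(0, kPrefixLen);
    return p < zm.lo || p > zm.hi;
}
```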
```diff
@@ -78,11 +78,11 @@
 #define VLOG_OPERATOR VLOG(3)
 #define VLOG_ROW VLOG(10)
 #define VLOG_PROGRESS VLOG(2)
-#define VLOG_CACHE VLOG(1)
+#define VLOG_CACHE VLOG(3)
 
-#define VLOG_CONNECTION_IS_ON VLOG_IS_ON(1)
+#define VLOG_CONNECTION_IS_ON VLOG_IS_ON(2)
 #define VLOG_RPC_IS_ON VLOG_IS_ON(2)
-#define VLOG_QUERY_IS_ON VLOG_IS_ON(1)
+#define VLOG_QUERY_IS_ON VLOG_IS_ON(2)
 #define VLOG_FILE_IS_ON VLOG_IS_ON(2)
 #define VLOG_OPERATOR_IS_ON VLOG_IS_ON(3)
 #define VLOG_ROW_IS_ON VLOG_IS_ON(10)
```
```diff
@@ -31,6 +31,8 @@ add_library(Connector STATIC
     utils.cpp
     async_flush_stream_poller.cpp
     sink_memory_manager.cpp
+    partition_chunk_writer.cpp
+    connector_sink_executor.cpp
     deletion_vector/deletion_vector.cpp
     deletion_vector/deletion_bitmap.cpp
 )
```
```diff
@@ -16,7 +16,7 @@
 
 namespace starrocks::connector {
 
-void AsyncFlushStreamPoller::enqueue(std::unique_ptr<Stream> stream) {
+void AsyncFlushStreamPoller::enqueue(std::shared_ptr<Stream> stream) {
     auto async_status = stream->io_status();
     _queue.push_back({
             .stream = std::move(stream),
```
```diff
@@ -34,7 +34,7 @@ public:
 
     virtual ~AsyncFlushStreamPoller() = default;
 
-    virtual void enqueue(std::unique_ptr<Stream> stream);
+    virtual void enqueue(std::shared_ptr<Stream> stream);
 
     // return a pair of
     // 1. io status
@@ -45,7 +45,7 @@ public:
 
 private:
     struct StreamWithStatus {
-        std::unique_ptr<Stream> stream;
+        std::shared_ptr<Stream> stream;
         std::future<Status> async_status;
     };
 
```
```diff
@@ -24,21 +24,18 @@ namespace starrocks::connector {
 
 ConnectorChunkSink::ConnectorChunkSink(std::vector<std::string> partition_columns,
                                        std::vector<std::unique_ptr<ColumnEvaluator>>&& partition_column_evaluators,
-                                       std::unique_ptr<LocationProvider> location_provider,
-                                       std::unique_ptr<formats::FileWriterFactory> file_writer_factory,
-                                       int64_t max_file_size, RuntimeState* state, bool support_null_partition)
+                                       std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory,
+                                       RuntimeState* state, bool support_null_partition)
         : _partition_column_names(std::move(partition_columns)),
           _partition_column_evaluators(std::move(partition_column_evaluators)),
-          _location_provider(std::move(location_provider)),
-          _file_writer_factory(std::move(file_writer_factory)),
-          _max_file_size(max_file_size),
+          _partition_chunk_writer_factory(std::move(partition_chunk_writer_factory)),
          _state(state),
          _support_null_partition(support_null_partition) {}
 
 Status ConnectorChunkSink::init() {
     RETURN_IF_ERROR(ColumnEvaluator::init(_partition_column_evaluators));
-    RETURN_IF_ERROR(_file_writer_factory->init());
-    _op_mem_mgr->init(&_writer_stream_pairs, _io_poller,
+    RETURN_IF_ERROR(_partition_chunk_writer_factory->init());
+    _op_mem_mgr->init(&_partition_chunk_writers, _io_poller,
                       [this](const CommitResult& r) { this->callback_on_commit(r); });
     return Status::OK();
 }
@@ -49,38 +46,20 @@ Status ConnectorChunkSink::write_partition_chunk(const std::string& partition,
     // They are under the same dir path, but should not in the same data file.
     // We should record them in different files so that each data file could has its own meta info.
     // otherwise, the scanFileTask may filter data incorrectly.
-    auto it = _writer_stream_pairs.find(std::make_pair(partition, partition_field_null_list));
-    if (it != _writer_stream_pairs.end()) {
-        Writer* writer = it->second.first.get();
-        if (writer->get_written_bytes() >= _max_file_size) {
-            string null_fingerprint(partition_field_null_list.size(), '0');
-            std::transform(partition_field_null_list.begin(), partition_field_null_list.end(), null_fingerprint.begin(),
-                           [](int8_t b) { return b + '0'; });
-            callback_on_commit(writer->commit().set_extra_data(null_fingerprint));
-            _writer_stream_pairs.erase(it);
-            auto path =
-                    !_partition_column_names.empty() ? _location_provider->get(partition) : _location_provider->get();
-            ASSIGN_OR_RETURN(auto new_writer_and_stream, _file_writer_factory->create(path));
-            std::unique_ptr<Writer> new_writer = std::move(new_writer_and_stream.writer);
-            std::unique_ptr<Stream> new_stream = std::move(new_writer_and_stream.stream);
-            RETURN_IF_ERROR(new_writer->init());
-            RETURN_IF_ERROR(new_writer->write(chunk));
-            _writer_stream_pairs[std::make_pair(partition, partition_field_null_list)] =
-                    std::make_pair(std::move(new_writer), new_stream.get());
-            _io_poller->enqueue(std::move(new_stream));
-        } else {
-            RETURN_IF_ERROR(writer->write(chunk));
-        }
+    PartitionKey partition_key = std::make_pair(partition, partition_field_null_list);
+    auto it = _partition_chunk_writers.find(partition_key);
+    if (it != _partition_chunk_writers.end()) {
+        return it->second->write(chunk);
     } else {
-        auto path = !_partition_column_names.empty() ? _location_provider->get(partition) : _location_provider->get();
-        ASSIGN_OR_RETURN(auto new_writer_and_stream, _file_writer_factory->create(path));
-        std::unique_ptr<Writer> new_writer = std::move(new_writer_and_stream.writer);
-        std::unique_ptr<Stream> new_stream = std::move(new_writer_and_stream.stream);
-        RETURN_IF_ERROR(new_writer->init());
-        RETURN_IF_ERROR(new_writer->write(chunk));
-        _writer_stream_pairs[std::make_pair(partition, partition_field_null_list)] =
-                std::make_pair(std::move(new_writer), new_stream.get());
-        _io_poller->enqueue(std::move(new_stream));
+        auto writer = _partition_chunk_writer_factory->create(partition, partition_field_null_list);
+        auto commit_callback = [this](const CommitResult& r) { this->callback_on_commit(r); };
+        auto error_handler = [this](const Status& s) { this->set_status(s); };
+        writer->set_commit_callback(commit_callback);
+        writer->set_error_handler(error_handler);
+        writer->set_io_poller(_io_poller);
+        RETURN_IF_ERROR(writer->init());
+        RETURN_IF_ERROR(writer->write(chunk));
+        _partition_chunk_writers[partition_key] = writer;
     }
     return Status::OK();
 }
@@ -100,11 +79,8 @@ Status ConnectorChunkSink::add(Chunk* chunk) {
 }
 
 Status ConnectorChunkSink::finish() {
-    for (auto& [partition_key, writer_and_stream] : _writer_stream_pairs) {
-        string extra_data(partition_key.second.size(), '0');
-        std::transform(partition_key.second.begin(), partition_key.second.end(), extra_data.begin(),
-                       [](int8_t b) { return b + '0'; });
-        callback_on_commit(writer_and_stream.first->commit().set_extra_data(extra_data));
+    for (auto& [partition_key, writer] : _partition_chunk_writers) {
+        RETURN_IF_ERROR(writer->finish());
     }
     return Status::OK();
 }
@@ -115,4 +91,23 @@ void ConnectorChunkSink::rollback() {
     }
 }
 
+void ConnectorChunkSink::set_status(const Status& status) {
+    std::unique_lock<std::shared_mutex> wlck(_mutex);
+    _status = status;
+}
+
+Status ConnectorChunkSink::status() {
+    std::shared_lock<std::shared_mutex> rlck(_mutex);
+    return _status;
+}
+
+bool ConnectorChunkSink::is_finished() {
+    for (auto& [partition_key, writer] : _partition_chunk_writers) {
+        if (!writer->is_finished()) {
+            return false;
+        }
+    }
+    return true;
+}
+
 } // namespace starrocks::connector
```
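After this refactor the sink funnels every chunk through one `PartitionChunkWriter` per `(partition, null-fingerprint)` key, created on first use and wired with commit/error callbacks; file rolling, fingerprint bookkeeping, and stream enqueueing move behind the writer interface. A minimal sketch of that create-on-miss shape (types here are simplified stand-ins, not the real interfaces):

```cpp
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using PartitionKey = std::pair<std::string, std::vector<int8_t>>;

struct Writer {
    std::function<void()> on_commit; // invoked when a data file is sealed
    void write(const std::string& row) { /* buffer, roll files, maybe spill */ }
    void finish() { if (on_commit) on_commit(); }
};

struct Sink {
    std::map<PartitionKey, std::shared_ptr<Writer>> writers;

    void write(const PartitionKey& key, const std::string& row) {
        auto it = writers.find(key);
        if (it == writers.end()) {
            auto w = std::make_shared<Writer>();
            w->on_commit = [] { /* record rollback action, report commit */ };
            it = writers.emplace(key, std::move(w)).first;
        }
        it->second->write(row);
    }

    void finish() {
        for (auto& [key, w] : writers) w->finish();
    }
};
```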
```diff
@@ -20,8 +20,8 @@
 
 #include "column/chunk.h"
 #include "common/status.h"
+#include "connector/partition_chunk_writer.h"
 #include "connector/utils.h"
-#include "formats/file_writer.h"
 #include "fs/fs.h"
 #include "runtime/runtime_state.h"
 
@@ -30,20 +30,14 @@ namespace starrocks::connector {
 class AsyncFlushStreamPoller;
 class SinkOperatorMemoryManager;
 
-using Writer = formats::FileWriter;
-using Stream = io::AsyncFlushOutputStream;
-using WriterStreamPair = std::pair<std::unique_ptr<Writer>, Stream*>;
-using PartitionKey = std::pair<std::string, std::vector<int8_t>>;
 using CommitResult = formats::FileWriter::CommitResult;
 using CommitFunc = std::function<void(const CommitResult& result)>;
 
 class ConnectorChunkSink {
 public:
     ConnectorChunkSink(std::vector<std::string> partition_columns,
                        std::vector<std::unique_ptr<ColumnEvaluator>>&& partition_column_evaluators,
-                       std::unique_ptr<LocationProvider> location_provider,
-                       std::unique_ptr<formats::FileWriterFactory> file_writer_factory, int64_t max_file_size,
-                       RuntimeState* state, bool support_null_partition);
+                       std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory, RuntimeState* state,
+                       bool support_null_partition);
 
     void set_io_poller(AsyncFlushStreamPoller* poller) { _io_poller = poller; }
 
@@ -59,26 +53,33 @@ public:
 
     void rollback();
 
+    bool is_finished();
+
     virtual void callback_on_commit(const CommitResult& result) = 0;
 
     Status write_partition_chunk(const std::string& partition, const vector<int8_t>& partition_field_null_list,
                                  Chunk* chunk);
 
+    Status status();
+
+    void set_status(const Status& status);
+
 protected:
     AsyncFlushStreamPoller* _io_poller = nullptr;
     SinkOperatorMemoryManager* _op_mem_mgr = nullptr;
 
     std::vector<std::string> _partition_column_names;
     std::vector<std::unique_ptr<ColumnEvaluator>> _partition_column_evaluators;
-    std::unique_ptr<LocationProvider> _location_provider;
-    std::unique_ptr<formats::FileWriterFactory> _file_writer_factory;
-    int64_t _max_file_size = 1024L * 1024 * 1024;
+    std::unique_ptr<PartitionChunkWriterFactory> _partition_chunk_writer_factory;
     RuntimeState* _state = nullptr;
     bool _support_null_partition{false};
     std::vector<std::function<void()>> _rollback_actions;
 
-    std::map<PartitionKey, WriterStreamPair> _writer_stream_pairs;
+    std::map<PartitionKey, PartitionChunkWriterPtr> _partition_chunk_writers;
     inline static std::string DEFAULT_PARTITION = "__DEFAULT_PARTITION__";
 
+    std::shared_mutex _mutex;
+    Status _status;
+
 };
 
 struct ConnectorChunkSinkContext {
```
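The new `status()`/`set_status()` pair is a reader-writer guard so any asynchronous writer can publish a failure while the driver thread polls it. The idiom in isolation (hypothetical names; like the diff, `set` simply overwrites the stored value):

```cpp
#include <mutex>
#include <shared_mutex>
#include <string>

// Stand-in: a status shared between async workers and a polling driver thread.
class SharedStatus {
public:
    void set(const std::string& s) {
        std::unique_lock<std::shared_mutex> wlck(_mutex); // exclusive writer lock
        _status = s;
    }
    std::string get() const {
        std::shared_lock<std::shared_mutex> rlck(_mutex); // concurrent reader lock
        return _status;
    }

private:
    mutable std::shared_mutex _mutex;
    std::string _status; // empty string == OK
};
```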
```diff
@@ -0,0 +1,66 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "connector/connector_sink_executor.h"
+
+#include "column/chunk.h"
+#include "common/status.h"
+#include "connector/partition_chunk_writer.h"
+#include "storage/load_chunk_spiller.h"
+
+namespace starrocks::connector {
+
+Status ConnectorSinkSpillExecutor::init() {
+    return ThreadPoolBuilder(_executor_name)
+            .set_min_threads(0)
+            .set_max_threads(calc_max_thread_num())
+            .build(&_thread_pool);
+}
+
+int ConnectorSinkSpillExecutor::calc_max_thread_num() {
+    int dir_count = 0;
+    std::vector<starrocks::StorePath> spill_local_storage_paths;
+    Status st = parse_conf_store_paths(config::spill_local_storage_dir, &spill_local_storage_paths);
+    if (st.ok()) {
+        dir_count = spill_local_storage_paths.size();
+    }
+
+    int threads = config::lake_flush_thread_num_per_store;
+    if (threads == 0) {
+        threads = -2;
+    }
+    if (threads <= 0) {
+        threads = -threads;
+        threads *= CpuInfo::num_cores();
+    }
+    dir_count = std::max(1, dir_count);
+    dir_count = std::min(8, dir_count);
+    return dir_count * threads;
+}
+
+void ChunkSpillTask::run() {
+    auto res = _load_chunk_spiller->spill(*_chunk);
+    if (_cb) {
+        _cb(_chunk, res);
+    }
+}
+
+void MergeBlockTask::run() {
+    auto st = _writer->merge_blocks();
+    if (_cb) {
+        _cb(st);
+    }
+}
+
+} // namespace starrocks::connector
```
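As a worked example of `calc_max_thread_num` above (values hypothetical): with `lake_flush_thread_num_per_store = 0` the default of `-2` applies, and a negative value means a per-core multiplier, so on a 16-core machine `threads = 2 × 16 = 32`; with three configured spill directories, `dir_count = min(8, max(1, 3)) = 3`, giving a pool cap of `3 × 32 = 96` threads.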
```diff
@@ -0,0 +1,100 @@
+// Copyright 2021-present StarRocks, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include <map>
+
+#include "column/chunk.h"
+#include "common/status.h"
+#include "connector/utils.h"
+#include "util/threadpool.h"
+
+namespace starrocks {
+class LoadChunkSpiller;
+}
+
+namespace starrocks::connector {
+
+class SpillPartitionChunkWriter;
+
+class ConnectorSinkExecutor {
+public:
+    ConnectorSinkExecutor(const std::string& executor_name) : _executor_name(executor_name) {}
+    virtual ~ConnectorSinkExecutor() {}
+
+    virtual Status init() = 0;
+
+    ThreadPool* get_thread_pool() { return _thread_pool.get(); }
+
+    std::unique_ptr<ThreadPoolToken> create_token() {
+        return _thread_pool->new_token(ThreadPool::ExecutionMode::SERIAL);
+    }
+
+    Status refresh_max_thread_num() {
+        if (_thread_pool != nullptr) {
+            return _thread_pool->update_max_threads(calc_max_thread_num());
+        }
+        return Status::OK();
+    }
+
+protected:
+    virtual int calc_max_thread_num() = 0;
+
+protected:
+    std::string _executor_name;
+    std::unique_ptr<ThreadPool> _thread_pool;
+};
+
+class ConnectorSinkSpillExecutor : public ConnectorSinkExecutor {
+public:
+    ConnectorSinkSpillExecutor() : ConnectorSinkExecutor("conn_sink_spill") {}
+
+    Status init() override;
+
+protected:
+    int calc_max_thread_num() override;
+};
+
+class ChunkSpillTask final : public Runnable {
+public:
+    ChunkSpillTask(LoadChunkSpiller* load_chunk_spiller, ChunkPtr chunk,
+                   std::function<void(ChunkPtr chunk, const StatusOr<size_t>&)> cb)
+            : _load_chunk_spiller(load_chunk_spiller), _chunk(chunk), _cb(std::move(cb)) {}
+
+    ~ChunkSpillTask() override = default;
+
+    void run() override;
+
+private:
+    LoadChunkSpiller* _load_chunk_spiller;
+    ChunkPtr _chunk;
+    std::function<void(ChunkPtr, const StatusOr<size_t>&)> _cb;
+};
+
+class MergeBlockTask : public Runnable {
+public:
+    MergeBlockTask(SpillPartitionChunkWriter* writer, std::function<void(const Status&)> cb)
+            : _writer(writer), _cb(std::move(cb)) {}
+
+    void run() override;
+
+private:
+    SpillPartitionChunkWriter* _writer;
+    std::function<void(const Status&)> _cb;
+};
+
+} // namespace starrocks::connector
```
```diff
@@ -14,7 +14,6 @@
 
 #include "connector/es_connector.h"
 
-#include "common/logging.h"
 #include "exec/es/es_predicate.h"
 #include "exec/es/es_query_builder.h"
 #include "exec/es/es_scan_reader.h"
@@ -22,6 +21,7 @@
 #include "exec/es/es_scroll_query.h"
 #include "exec/exec_node.h"
 #include "exprs/expr.h"
+#include "service/backend_options.h"
 #include "storage/chunk_helper.h"
 
 namespace starrocks::connector {
```
@ -31,12 +31,10 @@ namespace starrocks::connector {
FileChunkSink::FileChunkSink(std::vector<std::string> partition_columns,
                             std::vector<std::unique_ptr<ColumnEvaluator>>&& partition_column_evaluators,
                             std::unique_ptr<LocationProvider> location_provider,
                             std::unique_ptr<formats::FileWriterFactory> file_writer_factory, int64_t max_file_size,
                             std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory,
                             RuntimeState* state)
        : ConnectorChunkSink(std::move(partition_columns), std::move(partition_column_evaluators),
                             std::move(location_provider), std::move(file_writer_factory), max_file_size, state, true) {
}
                             std::move(partition_chunk_writer_factory), state, true) {}

void FileChunkSink::callback_on_commit(const CommitResult& result) {
    _rollback_actions.push_back(std::move(result.rollback_action));

@ -51,25 +49,25 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> FileChunkSinkProvider::create_chun
    auto runtime_state = ctx->fragment_context->runtime_state();
    auto fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value();
    auto column_evaluators = ColumnEvaluator::clone(ctx->column_evaluators);
    auto location_provider = std::make_unique<connector::LocationProvider>(
    auto location_provider = std::make_shared<connector::LocationProvider>(
            ctx->path, print_id(ctx->fragment_context->query_id()), runtime_state->be_number(), driver_id,
            boost::to_lower_copy(ctx->format));

    std::unique_ptr<formats::FileWriterFactory> file_writer_factory;
    std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
    if (boost::iequals(ctx->format, formats::PARQUET)) {
        file_writer_factory = std::make_unique<formats::ParquetFileWriterFactory>(
        file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
                std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
                std::nullopt, ctx->executor, runtime_state);
    } else if (boost::iequals(ctx->format, formats::ORC)) {
        file_writer_factory = std::make_unique<formats::ORCFileWriterFactory>(
        file_writer_factory = std::make_shared<formats::ORCFileWriterFactory>(
                std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
                ctx->executor, runtime_state);
    } else if (boost::iequals(ctx->format, formats::CSV)) {
        file_writer_factory = std::make_unique<formats::CSVFileWriterFactory>(
        file_writer_factory = std::make_shared<formats::CSVFileWriterFactory>(
                std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
                ctx->executor, runtime_state);
    } else {
        file_writer_factory = std::make_unique<formats::UnknownFileWriterFactory>(ctx->format);
        file_writer_factory = std::make_shared<formats::UnknownFileWriterFactory>(ctx->format);
    }

    std::vector<std::string> partition_columns;

@ -78,9 +76,27 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> FileChunkSinkProvider::create_chun
        partition_columns.push_back(ctx->column_names[idx]);
        partition_column_evaluators.push_back(ctx->column_evaluators[idx]->clone());
    }

    std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory;
    // Disable the load spill for the file sink temporarily.
    if (/* config::enable_connector_sink_spill */ false) {
        auto partition_chunk_writer_ctx =
                std::make_shared<SpillPartitionChunkWriterContext>(SpillPartitionChunkWriterContext{
                        {file_writer_factory, location_provider, ctx->max_file_size, partition_columns.empty()},
                        ctx->fragment_context,
                        nullptr,
                        nullptr});
        partition_chunk_writer_factory = std::make_unique<SpillPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
    } else {
        auto partition_chunk_writer_ctx =
                std::make_shared<BufferPartitionChunkWriterContext>(BufferPartitionChunkWriterContext{
                        {file_writer_factory, location_provider, ctx->max_file_size, partition_columns.empty()}});
        partition_chunk_writer_factory =
                std::make_unique<BufferPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
    }

    return std::make_unique<connector::FileChunkSink>(partition_columns, std::move(partition_column_evaluators),
                                                      std::move(location_provider), std::move(file_writer_factory),
                                                      ctx->max_file_size, runtime_state);
                                                      std::move(partition_chunk_writer_factory), runtime_state);
}

} // namespace starrocks::connector
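Each sink provider above builds one of two writer factories from a shared context: a buffering factory, or a spilling factory when connector sink spill is enabled (the file and hive sinks currently hard-wire the buffer path; the Iceberg sink below keys off config::enable_connector_sink_spill). A small sketch of the same selection shape, with illustrative types only:

#include <memory>

struct WriterContext {
    long max_file_size = 0;
    bool default_partition = false;
};

struct WriterFactory {
    virtual ~WriterFactory() = default;
};
struct BufferWriterFactory : WriterFactory {
    explicit BufferWriterFactory(std::shared_ptr<WriterContext> ctx) : ctx(std::move(ctx)) {}
    std::shared_ptr<WriterContext> ctx;
};
struct SpillWriterFactory : WriterFactory {
    explicit SpillWriterFactory(std::shared_ptr<WriterContext> ctx) : ctx(std::move(ctx)) {}
    std::shared_ptr<WriterContext> ctx;
};

std::unique_ptr<WriterFactory> make_writer_factory(bool enable_spill,
                                                   std::shared_ptr<WriterContext> ctx) {
    // Same shape as the providers: one branch per strategy, one shared context for both.
    if (enable_spill) {
        return std::make_unique<SpillWriterFactory>(std::move(ctx));
    }
    return std::make_unique<BufferWriterFactory>(std::move(ctx));
}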
@ -36,9 +36,7 @@ class FileChunkSink : public ConnectorChunkSink {
public:
    FileChunkSink(std::vector<std::string> partition_columns,
                  std::vector<std::unique_ptr<ColumnEvaluator>>&& partition_column_evaluators,
                  std::unique_ptr<LocationProvider> location_provider,
                  std::unique_ptr<formats::FileWriterFactory> file_writer_factory, int64_t max_file_size,
                  RuntimeState* state);
                  std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory, RuntimeState* state);

    ~FileChunkSink() override = default;
@ -29,12 +29,10 @@ namespace starrocks::connector {
HiveChunkSink::HiveChunkSink(std::vector<std::string> partition_columns,
                             std::vector<std::unique_ptr<ColumnEvaluator>>&& partition_column_evaluators,
                             std::unique_ptr<LocationProvider> location_provider,
                             std::unique_ptr<formats::FileWriterFactory> file_writer_factory, int64_t max_file_size,
                             std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory,
                             RuntimeState* state)
        : ConnectorChunkSink(std::move(partition_columns), std::move(partition_column_evaluators),
                             std::move(location_provider), std::move(file_writer_factory), max_file_size, state,
                             false) {}
                             std::move(partition_chunk_writer_factory), state, false) {}

void HiveChunkSink::callback_on_commit(const CommitResult& result) {
    _rollback_actions.push_back(std::move(result.rollback_action));

@ -57,34 +55,52 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> HiveChunkSinkProvider::create_chun
    auto runtime_state = ctx->fragment_context->runtime_state();
    auto fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value(); // must succeed
    auto data_column_evaluators = ColumnEvaluator::clone(ctx->data_column_evaluators);
    auto location_provider = std::make_unique<connector::LocationProvider>(
    auto location_provider = std::make_shared<connector::LocationProvider>(
            ctx->path, print_id(ctx->fragment_context->query_id()), runtime_state->be_number(), driver_id,
            boost::to_lower_copy(ctx->format));

    std::unique_ptr<formats::FileWriterFactory> file_writer_factory;
    std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
    if (boost::iequals(ctx->format, formats::PARQUET)) {
        // Ensure Hive compatibility: Hive 3 and lower versions accept only specific encodings.
        ctx->options[formats::ParquetWriterOptions::USE_LEGACY_DECIMAL_ENCODING] = "true";
        ctx->options[formats::ParquetWriterOptions::USE_INT96_TIMESTAMP_ENCODING] = "true";
        file_writer_factory = std::make_unique<formats::ParquetFileWriterFactory>(
        file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
                std::move(fs), ctx->compression_type, ctx->options, ctx->data_column_names,
                std::move(data_column_evaluators), std::nullopt, ctx->executor, runtime_state);
    } else if (boost::iequals(ctx->format, formats::ORC)) {
        file_writer_factory = std::make_unique<formats::ORCFileWriterFactory>(
        file_writer_factory = std::make_shared<formats::ORCFileWriterFactory>(
                std::move(fs), ctx->compression_type, ctx->options, ctx->data_column_names,
                std::move(data_column_evaluators), ctx->executor, runtime_state);
    } else if (boost::iequals(ctx->format, formats::TEXTFILE)) {
        file_writer_factory = std::make_unique<formats::CSVFileWriterFactory>(
        file_writer_factory = std::make_shared<formats::CSVFileWriterFactory>(
                std::move(fs), ctx->compression_type, ctx->options, ctx->data_column_names,
                std::move(data_column_evaluators), ctx->executor, runtime_state);
    } else {
        file_writer_factory = std::make_unique<formats::UnknownFileWriterFactory>(ctx->format);
        file_writer_factory = std::make_shared<formats::UnknownFileWriterFactory>(ctx->format);
    }

    std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory;
    // Disable the load spill for the hive sink temporarily.
    if (/* config::enable_connector_sink_spill */ false) {
        auto partition_chunk_writer_ctx = std::make_shared<SpillPartitionChunkWriterContext>(
                SpillPartitionChunkWriterContext{{file_writer_factory, location_provider, ctx->max_file_size,
                                                  ctx->partition_column_names.empty()},
                                                 ctx->fragment_context,
                                                 nullptr,
                                                 nullptr});
        partition_chunk_writer_factory = std::make_unique<SpillPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
    } else {
        auto partition_chunk_writer_ctx = std::make_shared<BufferPartitionChunkWriterContext>(
                BufferPartitionChunkWriterContext{{file_writer_factory, location_provider, ctx->max_file_size,
                                                   ctx->partition_column_names.empty()}});
        partition_chunk_writer_factory =
                std::make_unique<BufferPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
    }

    auto partition_column_evaluators = ColumnEvaluator::clone(ctx->partition_column_evaluators);
    return std::make_unique<connector::HiveChunkSink>(
            ctx->partition_column_names, std::move(partition_column_evaluators), std::move(location_provider),
            std::move(file_writer_factory), ctx->max_file_size, runtime_state);
    return std::make_unique<connector::HiveChunkSink>(ctx->partition_column_names,
                                                      std::move(partition_column_evaluators),
                                                      std::move(partition_chunk_writer_factory), runtime_state);
}

} // namespace starrocks::connector
@ -38,9 +38,7 @@ class HiveChunkSink : public ConnectorChunkSink {
public:
    HiveChunkSink(std::vector<std::string> partition_columns,
                  std::vector<std::unique_ptr<ColumnEvaluator>>&& partition_column_evaluators,
                  std::unique_ptr<LocationProvider> location_provider,
                  std::unique_ptr<formats::FileWriterFactory> file_writer_factory, int64_t max_file_size,
                  RuntimeState* state);
                  std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory, RuntimeState* state);

    ~HiveChunkSink() override = default;
@ -30,11 +30,10 @@ namespace starrocks::connector {
IcebergChunkSink::IcebergChunkSink(std::vector<std::string> partition_columns, std::vector<std::string> transform_exprs,
                                   std::vector<std::unique_ptr<ColumnEvaluator>>&& partition_column_evaluators,
                                   std::unique_ptr<LocationProvider> location_provider,
                                   std::unique_ptr<formats::FileWriterFactory> file_writer_factory,
                                   int64_t max_file_size, RuntimeState* state)
                                   std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory,
                                   RuntimeState* state)
        : ConnectorChunkSink(std::move(partition_columns), std::move(partition_column_evaluators),
                             std::move(location_provider), std::move(file_writer_factory), max_file_size, state, true),
                             std::move(partition_chunk_writer_factory), state, true),
          _transform_exprs(std::move(transform_exprs)) {}

void IcebergChunkSink::callback_on_commit(const CommitResult& result) {

@ -84,25 +83,42 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
    auto runtime_state = ctx->fragment_context->runtime_state();
    auto fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value();
    auto column_evaluators = ColumnEvaluator::clone(ctx->column_evaluators);
    auto location_provider = std::make_unique<connector::LocationProvider>(
    auto location_provider = std::make_shared<connector::LocationProvider>(
            ctx->path, print_id(ctx->fragment_context->query_id()), runtime_state->be_number(), driver_id,
            boost::to_lower_copy(ctx->format));

    std::unique_ptr<formats::FileWriterFactory> file_writer_factory;
    if (boost::iequals(ctx->format, formats::PARQUET)) {
        file_writer_factory = std::make_unique<formats::ParquetFileWriterFactory>(
                std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
                ctx->parquet_field_ids, ctx->executor, runtime_state);
    } else {
        file_writer_factory = std::make_unique<formats::UnknownFileWriterFactory>(ctx->format);
    }

    std::vector<std::string>& partition_columns = ctx->partition_column_names;
    std::vector<std::string>& transform_exprs = ctx->transform_exprs;
    auto partition_evaluators = ColumnEvaluator::clone(ctx->partition_evaluators);
    return std::make_unique<connector::IcebergChunkSink>(
            partition_columns, transform_exprs, std::move(partition_evaluators), std::move(location_provider),
            std::move(file_writer_factory), ctx->max_file_size, runtime_state);
    std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
    if (boost::iequals(ctx->format, formats::PARQUET)) {
        file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
                std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
                ctx->parquet_field_ids, ctx->executor, runtime_state);
    } else {
        file_writer_factory = std::make_shared<formats::UnknownFileWriterFactory>(ctx->format);
    }

    std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory;
    if (config::enable_connector_sink_spill) {
        auto partition_chunk_writer_ctx =
                std::make_shared<SpillPartitionChunkWriterContext>(SpillPartitionChunkWriterContext{
                        {file_writer_factory, location_provider, ctx->max_file_size, partition_columns.empty()},
                        ctx->fragment_context,
                        runtime_state->desc_tbl().get_tuple_descriptor(ctx->tuple_desc_id),
                        ctx->sort_ordering});
        partition_chunk_writer_factory = std::make_unique<SpillPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
    } else {
        auto partition_chunk_writer_ctx =
                std::make_shared<BufferPartitionChunkWriterContext>(BufferPartitionChunkWriterContext{
                        {file_writer_factory, location_provider, ctx->max_file_size, partition_columns.empty()}});
        partition_chunk_writer_factory =
                std::make_unique<BufferPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
    }

    return std::make_unique<connector::IcebergChunkSink>(partition_columns, transform_exprs,
                                                         std::move(partition_evaluators),
                                                         std::move(partition_chunk_writer_factory), runtime_state);
}

Status IcebergChunkSink::add(Chunk* chunk) {

@ -37,9 +37,7 @@ class IcebergChunkSink : public ConnectorChunkSink {
public:
    IcebergChunkSink(std::vector<std::string> partition_columns, std::vector<std::string> transform_exprs,
                     std::vector<std::unique_ptr<ColumnEvaluator>>&& partition_column_evaluators,
                     std::unique_ptr<LocationProvider> location_provider,
                     std::unique_ptr<formats::FileWriterFactory> file_writer_factory, int64_t max_file_size,
                     RuntimeState* state);
                     std::unique_ptr<PartitionChunkWriterFactory> partition_chunk_writer_factory, RuntimeState* state);

    ~IcebergChunkSink() override = default;

@ -70,6 +68,8 @@ struct IcebergChunkSinkContext : public ConnectorChunkSinkContext {
    PriorityThreadPool* executor = nullptr;
    TCloudConfiguration cloud_conf;
    pipeline::FragmentContext* fragment_context = nullptr;
    int tuple_desc_id = -1;
    std::shared_ptr<SortOrdering> sort_ordering;
};

class IcebergChunkSinkProvider : public ConnectorChunkSinkProvider {
@ -434,7 +434,15 @@ Status LakeDataSource::_extend_schema_by_access_paths() {
        column.set_type(value_type);
        column.set_length(path->value_type().len);
        column.set_is_nullable(true);
        column.set_extended_info(std::make_unique<ExtendedColumnInfo>(path.get(), root_column_index));
        int32_t root_uid = _tablet_schema->column(static_cast<size_t>(root_column_index)).unique_id();
        column.set_extended_info(std::make_unique<ExtendedColumnInfo>(path.get(), root_uid));

        // For UNIQUE/AGG tables, extended flat JSON subcolumns behave like value columns
        // and must carry a valid aggregation for pre-aggregation. Use REPLACE.
        auto keys_type = _tablet_schema->keys_type();
        if (keys_type == KeysType::UNIQUE_KEYS || keys_type == KeysType::AGG_KEYS) {
            column.set_aggregation(StorageAggregateType::STORAGE_AGGREGATE_REPLACE);
        }

        tmp_schema->append_column(column);
        VLOG(2) << "extend the access path column: " << path->linear_path();
@ -0,0 +1,339 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "connector/partition_chunk_writer.h"

#include "column/chunk.h"
#include "common/status.h"
#include "connector/async_flush_stream_poller.h"
#include "connector/connector_sink_executor.h"
#include "connector/sink_memory_manager.h"
#include "exec/pipeline/fragment_context.h"
#include "formats/file_writer.h"
#include "runtime/runtime_state.h"
#include "storage/chunk_helper.h"
#include "storage/load_spill_block_manager.h"
#include "storage/storage_engine.h"
#include "storage/types.h"
#include "util/monotime.h"

namespace starrocks::connector {

PartitionChunkWriter::PartitionChunkWriter(std::string partition, std::vector<int8_t> partition_field_null_list,
                                           const std::shared_ptr<PartitionChunkWriterContext>& ctx)
        : _partition(std::move(partition)),
          _partition_field_null_list(std::move(partition_field_null_list)),
          _file_writer_factory(ctx->file_writer_factory),
          _location_provider(ctx->location_provider),
          _max_file_size(ctx->max_file_size),
          _is_default_partition(ctx->is_default_partition) {
    _commit_extra_data.resize(_partition_field_null_list.size(), '0');
    std::transform(_partition_field_null_list.begin(), _partition_field_null_list.end(), _commit_extra_data.begin(),
                   [](int8_t b) { return b + '0'; });
}

Status PartitionChunkWriter::create_file_writer_if_needed() {
    if (!_file_writer) {
        std::string path = _is_default_partition ? _location_provider->get() : _location_provider->get(_partition);
        ASSIGN_OR_RETURN(auto new_writer_and_stream, _file_writer_factory->create(path));
        _file_writer = std::move(new_writer_and_stream.writer);
        _out_stream = std::move(new_writer_and_stream.stream);
        RETURN_IF_ERROR(_file_writer->init());
        _io_poller->enqueue(_out_stream);
    }
    return Status::OK();
}

void PartitionChunkWriter::commit_file() {
    if (!_file_writer) {
        return;
    }
    auto result = _file_writer->commit();
    _commit_callback(result.set_extra_data(_commit_extra_data));
    _file_writer = nullptr;
    VLOG(3) << "commit to remote file, filename: " << _out_stream->filename()
            << ", size: " << result.file_statistics.file_size;
}

Status BufferPartitionChunkWriter::init() {
    return Status::OK();
}

Status BufferPartitionChunkWriter::write(Chunk* chunk) {
    RETURN_IF_ERROR(create_file_writer_if_needed());
    if (_file_writer->get_written_bytes() >= _max_file_size) {
        commit_file();
        // commit_file() resets _file_writer, so open a new file before writing.
        RETURN_IF_ERROR(create_file_writer_if_needed());
    }
    return _file_writer->write(chunk);
}

Status BufferPartitionChunkWriter::flush() {
    commit_file();
    return Status::OK();
}

Status BufferPartitionChunkWriter::finish() {
    commit_file();
    return Status::OK();
}

SpillPartitionChunkWriter::SpillPartitionChunkWriter(std::string partition,
                                                     std::vector<int8_t> partition_field_null_list,
                                                     const std::shared_ptr<SpillPartitionChunkWriterContext>& ctx)
        : PartitionChunkWriter(std::move(partition), std::move(partition_field_null_list), ctx),
          _fragment_context(ctx->fragment_context),
          _sort_ordering(ctx->sort_ordering) {
    _chunk_spill_token = ExecEnv::GetInstance()->connector_sink_spill_executor()->create_token();
    _block_merge_token = StorageEngine::instance()->load_spill_block_merge_executor()->create_token();
    _tuple_desc = ctx->tuple_desc;
}

SpillPartitionChunkWriter::~SpillPartitionChunkWriter() {
    if (_chunk_spill_token) {
        _chunk_spill_token->shutdown();
    }
    if (_block_merge_token) {
        _block_merge_token->shutdown();
    }
}

Status SpillPartitionChunkWriter::init() {
    std::string root_location =
            _is_default_partition ? _location_provider->root_location() : _location_provider->root_location(_partition);
    _load_spill_block_mgr = std::make_unique<LoadSpillBlockManager>(
            _fragment_context->query_id(), _fragment_context->fragment_instance_id(), root_location);
    RETURN_IF_ERROR(_load_spill_block_mgr->init());
    _load_chunk_spiller = std::make_unique<LoadChunkSpiller>(_load_spill_block_mgr.get(),
                                                             _fragment_context->runtime_state()->runtime_profile());
    return Status::OK();
}

Status SpillPartitionChunkWriter::write(Chunk* chunk) {
    RETURN_IF_ERROR(create_file_writer_if_needed());
    _chunks.push_back(chunk->clone_unique());
    _chunk_bytes_usage += chunk->bytes_usage();
    if (!_base_chunk) {
        _base_chunk = _chunks.back();
    }

    int64_t max_flush_batch_size = _file_writer->get_flush_batch_size();
    if (_sort_ordering || max_flush_batch_size == 0) {
        max_flush_batch_size = _max_file_size;
    }
    if (_chunk_bytes_usage >= max_flush_batch_size) {
        return _flush_to_file();
    } else if (_mem_insufficent()) {
        return _spill();
    }
    return Status::OK();
}

Status SpillPartitionChunkWriter::flush() {
    RETURN_IF(!_file_writer, Status::OK());
    return _spill();
}

Status SpillPartitionChunkWriter::finish() {
    _chunk_spill_token->wait();
    // If no chunks have been spilled, flush the data to the remote file directly.
    if (_load_chunk_spiller->empty()) {
        VLOG(2) << "flush to remote directly when finish, query_id: " << print_id(_fragment_context->query_id())
                << ", fragment_instance_id: " << print_id(_fragment_context->fragment_instance_id());
        RETURN_IF_ERROR(_flush_to_file());
        commit_file();
        return Status::OK();
    }

    auto cb = [this](const Status& st) {
        LOG_IF(ERROR, !st.ok()) << "fail to merge spill blocks, query_id: " << print_id(_fragment_context->query_id())
                                << ", fragment_instance_id: " << print_id(_fragment_context->fragment_instance_id());
        _handle_err(st);
        commit_file();
    };
    auto merge_task = std::make_shared<MergeBlockTask>(this, cb);
    return _block_merge_token->submit(merge_task);
}

const int64_t SpillPartitionChunkWriter::kWaitMilliseconds = 10;

bool SpillPartitionChunkWriter::is_finished() {
    bool finished = _chunk_spill_token->wait_for(MonoDelta::FromMilliseconds(kWaitMilliseconds)) &&
                    _block_merge_token->wait_for(MonoDelta::FromMilliseconds(kWaitMilliseconds));
    return finished;
}

Status SpillPartitionChunkWriter::merge_blocks() {
    RETURN_IF_ERROR(flush());
    _chunk_spill_token->wait();

    auto write_func = [this](Chunk* chunk) { return _flush_chunk(chunk, false); };
    auto flush_func = []() {
        // Do nothing: files are checked and committed while each chunk is written.
        return Status::OK();
    };
    Status st = _load_chunk_spiller->merge_write(_max_file_size, _sort_ordering != nullptr, false /* do_agg */,
                                                 write_func, flush_func);
    VLOG(2) << "finish merge blocks, query_id: " << _fragment_context->query_id() << ", status: " << st.message();
    return st;
}

Status SpillPartitionChunkWriter::_sort() {
    RETURN_IF(!_result_chunk, Status::OK());

    auto chunk = _result_chunk->clone_empty_with_schema(0);
    _result_chunk->swap_chunk(*chunk);
    SmallPermutation perm = create_small_permutation(static_cast<uint32_t>(chunk->num_rows()));
    Columns columns;
    for (auto sort_key_idx : _sort_ordering->sort_key_idxes) {
        columns.push_back(chunk->get_column_by_index(sort_key_idx));
    }

    RETURN_IF_ERROR(stable_sort_and_tie_columns(false, columns, _sort_ordering->sort_descs, &perm));
    std::vector<uint32_t> selective;
    permutate_to_selective(perm, &selective);
    _result_chunk->rolling_append_selective(*chunk, selective.data(), 0, chunk->num_rows());
    return Status::OK();
}

Status SpillPartitionChunkWriter::_spill() {
    RETURN_IF(_chunks.empty(), Status::OK());

    _merge_chunks();
    if (_sort_ordering) {
        RETURN_IF_ERROR(_sort());
    }

    auto callback = [this](const ChunkPtr& chunk, const StatusOr<size_t>& res) {
        if (!res.ok()) {
            LOG(ERROR) << "fail to spill connector partition chunk sink, write it to remote file directly. msg: "
                       << res.status().message();
            Status st = _flush_chunk(chunk.get(), true);
            _handle_err(st);
        } else {
            VLOG(3) << "spill chunk data, filename: " << out_stream()->filename() << ", size: " << chunk->bytes_usage();
        }
        _spilling_bytes_usage.fetch_sub(chunk->bytes_usage(), std::memory_order_relaxed);
    };
    auto spill_task = std::make_shared<ChunkSpillTask>(_load_chunk_spiller.get(), _result_chunk, callback);
    RETURN_IF_ERROR(_chunk_spill_token->submit(spill_task));
    _spilling_bytes_usage.fetch_add(_result_chunk->bytes_usage(), std::memory_order_relaxed);
    _chunk_bytes_usage = 0;
    return Status::OK();
}

Status SpillPartitionChunkWriter::_flush_to_file() {
    RETURN_IF(_chunks.empty(), Status::OK());

    if (!_sort_ordering) {
        for (auto& chunk : _chunks) {
            RETURN_IF_ERROR(_flush_chunk(chunk.get(), false));
        }
    } else {
        _merge_chunks();
        RETURN_IF_ERROR(_sort());
        RETURN_IF_ERROR(_flush_chunk(_result_chunk.get(), true));
    }
    _chunks.clear();
    _chunk_bytes_usage = 0;

    return Status::OK();
}

Status SpillPartitionChunkWriter::_flush_chunk(Chunk* chunk, bool split) {
    if (chunk->get_slot_id_to_index_map().empty()) {
        auto slot_map = _base_chunk->get_slot_id_to_index_map();
        for (auto& it : slot_map) {
            chunk->set_slot_id_to_index(it.first, it.second);
        }
    }

    if (!split) {
        return _write_chunk(chunk);
    }
    size_t chunk_size = config::vector_chunk_size;
    for (size_t offset = 0; offset < chunk->num_rows(); offset += chunk_size) {
        auto sub_chunk = chunk->clone_empty(chunk_size);
        size_t num_rows = std::min(chunk_size, chunk->num_rows() - offset);
        sub_chunk->append(*chunk, offset, num_rows);
        RETURN_IF_ERROR(_write_chunk(sub_chunk.get()));
    }
    return Status::OK();
}

Status SpillPartitionChunkWriter::_write_chunk(Chunk* chunk) {
    // _file_writer may have been reset by a previous commit_file(), so guard the size check.
    if (_file_writer && _file_writer->get_written_bytes() >= _max_file_size) {
        commit_file();
    }
    RETURN_IF_ERROR(create_file_writer_if_needed());
    RETURN_IF_ERROR(_file_writer->write(chunk));
    return Status::OK();
}

void SpillPartitionChunkWriter::_merge_chunks() {
    if (_chunks.empty()) {
        return;
    }

    // Create a target chunk with a schema so that it can directly reuse some
    // module functions of the native table.
    size_t num_rows = std::accumulate(_chunks.begin(), _chunks.end(), size_t(0),
                                      [](size_t sum, const ChunkPtr& chunk) { return sum + chunk->num_rows(); });
    _result_chunk = _create_schema_chunk(_chunks.front(), num_rows);

    for (auto& chunk : _chunks) {
        _result_chunk->append(*chunk, 0, chunk->num_rows());
        chunk.reset();
    }
    _chunks.clear();
}

bool SpillPartitionChunkWriter::_mem_insufficent() {
    // Always return false: spilling is triggered by the sink memory manager instead.
    return false;
}

void SpillPartitionChunkWriter::_handle_err(const Status& st) {
    if (!st.ok()) {
        _error_handler(st);
    }
}

SchemaPtr SpillPartitionChunkWriter::_make_schema() {
    Fields fields;
    for (auto& slot : _tuple_desc->slots()) {
        TypeDescriptor type_desc = slot->type();
        TypeInfoPtr type_info = get_type_info(type_desc.type, type_desc.precision, type_desc.scale);
        auto field = std::make_shared<Field>(slot->id(), slot->col_name(), type_info, slot->is_nullable());
        fields.push_back(field);
    }
    SchemaPtr schema =
            std::make_shared<Schema>(std::move(fields), KeysType::DUP_KEYS,
                                     _sort_ordering ? _sort_ordering->sort_key_idxes : std::vector<uint32_t>());
    return schema;
}

ChunkPtr SpillPartitionChunkWriter::_create_schema_chunk(const ChunkPtr& base_chunk, size_t num_rows) {
    if (!_schema) {
        auto schema = base_chunk->schema();
        if (schema) {
            _schema = schema;
        } else {
            _schema = _make_schema();
        }
    }
    auto chunk = ChunkHelper::new_chunk(*_schema, num_rows);
    return chunk;
}

} // namespace starrocks::connector
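The write() path above follows a simple rule: buffer chunks until the writer's flush batch size is reached, flush to the remote file at that point, and spill locally only when memory is tight. A standalone sketch of that decision rule (hypothetical names, mirroring but not reproducing SpillPartitionChunkWriter::write()):

#include <cstdint>

enum class Action { Buffer, FlushToFile, Spill };

Action next_action(int64_t buffered_bytes, int64_t flush_batch_size, int64_t max_file_size,
                   bool has_sort_ordering, bool memory_insufficient) {
    // Sorted output must see all buffered rows before flushing, so it batches
    // up to a full file; a zero batch size falls back to the same bound.
    if (has_sort_ordering || flush_batch_size == 0) {
        flush_batch_size = max_file_size;
    }
    if (buffered_bytes >= flush_batch_size) {
        return Action::FlushToFile; // enough data for an efficient remote write
    }
    if (memory_insufficient) {
        return Action::Spill; // relieve memory pressure via local spill blocks
    }
    return Action::Buffer; // keep accumulating in memory
}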
@ -0,0 +1,253 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <fmt/format.h>

#include <map>

#include "column/chunk.h"
#include "common/status.h"
#include "connector/utils.h"
#include "formats/file_writer.h"
#include "fs/fs.h"
#include "runtime/exec_env.h"
#include "runtime/runtime_state.h"
#include "storage/load_chunk_spiller.h"
#include "util/threadpool.h"

namespace starrocks::connector {

using CommitResult = formats::FileWriter::CommitResult;
using CommitFunc = std::function<void(const CommitResult& result)>;
using ErrorHandleFunc = std::function<void(const Status& status)>;

class AsyncFlushStreamPoller;

struct SortOrdering {
    std::vector<uint32_t> sort_key_idxes;
    SortDescs sort_descs;
};

struct PartitionChunkWriterContext {
    std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
    std::shared_ptr<LocationProvider> location_provider;
    int64_t max_file_size = 0;
    bool is_default_partition = false;
};

struct BufferPartitionChunkWriterContext : public PartitionChunkWriterContext {};

struct SpillPartitionChunkWriterContext : public PartitionChunkWriterContext {
    pipeline::FragmentContext* fragment_context = nullptr;
    TupleDescriptor* tuple_desc = nullptr;
    std::shared_ptr<SortOrdering> sort_ordering;
};

class PartitionChunkWriter {
public:
    PartitionChunkWriter(std::string partition, std::vector<int8_t> partition_field_null_list,
                         const std::shared_ptr<PartitionChunkWriterContext>& ctx);

    virtual ~PartitionChunkWriter() = default;

    virtual Status init() = 0;
    virtual Status write(Chunk* chunk) = 0;
    virtual Status flush() = 0;
    virtual Status finish() = 0;
    virtual bool is_finished() = 0;
    virtual int64_t get_written_bytes() = 0;
    virtual int64_t get_flushable_bytes() = 0;

    const std::string& partition() const { return _partition; }

    const std::vector<int8_t>& partition_field_null_list() const { return _partition_field_null_list; }

    std::shared_ptr<formats::FileWriter> file_writer() { return _file_writer; }

    std::shared_ptr<io::AsyncFlushOutputStream> out_stream() { return _out_stream; }

    void set_io_poller(AsyncFlushStreamPoller* io_poller) { _io_poller = io_poller; }

    void set_commit_callback(const CommitFunc& commit_callback) { _commit_callback = commit_callback; }

    void set_error_handler(const ErrorHandleFunc& error_handler) { _error_handler = error_handler; }

protected:
    Status create_file_writer_if_needed();

    void commit_file();

protected:
    std::string _partition;
    std::vector<int8_t> _partition_field_null_list;
    std::shared_ptr<formats::FileWriterFactory> _file_writer_factory;
    std::shared_ptr<LocationProvider> _location_provider;
    int64_t _max_file_size = 0;
    bool _is_default_partition = false;
    AsyncFlushStreamPoller* _io_poller = nullptr;

    std::shared_ptr<formats::FileWriter> _file_writer;
    std::shared_ptr<io::AsyncFlushOutputStream> _out_stream;
    CommitFunc _commit_callback;
    std::string _commit_extra_data;
    ErrorHandleFunc _error_handler = nullptr;
};

class BufferPartitionChunkWriter : public PartitionChunkWriter {
public:
    BufferPartitionChunkWriter(std::string partition, std::vector<int8_t> partition_field_null_list,
                               const std::shared_ptr<BufferPartitionChunkWriterContext>& ctx)
            : PartitionChunkWriter(std::move(partition), std::move(partition_field_null_list), ctx) {}

    Status init() override;
    Status write(Chunk* chunk) override;
    Status flush() override;
    Status finish() override;

    bool is_finished() override { return true; }

    int64_t get_written_bytes() override { return _file_writer ? _file_writer->get_written_bytes() : 0; }

    int64_t get_flushable_bytes() override { return _file_writer ? _file_writer->get_written_bytes() : 0; }
};

class SpillPartitionChunkWriter : public PartitionChunkWriter {
public:
    SpillPartitionChunkWriter(std::string partition, std::vector<int8_t> partition_field_null_list,
                              const std::shared_ptr<SpillPartitionChunkWriterContext>& ctx);

    ~SpillPartitionChunkWriter() override;

    Status init() override;
    Status write(Chunk* chunk) override;
    Status flush() override;
    Status finish() override;
    bool is_finished() override;

    int64_t get_written_bytes() override {
        if (!_file_writer) {
            return 0;
        }
        return _chunk_bytes_usage + _spilling_bytes_usage.load(std::memory_order_relaxed) +
               _file_writer->get_written_bytes();
    }

    int64_t get_flushable_bytes() override { return _chunk_bytes_usage; }

    Status merge_blocks();

private:
    Status _sort();
    Status _spill();
    Status _flush_to_file();
    Status _flush_chunk(Chunk* chunk, bool split);
    Status _write_chunk(Chunk* chunk);
    void _merge_chunks();
    SchemaPtr _make_schema();
    ChunkPtr _create_schema_chunk(const ChunkPtr& base_chunk, size_t row_nums);
    bool _mem_insufficent();
    void _handle_err(const Status& st);

private:
    pipeline::FragmentContext* _fragment_context = nullptr;
    TupleDescriptor* _tuple_desc = nullptr;
    std::shared_ptr<SortOrdering> _sort_ordering;
    std::unique_ptr<ThreadPoolToken> _chunk_spill_token;
    std::unique_ptr<ThreadPoolToken> _block_merge_token;
    std::unique_ptr<LoadSpillBlockManager> _load_spill_block_mgr;
    std::shared_ptr<LoadChunkSpiller> _load_chunk_spiller;

    std::list<ChunkPtr> _chunks;
    int64_t _chunk_bytes_usage = 0;
    std::atomic<int64_t> _spilling_bytes_usage = 0;
    ChunkPtr _result_chunk;
    ChunkPtr _base_chunk;
    SchemaPtr _schema;

    static const int64_t kWaitMilliseconds;
};

using PartitionChunkWriterPtr = std::shared_ptr<PartitionChunkWriter>;

class PartitionChunkWriterFactory {
public:
    virtual ~PartitionChunkWriterFactory() = default;

    virtual Status init() = 0;

    virtual PartitionChunkWriterPtr create(std::string partition,
                                           std::vector<int8_t> partition_field_null_list) const = 0;
};

class BufferPartitionChunkWriterFactory : public PartitionChunkWriterFactory {
public:
    BufferPartitionChunkWriterFactory(std::shared_ptr<BufferPartitionChunkWriterContext> ctx) : _ctx(std::move(ctx)) {}

    ~BufferPartitionChunkWriterFactory() override = default;

    Status init() override { return _ctx->file_writer_factory->init(); }

    PartitionChunkWriterPtr create(std::string partition,
                                   std::vector<int8_t> partition_field_null_list) const override {
        return std::make_shared<BufferPartitionChunkWriter>(std::move(partition), std::move(partition_field_null_list),
                                                            _ctx);
    }

private:
    std::shared_ptr<BufferPartitionChunkWriterContext> _ctx;
};

class SpillPartitionChunkWriterFactory : public PartitionChunkWriterFactory {
public:
    SpillPartitionChunkWriterFactory(std::shared_ptr<SpillPartitionChunkWriterContext> ctx) : _ctx(std::move(ctx)) {}

    ~SpillPartitionChunkWriterFactory() override = default;

    Status init() override { return _ctx->file_writer_factory->init(); }

    PartitionChunkWriterPtr create(std::string partition,
                                   std::vector<int8_t> partition_field_null_list) const override {
        return std::make_shared<SpillPartitionChunkWriter>(std::move(partition), std::move(partition_field_null_list),
                                                           _ctx);
    }

private:
    std::shared_ptr<SpillPartitionChunkWriterContext> _ctx;
};

} // namespace starrocks::connector
@ -18,9 +18,9 @@
namespace starrocks::connector {

void SinkOperatorMemoryManager::init(std::map<PartitionKey, WriterStreamPair>* writer_stream_pairs,
void SinkOperatorMemoryManager::init(std::map<PartitionKey, PartitionChunkWriterPtr>* partition_chunk_writers,
                                     AsyncFlushStreamPoller* io_poller, CommitFunc commit_func) {
    _candidates = writer_stream_pairs;
    _candidates = partition_chunk_writers;
    _commit_func = std::move(commit_func);
    _io_poller = io_poller;
}
@ -30,24 +30,24 @@ bool SinkOperatorMemoryManager::kill_victim() {
        return false;
    }

    // find file writer with the largest file size
    PartitionKey partition;
    WriterStreamPair* victim = nullptr;
    for (auto& [key, writer_and_stream] : *_candidates) {
        if (victim && victim->first->get_written_bytes() > writer_and_stream.first->get_written_bytes()) {
    // Find a target file writer to flush.
    // For a buffered partition writer, choose the writer with the largest file size.
    // For a spillable partition writer, choose the writer with the largest spillable memory size.
    PartitionChunkWriterPtr victim = nullptr;
    for (auto& [key, writer] : *_candidates) {
        if (victim && victim->get_flushable_bytes() > writer->get_flushable_bytes()) {
            continue;
        }
        partition = key;
        victim = &writer_and_stream;
        victim = writer;
    }
    if (victim == nullptr) {
        return false;
    }

    auto result = victim->first->commit();
    _commit_func(result);
    LOG(INFO) << "kill victim: " << victim->second->filename() << " size: " << result.file_statistics.file_size;
    _candidates->erase(partition);
    // The flush decreases the writer's flushable memory bytes, so the same
    // writer will usually not be chosen again in a short time.
    auto result = victim->flush();
    LOG(INFO) << "kill victim: " << victim->out_stream()->filename() << ", result: " << result;
    return true;
}
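A standalone sketch of the victim scan above: pick the writer whose flush would free the most memory from the candidate map (illustrative types only, not the StarRocks classes):

#include <map>
#include <memory>
#include <string>

struct Writer {
    long flushable_bytes = 0;
};
using WriterPtr = std::shared_ptr<Writer>;

WriterPtr pick_victim(const std::map<std::string, WriterPtr>& candidates) {
    WriterPtr victim;
    for (const auto& [key, writer] : candidates) {
        // Keep the writer with the most flushable bytes; ties keep the later key,
        // matching the loop above, which only skips strictly smaller candidates.
        if (victim && victim->flushable_bytes > writer->flushable_bytes) {
            continue;
        }
        victim = writer;
    }
    return victim; // nullptr when the map is empty
}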
@ -59,8 +59,8 @@ int64_t SinkOperatorMemoryManager::update_releasable_memory() {

int64_t SinkOperatorMemoryManager::update_writer_occupied_memory() {
    int64_t writer_occupied_memory = 0;
    for (auto& [_, writer_and_stream] : *_candidates) {
        writer_occupied_memory += writer_and_stream.first->get_written_bytes();
    for (auto& [_, writer] : *_candidates) {
        writer_occupied_memory += writer->get_written_bytes();
    }
    _writer_occupied_memory.store(writer_occupied_memory);
    return _writer_occupied_memory;
@ -113,7 +113,6 @@ bool SinkMemoryManager::_apply_on_mem_tracker(SinkOperatorMemoryManager* child_m

    auto available_memory = [&]() { return mem_tracker->limit() - mem_tracker->consumption(); };
    auto low_watermark = static_cast<int64_t>(mem_tracker->limit() * _low_watermark_ratio);
    auto high_watermark = static_cast<int64_t>(mem_tracker->limit() * _high_watermark_ratio);
    auto exceed_urgent_space = [&]() {
        return _total_writer_occupied_memory() > _query_tracker->limit() * _urgent_space_ratio;
    };

@ -125,7 +124,7 @@ bool SinkMemoryManager::_apply_on_mem_tracker(SinkOperatorMemoryManager* child_m
            << " releasable_memory: " << _total_releasable_memory()
            << " writer_allocated_memory: " << _total_writer_occupied_memory();
    // trigger early close
    while (exceed_urgent_space() && available_memory() + _total_releasable_memory() < high_watermark) {
    while (exceed_urgent_space() && available_memory() <= low_watermark) {
        bool found = child_manager->kill_victim();
        if (!found) {
            break;
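To make the revised trigger concrete, here is a sketch of the arithmetic with assumed ratios (the actual _low_watermark_ratio and _urgent_space_ratio values are not shown in this diff, and the real code checks urgent space against the query tracker's limit): with a 10 GB limit, ratios of 0.1 and 0.5 would flush victims once writers hold more than 5 GB while free memory is at or below 1 GB.

#include <cstdint>

bool should_kill_victim(int64_t limit, int64_t consumption, int64_t writer_occupied,
                        double low_watermark_ratio, double urgent_space_ratio) {
    const int64_t available = limit - consumption;
    const auto low_watermark = static_cast<int64_t>(limit * low_watermark_ratio);
    const bool exceed_urgent_space =
            writer_occupied > static_cast<int64_t>(limit * urgent_space_ratio);
    // Matches the new loop condition: flush only under urgent writer pressure
    // once available memory has fallen to the low watermark.
    return exceed_urgent_space && available <= low_watermark;
}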
@ -28,8 +28,8 @@ class SinkOperatorMemoryManager {
public:
    SinkOperatorMemoryManager() = default;

    void init(std::map<PartitionKey, WriterStreamPair>* writer_stream_pairs, AsyncFlushStreamPoller* io_poller,
              CommitFunc commit_func);
    void init(std::map<PartitionKey, PartitionChunkWriterPtr>* partition_chunk_writers,
              AsyncFlushStreamPoller* io_poller, CommitFunc commit_func);

    // return true if a victim is found and killed, otherwise return false
    bool kill_victim();
@ -45,7 +45,7 @@ public:
    int64_t writer_occupied_memory() { return _writer_occupied_memory.load(); }

private:
    std::map<PartitionKey, WriterStreamPair>* _candidates = nullptr;        // reference, owned by sink operator
    std::map<PartitionKey, PartitionChunkWriterPtr>* _candidates = nullptr; // reference, owned by sink operator
    CommitFunc _commit_func;
    AsyncFlushStreamPoller* _io_poller;
    std::atomic_int64_t _releasable_memory{0};
@ -104,6 +104,12 @@ public:
    // location = base_path/{query_id}_{be_number}_{driver_id}_index.file_suffix
    std::string get() { return fmt::format("{}/{}_{}.{}", _base_path, _file_name_prefix, _index++, _file_name_suffix); }

    std::string root_location(const std::string& partition) {
        return fmt::format("{}/{}", _base_path, PathUtils::remove_trailing_slash(partition));
    }

    std::string root_location() { return fmt::format("{}", PathUtils::remove_trailing_slash(_base_path)); }

private:
    const std::string _base_path;
    const std::string _file_name_prefix;
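A runnable sketch of the naming scheme documented above (location = base_path/{query_id}_{be_number}_{driver_id}_index.file_suffix), using the same fmt library the header already includes; the base path and prefix values are illustrative only:

#include <fmt/format.h>
#include <iostream>
#include <string>

int main() {
    std::string base_path = "s3://bucket/db/table";  // illustrative
    std::string prefix = "f0f6b9dd-abc1_1001_3";     // {query_id}_{be_number}_{driver_id}
    int index = 0;
    std::string location = fmt::format("{}/{}_{}.{}", base_path, prefix, index++, "parquet");
    std::cout << location << "\n"; // s3://bucket/db/table/f0f6b9dd-abc1_1001_3_0.parquet
    return 0;
}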
@ -51,6 +51,7 @@ set(EXEC_FILES
    aggregator.cpp
    sorted_streaming_aggregator.cpp
    aggregate/agg_hash_variant.cpp
    aggregate/compress_serializer.cpp
    aggregate/aggregate_base_node.cpp
    aggregate/aggregate_blocking_node.cpp
    aggregate/distinct_blocking_node.cpp
@ -0,0 +1,28 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#pragma once

#include <cstdint>

namespace starrocks {
using AggDataPtr = uint8_t*;
using int128_t = __int128;

class SliceWithHash;
class HashOnSliceWithHash;
class EqualOnSliceWithHash;

} // namespace starrocks
@ -14,27 +14,25 @@

#pragma once

#include <any>
#include <cstdint>
#include <limits>
#include <type_traits>
#include <utility>

#include "column/column.h"
#include "column/column_hash.h"
#include "column/column_helper.h"
#include "column/hash_set.h"
#include "column/type_traits.h"
#include "column/vectorized_fwd.h"
#include "common/compiler_util.h"
#include "exec/aggregate/agg_hash_set.h"
#include "exec/aggregate/agg_profile.h"
#include "exec/aggregate/compress_serializer.h"
#include "gutil/casts.h"
#include "gutil/strings/fastmem.h"
#include "runtime/mem_pool.h"
#include "util/fixed_hash_map.h"
#include "util/hash_util.hpp"
#include "util/phmap/phmap.h"
#include "util/phmap/phmap_dump.h"

namespace starrocks {
@ -245,9 +243,10 @@ struct AggHashMapWithOneNumberKeyWithNullable
        DCHECK(!key_column->is_nullable());
        const auto column = down_cast<const ColumnType*>(key_column);

        size_t bucket_count = this->hash_map.bucket_count();

        if (bucket_count < prefetch_threhold) {
        if constexpr (is_no_prefetch_map<HashMap>) {
            this->template compute_agg_noprefetch<Func, HTBuildOp>(column, agg_states,
                                                                   std::forward<Func>(allocate_func), extra);
        } else if (this->hash_map.bucket_count() < prefetch_threhold) {
            this->template compute_agg_noprefetch<Func, HTBuildOp>(column, agg_states,
                                                                   std::forward<Func>(allocate_func), extra);
        } else {
@ -1091,4 +1090,151 @@ struct AggHashMapWithSerializedKeyFixedSize
    int32_t _chunk_size;
};

template <typename HashMap>
struct AggHashMapWithCompressedKeyFixedSize
        : public AggHashMapWithKey<HashMap, AggHashMapWithCompressedKeyFixedSize<HashMap>> {
    using Self = AggHashMapWithCompressedKeyFixedSize<HashMap>;
    using Base = AggHashMapWithKey<HashMap, AggHashMapWithCompressedKeyFixedSize<HashMap>>;
    using KeyType = typename HashMap::key_type;
    using Iterator = typename HashMap::iterator;
    using FixedSizeSliceKey = typename HashMap::key_type;
    using ResultVector = typename std::vector<FixedSizeSliceKey>;

    template <class... Args>
    AggHashMapWithCompressedKeyFixedSize(int chunk_size, Args&&... args)
            : Base(chunk_size, std::forward<Args>(args)...),
              mem_pool(std::make_unique<MemPool>()),
              _chunk_size(chunk_size) {
        // resize (not reserve): fixed_keys.data() is written directly via memset/serialize below.
        fixed_keys.resize(chunk_size);
    }

    AggDataPtr get_null_key_data() { return nullptr; }
    void set_null_key_data(AggDataPtr data) {}

    template <AllocFunc<Self> Func, typename HTBuildOp>
    ALWAYS_NOINLINE void compute_agg_noprefetch(size_t chunk_size, const Columns& key_columns, MemPool* pool,
                                                Func&& allocate_func, Buffer<AggDataPtr>* agg_states,
                                                ExtraAggParam* extra) {
        [[maybe_unused]] size_t hash_table_size = this->hash_map.size();
        auto* __restrict not_founds = extra->not_founds;
        // Serialize the key columns into bit-compressed fixed-size keys.
        bitcompress_serialize(key_columns, bases, offsets, chunk_size, sizeof(FixedSizeSliceKey), fixed_keys.data());

        for (size_t i = 0; i < chunk_size; ++i) {
            if constexpr (HTBuildOp::process_limit) {
                if (hash_table_size < extra->limits) {
                    _emplace_key(fixed_keys[i], (*agg_states)[i], allocate_func, [&] { hash_table_size++; });
                } else {
                    _find_key((*agg_states)[i], (*not_founds)[i], fixed_keys[i]);
                }
            } else if constexpr (HTBuildOp::allocate) {
                _emplace_key(fixed_keys[i], (*agg_states)[i], allocate_func,
                             FillNotFounds<HTBuildOp::fill_not_found>(not_founds, i));
            } else if constexpr (HTBuildOp::fill_not_found) {
                _find_key((*agg_states)[i], (*not_founds)[i], fixed_keys[i]);
            }
        }
    }

    template <AllocFunc<Self> Func, typename HTBuildOp>
    ALWAYS_NOINLINE void compute_agg_prefetch(size_t chunk_size, const Columns& key_columns, MemPool* pool,
                                              Func&& allocate_func, Buffer<AggDataPtr>* agg_states,
                                              ExtraAggParam* extra) {
        [[maybe_unused]] size_t hash_table_size = this->hash_map.size();
        auto* __restrict not_founds = extra->not_founds;
        // Serialize the key columns into bit-compressed fixed-size keys.
        bitcompress_serialize(key_columns, bases, offsets, chunk_size, sizeof(FixedSizeSliceKey), fixed_keys.data());

        hashs.resize(chunk_size); // resize (not reserve) so that operator[] below is valid
        for (size_t i = 0; i < chunk_size; ++i) {
            hashs[i] = this->hash_map.hash_function()(fixed_keys[i]);
        }

        size_t prefetch_index = AGG_HASH_MAP_DEFAULT_PREFETCH_DIST;
        for (size_t i = 0; i < chunk_size; ++i) {
            if (prefetch_index < chunk_size) {
                this->hash_map.prefetch_hash(hashs[prefetch_index++]);
            }
            if constexpr (HTBuildOp::process_limit) {
                if (hash_table_size < extra->limits) {
                    _emplace_key_with_hash(fixed_keys[i], hashs[i], (*agg_states)[i], allocate_func,
                                           [&] { hash_table_size++; });
                } else {
                    _find_key((*agg_states)[i], (*not_founds)[i], fixed_keys[i]);
                }
            } else if constexpr (HTBuildOp::allocate) {
                _emplace_key_with_hash(fixed_keys[i], hashs[i], (*agg_states)[i], allocate_func,
                                       FillNotFounds<HTBuildOp::fill_not_found>(not_founds, i));
            } else if constexpr (HTBuildOp::fill_not_found) {
                _find_key((*agg_states)[i], (*not_founds)[i], fixed_keys[i]);
            }
        }
    }

    template <AllocFunc<Self> Func, typename HTBuildOp>
    void compute_agg_states(size_t chunk_size, const Columns& key_columns, MemPool* pool, Func&& allocate_func,
                            Buffer<AggDataPtr>* agg_states, ExtraAggParam* extra) {
        auto* buffer = reinterpret_cast<uint8_t*>(fixed_keys.data());
        memset(buffer, 0x0, sizeof(FixedSizeSliceKey) * chunk_size);

        if constexpr (is_no_prefetch_map<HashMap>) {
            this->template compute_agg_noprefetch<Func, HTBuildOp>(
                    chunk_size, key_columns, pool, std::forward<Func>(allocate_func), agg_states, extra);
        } else if (this->hash_map.bucket_count() < prefetch_threhold) {
            this->template compute_agg_noprefetch<Func, HTBuildOp>(
                    chunk_size, key_columns, pool, std::forward<Func>(allocate_func), agg_states, extra);
        } else {
            this->template compute_agg_prefetch<Func, HTBuildOp>(chunk_size, key_columns, pool,
                                                                 std::forward<Func>(allocate_func), agg_states, extra);
        }
    }

    template <AllocFunc<Self> Func, typename EmplaceCallBack>
    ALWAYS_INLINE void _emplace_key(KeyType key, AggDataPtr& target_state, Func&& allocate_func,
                                    EmplaceCallBack&& callback) {
        auto iter = this->hash_map.lazy_emplace(key, [&](const auto& ctor) {
            callback();
            AggDataPtr pv = allocate_func(key);
            ctor(key, pv);
        });
        target_state = iter->second;
    }

    template <AllocFunc<Self> Func, typename EmplaceCallBack>
    ALWAYS_INLINE void _emplace_key_with_hash(KeyType key, size_t hash, AggDataPtr& target_state, Func&& allocate_func,
                                              EmplaceCallBack&& callback) {
        auto iter = this->hash_map.lazy_emplace_with_hash(key, hash, [&](const auto& ctor) {
            callback();
            AggDataPtr pv = allocate_func(key);
            ctor(key, pv);
        });
        target_state = iter->second;
    }

    template <typename... Args>
    ALWAYS_INLINE void _find_key(AggDataPtr& target_state, uint8_t& not_found, Args&&... args) {
        if (auto iter = this->hash_map.find(std::forward<Args>(args)...); iter != this->hash_map.end()) {
            target_state = iter->second;
        } else {
            not_found = 1;
        }
    }

    void insert_keys_to_columns(ResultVector& keys, Columns& key_columns, int32_t chunk_size) {
        bitcompress_deserialize(key_columns, bases, offsets, used_bits, chunk_size, sizeof(FixedSizeSliceKey),
                                keys.data());
    }

    static constexpr bool has_single_null_key = false;

    std::vector<int> used_bits;
    std::vector<int> offsets;
    std::vector<std::any> bases;
    std::vector<FixedSizeSliceKey> fixed_keys;
    std::vector<size_t> hashs;
    std::unique_ptr<MemPool> mem_pool;
    ResultVector results;
    int32_t _chunk_size;
};

} // namespace starrocks
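The bitcompress_serialize/bitcompress_deserialize helpers live in compress_serializer.h and are not shown in this diff; the sketch below illustrates the general idea as I read it, under stated assumptions: rebase each key column to a per-column base value, then pack the reduced values into one fixed-size word at per-column bit offsets, so a multi-column group-by key fits a single machine word.

#include <cstdint>
#include <vector>

uint64_t pack_keys(const std::vector<int64_t>& values, const std::vector<int64_t>& bases,
                   const std::vector<int>& offsets) {
    uint64_t key = 0;
    for (size_t col = 0; col < values.size(); ++col) {
        // values[col] - bases[col] must fit in the bit width implied by the offsets.
        key |= static_cast<uint64_t>(values[col] - bases[col]) << offsets[col];
    }
    return key;
}

// Example: columns with ranges [100, 107] and [0, 15] need 3 + 4 bits in total, so
// pack_keys({103, 9}, {100, 0}, {0, 3}) == 3 | (9 << 3) == 75.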
@ -14,19 +14,17 @@

#pragma once

#include <any>

#include "column/column_hash.h"
#include "column/column_helper.h"
#include "column/hash_set.h"
#include "column/type_traits.h"
#include "column/vectorized_fwd.h"
#include "exec/aggregate/agg_profile.h"
#include "gutil/casts.h"
#include "runtime/mem_pool.h"
#include "runtime/runtime_state.h"
#include "util/fixed_hash_map.h"
#include "util/hash_util.hpp"
#include "util/phmap/phmap.h"
#include "util/runtime_profile.h"

namespace starrocks {
@ -111,14 +109,6 @@ struct AggHashSet {
    }
};

template <typename T>
struct no_prefetch_set : std::false_type {};
template <PhmapSeed seed>
struct no_prefetch_set<Int8AggHashSet<seed>> : std::true_type {};

template <class T>
constexpr bool is_no_prefetch_set = no_prefetch_set<T>::value;

// handle one number hash key
template <LogicalType logical_type, typename HashSet>
struct AggHashSetOfOneNumberKey : public AggHashSet<HashSet, AggHashSetOfOneNumberKey<logical_type, HashSet>> {
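The no_prefetch_set trait removed above (it moves elsewhere in this change) is a standard compile-time dispatch pattern: specialize a trait for set types whose key domain is too small for prefetching to pay off, then branch with if constexpr. A minimal self-contained sketch of the same pattern:

#include <type_traits>

template <typename T>
struct no_prefetch : std::false_type {};

struct TinyKeySet {}; // stand-in for something like Int8AggHashSet
template <>
struct no_prefetch<TinyKeySet> : std::true_type {};

template <typename Set>
void build(Set& set) {
    if constexpr (no_prefetch<Set>::value) {
        // Small key domain: a prefetch pass would only add overhead.
    } else {
        // Large table: precompute hashes and prefetch buckets before probing.
    }
    (void)set;
}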
@ -147,12 +137,10 @@ struct AggHashSetOfOneNumberKey : public AggHashSet<HashSet, AggHashSetOfOneNumb

        if constexpr (is_no_prefetch_set<HashSet>) {
            this->template build_set_noprefetch<compute_and_allocate>(chunk_size, key_columns, pool, not_founds);
        } else if (this->hash_set.bucket_count() < prefetch_threhold) {
            this->template build_set_noprefetch<compute_and_allocate>(chunk_size, key_columns, pool, not_founds);
        } else {
        if (this->hash_set.bucket_count() < prefetch_threhold) {
            this->template build_set_noprefetch<compute_and_allocate>(chunk_size, key_columns, pool, not_founds);
        } else {
            this->template build_set_prefetch<compute_and_allocate>(chunk_size, key_columns, pool, not_founds);
        }
            this->template build_set_prefetch<compute_and_allocate>(chunk_size, key_columns, pool, not_founds);
        }
    }
@@ -754,10 +742,94 @@ struct AggHashSetOfSerializedKeyFixedSize : public AggHashSet<HashSet, AggHashSe
    uint8_t* buffer;
    ResultVector results;
    Buffer<Slice> tmp_slices;
    // std::vector<Slice> tmp_slices;

    int32_t _chunk_size;
    std::vector<size_t> hashes;
};

template <typename HashSet>
struct AggHashSetCompressedFixedSize : public AggHashSet<HashSet, AggHashSetCompressedFixedSize<HashSet>> {
    using Base = AggHashSet<HashSet, AggHashSetCompressedFixedSize<HashSet>>;
    using Iterator = typename HashSet::iterator;
    using KeyType = typename HashSet::key_type;
    using FixedSizeSliceKey = typename HashSet::key_type;
    using ResultVector = typename std::vector<FixedSizeSliceKey>;

    bool has_null_column = false;
    static constexpr size_t max_fixed_size = sizeof(FixedSizeSliceKey);

    template <class... Args>
    AggHashSetCompressedFixedSize(int32_t chunk_size, Args&&... args)
            : Base(chunk_size, std::forward<Args>(args)...), _chunk_size(chunk_size) {
        fixed_keys.reserve(chunk_size);
    }

    // When compute_and_allocate=false:
    // keys are only probed, not inserted; keys that are absent are recorded in
    // not_founds. This path is mainly used in the first stage of two-stage
    // aggregation when the aggregation reduction is low.
    template <bool compute_and_allocate>
    void build_set(size_t chunk_size, const Columns& key_columns, MemPool* pool, Filter* not_founds) {
        if constexpr (!compute_and_allocate) {
            DCHECK(not_founds);
            not_founds->assign(chunk_size, 0);
        }

        auto* buffer = reinterpret_cast<uint8_t*>(fixed_keys.data());
        memset(buffer, 0x0, sizeof(FixedSizeSliceKey) * chunk_size);
        bitcompress_serialize(key_columns, bases, offsets, chunk_size, sizeof(FixedSizeSliceKey), fixed_keys.data());

        if constexpr (is_no_prefetch_set<HashSet>) {
            this->template build_set_noprefetch<compute_and_allocate>(chunk_size, pool, not_founds);
        } else if (this->hash_set.bucket_count() < prefetch_threhold) {
            this->template build_set_noprefetch<compute_and_allocate>(chunk_size, pool, not_founds);
        } else {
            this->template build_set_prefetch<compute_and_allocate>(chunk_size, pool, not_founds);
        }
    }

    template <bool compute_and_allocate>
    ALWAYS_NOINLINE void build_set_prefetch(size_t chunk_size, MemPool* pool, Filter* not_founds) {
        auto* keys = reinterpret_cast<FixedSizeSliceKey*>(fixed_keys.data());
        AGG_HASH_SET_PRECOMPUTE_HASH_VALS();

        for (size_t i = 0; i < chunk_size; ++i) {
            AGG_HASH_SET_PREFETCH_HASH_VAL();
            if constexpr (compute_and_allocate) {
                this->hash_set.emplace_with_hash(hashes[i], keys[i]);
            } else {
                (*not_founds)[i] = this->hash_set.find(keys[i], hashes[i]) == this->hash_set.end();
            }
        }
    }

    template <bool compute_and_allocate>
    ALWAYS_NOINLINE void build_set_noprefetch(size_t chunk_size, MemPool* pool, Filter* not_founds) {
        for (size_t i = 0; i < chunk_size; ++i) {
            if constexpr (compute_and_allocate) {
                this->hash_set.insert(fixed_keys[i]);
            } else {
                (*not_founds)[i] = !this->hash_set.contains(fixed_keys[i]);
            }
        }
    }

    void insert_keys_to_columns(ResultVector& keys, Columns& key_columns, int32_t chunk_size) {
        bitcompress_deserialize(key_columns, bases, offsets, used_bits, chunk_size, sizeof(FixedSizeSliceKey),
                                keys.data());
    }

    static constexpr bool has_single_null_key = false;
    bool has_null_key = false;

    std::vector<int> used_bits;
    std::vector<int> offsets;
    std::vector<std::any> bases;
    std::vector<FixedSizeSliceKey> fixed_keys;
    std::vector<size_t> hashes;
    ResultVector results;

    int32_t _chunk_size;
};

} // namespace starrocks
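The intent of AggHashSetCompressedFixedSize (and the map counterpart earlier in this diff): when every group-by column has a known, narrow value range, all columns can be bit-packed into one fixed-width integer and hashed as a single primitive key instead of a serialized slice. A hypothetical illustration, not taken from the patch (column names and ranges are made up):

// Hypothetical: DISTINCT over (tinyint t in [0, 15], smallint s in [0, 255]).
// A serialized slice key would need 3 bytes; bit-packed, the pair needs only
// 4 + 8 = 12 bits, so the cx4 variant (Int32-keyed hash set) is chosen:
//   uint32_t key = uint32_t(t - 0) << 0   // bits [0, 4)
//                | uint32_t(s - 0) << 4;  // bits [4, 12)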
@@ -15,11 +15,91 @@
#include "exec/aggregate/agg_hash_variant.h"

#include <tuple>
#include <type_traits>
#include <variant>

#include "runtime/runtime_state.h"
#include "util/phmap/phmap.h"

#define APPLY_FOR_AGG_VARIANT_ALL(M) \
    M(phase1_uint8)                  \
    M(phase1_int8)                   \
    M(phase1_int16)                  \
    M(phase1_int32)                  \
    M(phase1_int64)                  \
    M(phase1_int128)                 \
    M(phase1_decimal32)              \
    M(phase1_decimal64)              \
    M(phase1_decimal128)             \
    M(phase1_decimal256)             \
    M(phase1_date)                   \
    M(phase1_timestamp)              \
    M(phase1_string)                 \
    M(phase1_slice)                  \
    M(phase1_null_uint8)             \
    M(phase1_null_int8)              \
    M(phase1_null_int16)             \
    M(phase1_null_int32)             \
    M(phase1_null_int64)             \
    M(phase1_null_int128)            \
    M(phase1_null_decimal32)         \
    M(phase1_null_decimal64)         \
    M(phase1_null_decimal128)        \
    M(phase1_null_decimal256)        \
    M(phase1_null_date)              \
    M(phase1_null_timestamp)         \
    M(phase1_null_string)            \
    M(phase1_slice_two_level)        \
    M(phase1_int32_two_level)        \
    M(phase1_null_string_two_level)  \
    M(phase1_string_two_level)       \
                                     \
    M(phase2_uint8)                  \
    M(phase2_int8)                   \
    M(phase2_int16)                  \
    M(phase2_int32)                  \
    M(phase2_int64)                  \
    M(phase2_int128)                 \
    M(phase2_decimal32)              \
    M(phase2_decimal64)              \
    M(phase2_decimal128)             \
    M(phase2_decimal256)             \
    M(phase2_date)                   \
    M(phase2_timestamp)              \
    M(phase2_string)                 \
    M(phase2_slice)                  \
    M(phase2_null_uint8)             \
    M(phase2_null_int8)              \
    M(phase2_null_int16)             \
    M(phase2_null_int32)             \
    M(phase2_null_int64)             \
    M(phase2_null_int128)            \
    M(phase2_null_decimal32)         \
    M(phase2_null_decimal64)         \
    M(phase2_null_decimal128)        \
    M(phase2_null_decimal256)        \
    M(phase2_null_date)              \
    M(phase2_null_timestamp)         \
    M(phase2_null_string)            \
    M(phase2_slice_two_level)        \
    M(phase2_int32_two_level)        \
    M(phase2_null_string_two_level)  \
    M(phase2_string_two_level)       \
                                     \
    M(phase1_slice_fx4)              \
    M(phase1_slice_fx8)              \
    M(phase1_slice_fx16)             \
    M(phase2_slice_fx4)              \
    M(phase2_slice_fx8)              \
    M(phase2_slice_fx16)             \
    M(phase1_slice_cx1)              \
    M(phase1_slice_cx4)              \
    M(phase1_slice_cx8)              \
    M(phase1_slice_cx16)             \
    M(phase2_slice_cx1)              \
    M(phase2_slice_cx4)              \
    M(phase2_slice_cx8)              \
    M(phase2_slice_cx16)

namespace starrocks {
namespace detail {
template <AggHashMapVariant::Type>
@@ -65,6 +145,10 @@ DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase1_string_two_level, OneStringTwoLe
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase1_slice_fx4, SerializedKeyFixedSize4AggHashMap<PhmapSeed1>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase1_slice_fx8, SerializedKeyFixedSize8AggHashMap<PhmapSeed1>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase1_slice_fx16, SerializedKeyFixedSize16AggHashMap<PhmapSeed1>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase1_slice_cx1, CompressedFixedSize1AggHashMap<PhmapSeed1>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase1_slice_cx4, CompressedFixedSize4AggHashMap<PhmapSeed1>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase1_slice_cx8, CompressedFixedSize8AggHashMap<PhmapSeed1>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase1_slice_cx16, CompressedFixedSize16AggHashMap<PhmapSeed1>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_uint8, UInt8AggHashMapWithOneNumberKey<PhmapSeed2>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_int8, Int8AggHashMapWithOneNumberKey<PhmapSeed2>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_int16, Int16AggHashMapWithOneNumberKey<PhmapSeed2>);

@@ -99,6 +183,10 @@ DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_string_two_level, OneStringTwoLe
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_slice_fx4, SerializedKeyFixedSize4AggHashMap<PhmapSeed2>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_slice_fx8, SerializedKeyFixedSize8AggHashMap<PhmapSeed2>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_slice_fx16, SerializedKeyFixedSize16AggHashMap<PhmapSeed2>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_slice_cx1, CompressedFixedSize1AggHashMap<PhmapSeed2>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_slice_cx4, CompressedFixedSize4AggHashMap<PhmapSeed2>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_slice_cx8, CompressedFixedSize8AggHashMap<PhmapSeed2>);
DEFINE_MAP_TYPE(AggHashMapVariant::Type::phase2_slice_cx16, CompressedFixedSize16AggHashMap<PhmapSeed2>);

template <AggHashSetVariant::Type>
struct AggHashSetVariantTypeTraits;

@@ -180,6 +268,15 @@ DEFINE_SET_TYPE(AggHashSetVariant::Type::phase2_slice_fx4, SerializedKeyAggHashS
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase2_slice_fx8, SerializedKeyAggHashSetFixedSize8<PhmapSeed2>);
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase2_slice_fx16, SerializedKeyAggHashSetFixedSize16<PhmapSeed2>);

DEFINE_SET_TYPE(AggHashSetVariant::Type::phase1_slice_cx1, CompressedAggHashSetFixedSize1<PhmapSeed1>);
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase1_slice_cx4, CompressedAggHashSetFixedSize4<PhmapSeed1>);
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase1_slice_cx8, CompressedAggHashSetFixedSize8<PhmapSeed1>);
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase1_slice_cx16, CompressedAggHashSetFixedSize16<PhmapSeed1>);
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase2_slice_cx1, CompressedAggHashSetFixedSize1<PhmapSeed2>);
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase2_slice_cx4, CompressedAggHashSetFixedSize4<PhmapSeed2>);
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase2_slice_cx8, CompressedAggHashSetFixedSize8<PhmapSeed2>);
DEFINE_SET_TYPE(AggHashSetVariant::Type::phase2_slice_cx16, CompressedAggHashSetFixedSize16<PhmapSeed2>);

} // namespace detail
void AggHashMapVariant::init(RuntimeState* state, Type type, AggStatistics* agg_stat) {
    _type = type;
@@ -17,93 +17,15 @@

#pragma once

#include <type_traits>
#include <utility>
#include <variant>

#include "column/hash_set.h"
#include "exec/aggregate/agg_hash_map.h"
#include "exec/aggregate/agg_hash_set.h"
#include "exec/aggregate/agg_profile.h"
#include "types/logical_type.h"
#include "util/phmap/phmap.h"

namespace starrocks {

enum AggrPhase { AggrPhase1, AggrPhase2 };

-#define APPLY_FOR_AGG_VARIANT_ALL(M) \
-    M(phase1_uint8)                  \
-    M(phase1_int8)                   \
-    M(phase1_int16)                  \
-    M(phase1_int32)                  \
-    M(phase1_int64)                  \
-    M(phase1_int128)                 \
-    M(phase1_decimal32)              \
-    M(phase1_decimal64)              \
-    M(phase1_decimal128)             \
-    M(phase1_decimal256)             \
-    M(phase1_date)                   \
-    M(phase1_timestamp)              \
-    M(phase1_string)                 \
-    M(phase1_slice)                  \
-    M(phase1_null_uint8)             \
-    M(phase1_null_int8)              \
-    M(phase1_null_int16)             \
-    M(phase1_null_int32)             \
-    M(phase1_null_int64)             \
-    M(phase1_null_int128)            \
-    M(phase1_null_decimal32)         \
-    M(phase1_null_decimal64)         \
-    M(phase1_null_decimal128)        \
-    M(phase1_null_decimal256)        \
-    M(phase1_null_date)              \
-    M(phase1_null_timestamp)         \
-    M(phase1_null_string)            \
-    M(phase1_slice_two_level)        \
-    M(phase1_int32_two_level)        \
-    M(phase1_null_string_two_level)  \
-    M(phase1_string_two_level)       \
-                                     \
-    M(phase2_uint8)                  \
-    M(phase2_int8)                   \
-    M(phase2_int16)                  \
-    M(phase2_int32)                  \
-    M(phase2_int64)                  \
-    M(phase2_int128)                 \
-    M(phase2_decimal32)              \
-    M(phase2_decimal64)              \
-    M(phase2_decimal128)             \
-    M(phase2_decimal256)             \
-    M(phase2_date)                   \
-    M(phase2_timestamp)              \
-    M(phase2_string)                 \
-    M(phase2_slice)                  \
-    M(phase2_null_uint8)             \
-    M(phase2_null_int8)              \
-    M(phase2_null_int16)             \
-    M(phase2_null_int32)             \
-    M(phase2_null_int64)             \
-    M(phase2_null_int128)            \
-    M(phase2_null_decimal32)         \
-    M(phase2_null_decimal64)         \
-    M(phase2_null_decimal128)        \
-    M(phase2_null_decimal256)        \
-    M(phase2_null_date)              \
-    M(phase2_null_timestamp)         \
-    M(phase2_null_string)            \
-    M(phase2_slice_two_level)        \
-    M(phase2_int32_two_level)        \
-    M(phase2_null_string_two_level)  \
-    M(phase2_string_two_level)       \
-                                     \
-    M(phase1_slice_fx4)              \
-    M(phase1_slice_fx8)              \
-    M(phase1_slice_fx16)             \
-    M(phase2_slice_fx4)              \
-    M(phase2_slice_fx8)              \
-    M(phase2_slice_fx16)

// Aggregate Hash maps

// no-nullable single key maps:
@@ -187,6 +109,16 @@ using SerializedKeyFixedSize8AggHashMap = AggHashMapWithSerializedKeyFixedSize<F
template <PhmapSeed seed>
using SerializedKeyFixedSize16AggHashMap = AggHashMapWithSerializedKeyFixedSize<FixedSize16SliceAggHashMap<seed>>;

// fixed compress key
template <PhmapSeed seed>
using CompressedFixedSize1AggHashMap = AggHashMapWithCompressedKeyFixedSize<Int8AggHashMap<seed>>;
template <PhmapSeed seed>
using CompressedFixedSize4AggHashMap = AggHashMapWithCompressedKeyFixedSize<Int32AggHashMap<seed>>;
template <PhmapSeed seed>
using CompressedFixedSize8AggHashMap = AggHashMapWithCompressedKeyFixedSize<Int64AggHashMap<seed>>;
template <PhmapSeed seed>
using CompressedFixedSize16AggHashMap = AggHashMapWithCompressedKeyFixedSize<Int128AggHashMap<seed>>;

// Hash sets
//
template <PhmapSeed seed>
@@ -270,6 +202,15 @@ using SerializedKeyAggHashSetFixedSize8 = AggHashSetOfSerializedKeyFixedSize<Fix
template <PhmapSeed seed>
using SerializedKeyAggHashSetFixedSize16 = AggHashSetOfSerializedKeyFixedSize<FixedSize16SliceAggHashSet<seed>>;

template <PhmapSeed seed>
using CompressedAggHashSetFixedSize1 = AggHashSetCompressedFixedSize<Int8AggHashSet<seed>>;
template <PhmapSeed seed>
using CompressedAggHashSetFixedSize4 = AggHashSetCompressedFixedSize<Int32AggHashSet<seed>>;
template <PhmapSeed seed>
using CompressedAggHashSetFixedSize8 = AggHashSetCompressedFixedSize<Int64AggHashSet<seed>>;
template <PhmapSeed seed>
using CompressedAggHashSetFixedSize16 = AggHashSetCompressedFixedSize<Int128AggHashSet<seed>>;

// aggregate key
template <class HashMapWithKey>
struct CombinedFixedSizeKey {

@@ -294,6 +235,24 @@ static_assert(!is_combined_fixed_size_key<Int32TwoLevelAggHashSetOfOneNumberKey<
static_assert(is_combined_fixed_size_key<SerializedKeyAggHashSetFixedSize4<PhmapSeed1>>);
static_assert(!is_combined_fixed_size_key<Int32TwoLevelAggHashMapWithOneNumberKey<PhmapSeed1>>);

template <class HashMapWithKey>
struct CompressedFixedSizeKey {
    static auto constexpr value = false;
};

template <typename HashMap>
struct CompressedFixedSizeKey<AggHashMapWithCompressedKeyFixedSize<HashMap>> {
    static auto constexpr value = true;
};

template <typename HashSet>
struct CompressedFixedSizeKey<AggHashSetCompressedFixedSize<HashSet>> {
    static auto constexpr value = true;
};

template <typename HashMapOrSetWithKey>
inline constexpr bool is_compressed_fixed_size_key = CompressedFixedSizeKey<HashMapOrSetWithKey>::value;

// 1) For different group-by column types, sizes, cardinalities, and data volumes, we should choose
//    different hash functions and different hash maps.
//    At runtime, only one hash map is instantiated.
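The trait above lets generic code detect the compressed-key variants at compile time inside a visit; a minimal sketch of the pattern (mirroring how Aggregator::_build_hash_variant uses it later in this diff):

hash_variant.visit([&](auto& variant) {
    if constexpr (is_compressed_fixed_size_key<std::decay_t<decltype(*variant)>>) {
        // only the compressed-key maps/sets carry offsets/used_bits/bases
    }
});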
@@ -341,6 +300,10 @@ using AggHashMapWithKeyPtr = std::variant<
        std::unique_ptr<SerializedKeyFixedSize4AggHashMap<PhmapSeed1>>,
        std::unique_ptr<SerializedKeyFixedSize8AggHashMap<PhmapSeed1>>,
        std::unique_ptr<SerializedKeyFixedSize16AggHashMap<PhmapSeed1>>,
        std::unique_ptr<CompressedFixedSize1AggHashMap<PhmapSeed1>>,
        std::unique_ptr<CompressedFixedSize4AggHashMap<PhmapSeed1>>,
        std::unique_ptr<CompressedFixedSize8AggHashMap<PhmapSeed1>>,
        std::unique_ptr<CompressedFixedSize16AggHashMap<PhmapSeed1>>,
        std::unique_ptr<UInt8AggHashMapWithOneNumberKey<PhmapSeed2>>,
        std::unique_ptr<Int8AggHashMapWithOneNumberKey<PhmapSeed2>>,
        std::unique_ptr<Int16AggHashMapWithOneNumberKey<PhmapSeed2>>,

@@ -373,7 +336,11 @@ using AggHashMapWithKeyPtr = std::variant<
        std::unique_ptr<NullOneStringTwoLevelAggHashMap<PhmapSeed2>>,
        std::unique_ptr<SerializedKeyFixedSize4AggHashMap<PhmapSeed2>>,
        std::unique_ptr<SerializedKeyFixedSize8AggHashMap<PhmapSeed2>>,
-        std::unique_ptr<SerializedKeyFixedSize16AggHashMap<PhmapSeed2>>>;
+        std::unique_ptr<SerializedKeyFixedSize16AggHashMap<PhmapSeed2>>,
+        std::unique_ptr<CompressedFixedSize1AggHashMap<PhmapSeed2>>,
+        std::unique_ptr<CompressedFixedSize4AggHashMap<PhmapSeed2>>,
+        std::unique_ptr<CompressedFixedSize8AggHashMap<PhmapSeed2>>,
+        std::unique_ptr<CompressedFixedSize16AggHashMap<PhmapSeed2>>>;

using AggHashSetWithKeyPtr = std::variant<
        std::unique_ptr<UInt8AggHashSetOfOneNumberKey<PhmapSeed1>>,

@@ -441,7 +408,16 @@ using AggHashSetWithKeyPtr = std::variant<
        std::unique_ptr<SerializedKeyAggHashSetFixedSize16<PhmapSeed1>>,
        std::unique_ptr<SerializedKeyAggHashSetFixedSize4<PhmapSeed2>>,
        std::unique_ptr<SerializedKeyAggHashSetFixedSize8<PhmapSeed2>>,
-        std::unique_ptr<SerializedKeyAggHashSetFixedSize16<PhmapSeed2>>>;
+        std::unique_ptr<SerializedKeyAggHashSetFixedSize16<PhmapSeed2>>,
+
+        std::unique_ptr<CompressedAggHashSetFixedSize1<PhmapSeed1>>,
+        std::unique_ptr<CompressedAggHashSetFixedSize4<PhmapSeed1>>,
+        std::unique_ptr<CompressedAggHashSetFixedSize8<PhmapSeed1>>,
+        std::unique_ptr<CompressedAggHashSetFixedSize16<PhmapSeed1>>,
+        std::unique_ptr<CompressedAggHashSetFixedSize1<PhmapSeed2>>,
+        std::unique_ptr<CompressedAggHashSetFixedSize4<PhmapSeed2>>,
+        std::unique_ptr<CompressedAggHashSetFixedSize8<PhmapSeed2>>,
+        std::unique_ptr<CompressedAggHashSetFixedSize16<PhmapSeed2>>>;
} // namespace detail
struct AggHashMapVariant {
    enum class Type {
@@ -481,6 +457,11 @@ struct AggHashMapVariant {
        phase1_slice_fx8,
        phase1_slice_fx16,

        phase1_slice_cx1,
        phase1_slice_cx4,
        phase1_slice_cx8,
        phase1_slice_cx16,

        phase2_uint8,
        phase2_int8,
        phase2_int16,

@@ -517,6 +498,10 @@ struct AggHashMapVariant {
        phase2_slice_fx8,
        phase2_slice_fx16,

        phase2_slice_cx1,
        phase2_slice_cx4,
        phase2_slice_cx8,
        phase2_slice_cx16,
    };

    detail::AggHashMapWithKeyPtr hash_map_with_key;

@@ -630,6 +615,14 @@ struct AggHashSetVariant {
        phase2_slice_fx8,
        phase2_slice_fx16,

        phase1_slice_cx1,
        phase1_slice_cx4,
        phase1_slice_cx8,
        phase1_slice_cx16,
        phase2_slice_cx1,
        phase2_slice_cx4,
        phase2_slice_cx8,
        phase2_slice_cx16,
    };

    detail::AggHashSetWithKeyPtr hash_set_with_key;
@@ -14,7 +14,7 @@

#include "exec/aggregate/aggregate_base_node.h"

#include "gutil/strings/substitute.h"
#include "exec/aggregator.h"

namespace starrocks {

@@ -14,9 +14,7 @@

#pragma once

#include <any>

#include "exec/aggregator.h"
#include "exec/aggregator_fwd.h"
#include "exec/exec_node.h"

namespace starrocks {
@@ -16,13 +16,10 @@

#include <memory>
#include <type_traits>
#include <variant>

#include "exec/aggregator.h"
#include "exec/pipeline/aggregate/aggregate_blocking_sink_operator.h"
#include "exec/pipeline/aggregate/aggregate_blocking_source_operator.h"
#include "exec/pipeline/aggregate/aggregate_streaming_sink_operator.h"
#include "exec/pipeline/aggregate/aggregate_streaming_source_operator.h"
#include "exec/pipeline/aggregate/sorted_aggregate_streaming_sink_operator.h"
#include "exec/pipeline/aggregate/sorted_aggregate_streaming_source_operator.h"
#include "exec/pipeline/aggregate/spillable_aggregate_blocking_sink_operator.h"

@@ -32,12 +29,8 @@
#include "exec/pipeline/chunk_accumulate_operator.h"
#include "exec/pipeline/exchange/local_exchange_source_operator.h"
#include "exec/pipeline/limit_operator.h"
#include "exec/pipeline/noop_sink_operator.h"
#include "exec/pipeline/operator.h"
#include "exec/pipeline/pipeline_builder.h"
#include "exec/pipeline/spill_process_operator.h"
#include "exec/sorted_streaming_aggregator.h"
#include "gutil/casts.h"
#include "runtime/current_thread.h"
#include "simd/simd.h"
@@ -121,8 +114,7 @@ Status AggregateBlockingNode::open(RuntimeState* state) {
        if (_aggregator->hash_map_variant().size() == 0) {
            _aggregator->set_ht_eos();
        }
-        _aggregator->hash_map_variant().visit(
-                [&](auto& hash_map_with_key) { _aggregator->it_hash() = _aggregator->_state_allocator.begin(); });
+        _aggregator->it_hash() = _aggregator->state_allocator().begin();
    } else if (_aggregator->is_none_group_by_exprs()) {
        // for aggregation without group by, if _num_input_rows is 0,
        // we directly return an empty chunk in the update phase.
@@ -204,7 +204,7 @@ Status AggregateStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bo

Status AggregateStreamingNode::_output_chunk_from_hash_map(ChunkPtr* chunk) {
    if (!_aggregator->it_hash().has_value()) {
-        _aggregator->it_hash() = _aggregator->_state_allocator.begin();
+        _aggregator->it_hash() = _aggregator->state_allocator().begin();
        COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_map_variant().size());
    }
@@ -0,0 +1,296 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <any>
#include <optional>

#include "column/column_helper.h"
#include "column/column_visitor_adapter.h"
#include "column/decimalv3_column.h"
#include "column/nullable_column.h"
#include "common/status.h"
#include "exprs/literal.h"
#include "types/logical_type_infra.h"
#include "util/unaligned_access.h"

namespace starrocks {

template <size_t N>
struct int_type {};

template <>
struct int_type<1> {
    using type = int8_t;
};
template <>
struct int_type<2> {
    using type = int16_t;
};
template <>
struct int_type<4> {
    using type = int32_t;
};
template <>
struct int_type<8> {
    using type = int64_t;
};
template <>
struct int_type<16> {
    using type = __int128;
};

template <class T>
int leading_zeros(T v) {
    if (v == 0) return sizeof(T) * 8;
    typename std::make_unsigned<T>::type uv = v;
    return __builtin_clzll(static_cast<size_t>(uv)) - (sizeof(size_t) * 8 - sizeof(T) * 8);
}

template <>
int leading_zeros<int128_t>(int128_t v) {
    uint64_t high = (uint64_t)(v >> 64);
    uint64_t low = (uint64_t)v;

    if (high != 0) {
        return leading_zeros(high);
    } else {
        return 64 + leading_zeros(low);
    }
}

template <class T>
int get_used_bits(T min, T max) {
    using IntType = typename int_type<sizeof(T)>::type;
    auto vmin = unaligned_load<IntType>(&min);
    auto vmax = unaligned_load<IntType>(&max);
    IntType delta = vmax - vmin;
    return sizeof(T) * 8 - (leading_zeros<IntType>(delta));
}

std::optional<int> get_used_bits(LogicalType ltype, const VectorizedLiteral& begin, const VectorizedLiteral& end,
                                 std::any& base) {
    size_t used_bits = 0;
    bool applied = scalar_type_dispatch(ltype, [&]<LogicalType Type>() {
        if constexpr ((lt_is_integer<Type> || lt_is_decimal<Type> || lt_is_date<Type>) &&
                      (sizeof(RunTimeCppType<Type>) <= 16)) {
            RunTimeCppType<Type> cs_min = ColumnHelper::get_const_value<Type>(begin.value().get());
            RunTimeCppType<Type> cs_max = ColumnHelper::get_const_value<Type>(end.value().get());
            base = cs_min;
            used_bits = get_used_bits(cs_min, cs_max);
            return true;
        }
        return false;
    });
    if (applied) {
        return used_bits;
    }
    return {};
}
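// [Illustrative example, not part of the patch] A worked instance of
// get_used_bits: for an int32 column whose min/max literals are 1000 and 1999,
// delta = 999 (binary 1111100111, 10 significant bits), so
// leading_zeros<int32_t>(999) == 22 and get_used_bits returns 32 - 22 == 10;
// `base` is set to 1000 and every key is stored in 10 bits relative to it.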
template <class TSrc, class TDst>
void bitcompress_serialize(const TSrc* __restrict val, const uint8_t* __restrict nulls, TSrc base, size_t n, int offset,
                           TDst* __restrict dst) {
    using UTSrc = typename std::make_unsigned<TSrc>::type;
    if (nulls == nullptr) {
        for (size_t i = 0; i < n; ++i) {
            TDst v = UTSrc(val[i] - base);
            dst[i] |= v << offset;
        }
    } else {
        for (size_t i = 0; i < n; ++i) {
            TDst v = UTSrc(val[i] - base) & ~(-static_cast<TSrc>(nulls[i]));
            dst[i] |= TDst(nulls[i]) << offset;
            dst[i] |= v << (offset + 1);
        }
    }
}
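// [Illustrative trace, not part of the patch] The nullable branch for one int8
// column with base = 10 and offset = 0, packed into a uint16 key:
//   row 0: val = 13, null = 0
//     v = (13 - 10) & ~(-0) = 3; dst |= 0 << 0; dst |= 3 << 1  -> 0b0110
//   row 1: val = <don't care>, null = 1
//     v = ... & ~(-1) = 0 (the value bits are masked out for null rows)
//     dst |= 1 << 0                                            -> 0b0001
// The null flag sits at bit `offset`; the value occupies bits from `offset + 1`.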
template <class Dst>
class CompressSerializer : public ColumnVisitorAdapter<CompressSerializer<Dst>> {
public:
    using Base = ColumnVisitorAdapter<CompressSerializer<Dst>>;
    CompressSerializer(Dst* dst, const std::any& base, int offset)
            : Base(this), _dst(dst), _base(base), _offset(offset) {}

    Status do_visit(const NullableColumn& column) {
        _null_data = column.null_column_data().data();
        return column.data_column()->accept(this);
    }

    template <typename Column, typename T>
    void bit_compress(const Column& column) {
        if constexpr (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8 || sizeof(T) == 16) {
            using SrcType = typename int_type<sizeof(T)>::type;
            const auto& container = column.get_data();
            const auto& raw_data = container.data();
            size_t n = container.size();
            auto base = std::any_cast<T>(_base);
            auto tbase = unaligned_load<SrcType>(&base);
            bitcompress_serialize((SrcType*)raw_data, _null_data, tbase, n, _offset, _dst);
        } else {
            CHECK(false) << "unreachable";
        }
    }

    template <typename T>
    Status do_visit(const FixedLengthColumn<T>& column) {
        bit_compress<FixedLengthColumn<T>, T>(column);
        return Status::OK();
    }

    template <typename T>
    Status do_visit(const DecimalV3Column<T>& column) {
        bit_compress<DecimalV3Column<T>, T>(column);
        return Status::OK();
    }

    template <typename T>
    Status do_visit(const T& column) {
        CHECK(false) << "unreachable";
        return Status::NotSupported("unsupported type");
    }

private:
    Dst* _dst;
    const std::any& _base;
    int _offset;
    const uint8_t* _null_data = nullptr;
};

template <class T>
T mask(T bits) {
    if (bits == sizeof(T) * 8) return ~T(0);
    return (T(1) << bits) - 1;
}

template <class TSrc, class TDst>
void bitcompress_deserialize(const TSrc* __restrict src, uint8_t* __restrict nulls, TDst base, int n, int used_bits,
                             int offset, TDst* __restrict dst) {
    typename std::make_unsigned<TSrc>::type* usrc = (typename std::make_unsigned<TSrc>::type*)src;
    const uint8_t mask1 = mask<uint8_t>(1);
    const TSrc mask2 = mask<TSrc>(used_bits - offset - (nulls != nullptr));
    if (nulls == nullptr) {
        for (size_t i = 0; i < n; ++i) {
            dst[i] = ((usrc[i] >> (offset)) & mask2) + base;
        }
    } else {
        for (size_t i = 0; i < n; ++i) {
            nulls[i] = (usrc[i] >> offset) & mask1;
            dst[i] = ((usrc[i] >> (offset + 1)) & mask2) + base;
        }
    }
}

template <class Src>
class CompressDeserializer final : public ColumnVisitorMutableAdapter<CompressDeserializer<Src>> {
public:
    using Base = ColumnVisitorMutableAdapter<CompressDeserializer<Src>>;
    explicit CompressDeserializer(size_t num_rows, Src* src, const std::any& base, int offset, int used_bits)
            : Base(this), _num_rows(num_rows), _src(src), _base(base), _offset(offset), _used_bits(used_bits) {}

    Status do_visit(NullableColumn* column) {
        // TODO: opt me
        column->null_column_data().resize(_num_rows);
        _null_data = column->null_column_data().data();
        RETURN_IF_ERROR(column->data_column()->accept_mutable(this));
        column->update_has_null();
        return Status::OK();
    }

    template <typename Column, typename T>
    void bit_decompress(Column* column) {
        if constexpr (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8 || sizeof(T) == 16) {
            using DstType = typename int_type<sizeof(T)>::type;
            column->resize(_num_rows);
            auto& container = column->get_data();
            auto* raw_data = container.data();
            auto base = std::any_cast<T>(_base);
            auto tbase = unaligned_load<DstType>(&base);
            bitcompress_deserialize(_src, _null_data, tbase, _num_rows, _used_bits, _offset, (DstType*)raw_data);
        } else {
            CHECK(false) << "unreachable";
        }
    }

    template <typename T>
    Status do_visit(FixedLengthColumn<T>* column) {
        bit_decompress<FixedLengthColumn<T>, T>(column);
        return Status::OK();
    }

    template <typename T>
    Status do_visit(DecimalV3Column<T>* column) {
        bit_decompress<DecimalV3Column<T>, T>(column);
        return Status::OK();
    }

    template <typename T>
    Status do_visit(const T& column) {
        DCHECK(false) << "unreachable";
        return Status::NotSupported("unsupported type");
    }

private:
    size_t _num_rows;
    const Src* _src;
    const std::any& _base;
    int _offset;
    int _used_bits;
    uint8_t* _null_data = nullptr;
};

void bitcompress_serialize(const Columns& columns, const std::vector<std::any>& bases, const std::vector<int>& offsets,
                           size_t num_rows, size_t fixed_key_size, void* buffer) {
    for (size_t i = 0; i < columns.size(); ++i) {
        if (fixed_key_size == 1) {
            CompressSerializer<uint8_t> serializer((uint8_t*)buffer, bases[i], offsets[i]);
            (void)columns[i]->accept(&serializer);
        } else if (fixed_key_size == 4) {
            CompressSerializer<int> serializer((int*)buffer, bases[i], offsets[i]);
            (void)columns[i]->accept(&serializer);
        } else if (fixed_key_size == 8) {
            CompressSerializer<int64_t> serializer((int64_t*)buffer, bases[i], offsets[i]);
            (void)columns[i]->accept(&serializer);
        } else if (fixed_key_size == 16) {
            CompressSerializer<int128_t> serializer((int128_t*)buffer, bases[i], offsets[i]);
            (void)columns[i]->accept(&serializer);
        } else {
            DCHECK(false) << "unreachable path";
        }
    }
}

void bitcompress_deserialize(Columns& columns, const std::vector<std::any>& bases, const std::vector<int>& offsets,
                             const std::vector<int>& used_bits, size_t num_rows, size_t fixed_key_size, void* buffer) {
    for (size_t i = 0; i < columns.size(); ++i) {
        if (fixed_key_size == 1) {
            CompressDeserializer<uint8_t> deserializer(num_rows, (uint8_t*)buffer, bases[i], offsets[i], used_bits[i]);
            (void)columns[i]->accept_mutable(&deserializer);
        } else if (fixed_key_size == 4) {
            CompressDeserializer<int> deserializer(num_rows, (int*)buffer, bases[i], offsets[i], used_bits[i]);
            (void)columns[i]->accept_mutable(&deserializer);
        } else if (fixed_key_size == 8) {
            CompressDeserializer<int64_t> deserializer(num_rows, (int64_t*)buffer, bases[i], offsets[i], used_bits[i]);
            (void)columns[i]->accept_mutable(&deserializer);
        } else if (fixed_key_size == 16) {
            CompressDeserializer<int128_t> deserializer(num_rows, (int128_t*)buffer, bases[i], offsets[i],
                                                        used_bits[i]);
            (void)columns[i]->accept_mutable(&deserializer);
        } else {
            DCHECK(false) << "unreachable path";
        }
    }
}

} // namespace starrocks
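A minimal self-contained sketch of the round trip implemented above. It restates the per-row loops with plain arrays instead of calling the column-level entry points (those require StarRocks Column objects); the widths and ranges are illustrative:

#include <cstdint>
#include <cstdio>

int main() {
    // Two int32 columns: a in [100, 107] (3 bits), b in [0, 999] (10 bits).
    // Pack both into one uint32 key per row: bits [0,3) = a - 100, [3,13) = b.
    int32_t a[3] = {100, 103, 107};
    int32_t b[3] = {0, 500, 999};
    uint32_t keys[3] = {0, 0, 0};
    for (int i = 0; i < 3; ++i) { // mirrors bitcompress_serialize
        keys[i] |= uint32_t(a[i] - 100) << 0;
        keys[i] |= uint32_t(b[i]) << 3;
    }
    for (int i = 0; i < 3; ++i) { // mirrors bitcompress_deserialize
        int32_t ra = int32_t((keys[i] >> 0) & 0x7) + 100;
        int32_t rb = int32_t((keys[i] >> 3) & 0x3FF);
        std::printf("%d %d\n", ra, rb); // recovers the original values
    }
    return 0;
}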
@@ -0,0 +1,48 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <any>
#include <optional>
#include <vector>

#include "column/column.h"
#include "types/logical_type.h"

namespace starrocks {
class VectorizedLiteral;
/**
 * Calculates the number of bits needed to represent a given value range for a specified logical type.
 *
 * This function computes the number of bits required for a given logical type and a specified range
 * of start and end values. The result is an optional integer holding the calculated number of bits.
 *
 * If the column type does not support bit compression, an empty optional is returned.
 */
std::optional<int> get_used_bits(LogicalType ltype, const VectorizedLiteral& begin, const VectorizedLiteral& end,
                                 std::any& base);

/**
 * Serialize column data into a bit-compressed format.
 */
void bitcompress_serialize(const Columns& columns, const std::vector<std::any>& bases, const std::vector<int>& offsets,
                           size_t num_rows, size_t fixed_key_size, void* buffer);

/**
 * Deserialize column data from a bit-compressed format.
 */
void bitcompress_deserialize(Columns& columns, const std::vector<std::any>& bases, const std::vector<int>& offsets,
                             const std::vector<int>& used_bits, size_t num_rows, size_t fixed_key_size, void* buffer);

} // namespace starrocks
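A hedged usage sketch for the declaration above (`min_literal`/`max_literal` are assumed to be VectorizedLiteral expressions supplied by the planner; they are not defined in this patch):

std::any base;
std::optional<int> bits = get_used_bits(TYPE_INT, *min_literal, *max_literal, base);
if (bits.has_value()) {
    // *bits is the per-key bit width; `base` holds the range minimum (cs_min)
} else {
    // the column type does not support bit compression; fall back to other key encodings
}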
@@ -17,27 +17,27 @@
#include <algorithm>
#include <memory>
#include <type_traits>
#include <variant>
#include <utility>

#include "column/chunk.h"
#include "column/column_helper.h"
#include "column/vectorized_fwd.h"
#include "common/config.h"
#include "common/logging.h"
#include "common/status.h"
#include "exec/agg_runtime_filter_builder.h"
#include "exec/aggregate/agg_hash_variant.h"
#include "exec/aggregate/agg_profile.h"
#include "exec/exec_node.h"
#include "exec/limited_pipeline_chunk_buffer.h"
#include "exec/pipeline/operator.h"
#include "exec/spill/spiller.hpp"
#include "exprs/agg/agg_state_if.h"
#include "exprs/agg/agg_state_merge.h"
#include "exprs/agg/agg_state_union.h"
#include "exprs/agg/aggregate_factory.h"
#include "exprs/agg/aggregate_state_allocator.h"
#include "exprs/literal.h"
#include "gen_cpp/PlanNodes_types.h"
#include "runtime/current_thread.h"
#include "runtime/descriptors.h"
#include "runtime/memory/roaring_hook.h"
#include "types/logical_type.h"
#include "udf/java/utils.h"
#include "util/runtime_profile.h"
@@ -52,6 +52,60 @@ static const std::string AGG_STATE_MERGE_SUFFIX = "_merge";
static const std::string AGG_STATE_IF_SUFFIX = "_if";
static const std::string FUNCTION_COUNT = "count";

template <class HashMapWithKey>
struct AllocateState {
    AllocateState(Aggregator* aggregator_) : aggregator(aggregator_) {}
    inline AggDataPtr operator()(const typename HashMapWithKey::KeyType& key);
    inline AggDataPtr operator()(std::nullptr_t);

private:
    Aggregator* aggregator;
};

template <class HashMapWithKey>
inline AggDataPtr AllocateState<HashMapWithKey>::operator()(const typename HashMapWithKey::KeyType& key) {
    AggDataPtr agg_state = aggregator->_state_allocator.allocate();
    *reinterpret_cast<typename HashMapWithKey::KeyType*>(agg_state) = key;
    size_t created = 0;
    size_t aggregate_function_sz = aggregator->_agg_fn_ctxs.size();
    try {
        for (int i = 0; i < aggregate_function_sz; i++) {
            aggregator->_agg_functions[i]->create(aggregator->_agg_fn_ctxs[i],
                                                  agg_state + aggregator->_agg_states_offsets[i]);
            created++;
        }
        return agg_state;
    } catch (std::bad_alloc& e) {
        for (size_t i = 0; i < created; ++i) {
            aggregator->_agg_functions[i]->destroy(aggregator->_agg_fn_ctxs[i],
                                                   agg_state + aggregator->_agg_states_offsets[i]);
        }
        aggregator->_state_allocator.rollback();
        throw;
    }
}

template <class HashMapWithKey>
inline AggDataPtr AllocateState<HashMapWithKey>::operator()(std::nullptr_t) {
    AggDataPtr agg_state = aggregator->_state_allocator.allocate_null_key_data();
    size_t created = 0;
    size_t aggregate_function_sz = aggregator->_agg_fn_ctxs.size();
    try {
        for (int i = 0; i < aggregate_function_sz; i++) {
            aggregator->_agg_functions[i]->create(aggregator->_agg_fn_ctxs[i],
                                                  agg_state + aggregator->_agg_states_offsets[i]);
            created++;
        }
        return agg_state;
    } catch (std::bad_alloc& e) {
        for (int i = 0; i < created; i++) {
            aggregator->_agg_functions[i]->destroy(aggregator->_agg_fn_ctxs[i],
                                                   agg_state + aggregator->_agg_states_offsets[i]);
        }
        throw;
    }
}

template <bool UseIntermediateAsOutput>
bool AggFunctionTypes::is_result_nullable() const {
    if constexpr (UseIntermediateAsOutput) {
@@ -143,6 +197,9 @@ AggregatorParamsPtr convert_to_aggregator_params(const TPlanNode& tnode) {
        params->intermediate_aggr_exprs = tnode.agg_node.intermediate_aggr_exprs;
        params->enable_pipeline_share_limit =
                tnode.agg_node.__isset.enable_pipeline_share_limit ? tnode.agg_node.enable_pipeline_share_limit : false;
        params->grouping_min_max =
                tnode.agg_node.__isset.group_by_min_max ? tnode.agg_node.group_by_min_max : std::vector<TExpr>{};

        break;
    }
    default:
@@ -358,6 +415,16 @@ Status Aggregator::prepare(RuntimeState* state, ObjectPool* pool, RuntimeProfile

    RETURN_IF_ERROR(Expr::create_expr_trees(_pool, _params->conjuncts, &_conjunct_ctxs, state, true));
    RETURN_IF_ERROR(Expr::create_expr_trees(_pool, _params->grouping_exprs, &_group_by_expr_ctxs, state, true));
    RETURN_IF_ERROR(Expr::create_expr_trees(_pool, _params->grouping_min_max, &_group_by_min_max, state, true));
    _ranges.resize(_group_by_expr_ctxs.size());
    if (_group_by_min_max.size() == _group_by_expr_ctxs.size() * 2) {
        for (size_t i = 0; i < _group_by_expr_ctxs.size(); ++i) {
            std::pair<VectorizedLiteral*, VectorizedLiteral*> range;
            range.first = down_cast<VectorizedLiteral*>(_group_by_min_max[i * 2]->root());
            range.second = down_cast<VectorizedLiteral*>(_group_by_min_max[i * 2 + 1]->root());
            _ranges[i] = range;
        }
    }

    // add profile attributes
    if (!_params->sql_grouping_keys.empty()) {
@@ -582,7 +649,7 @@ Status Aggregator::_create_aggregate_function(starrocks::RuntimeState* state, co
    TypeDescriptor return_type = TypeDescriptor::from_thrift(fn.ret_type);
    TypeDescriptor serde_type = TypeDescriptor::from_thrift(fn.aggregate_fn.intermediate_type);
    DCHECK_LE(1, fn.arg_types.size());
-    TypeDescriptor arg_type = arg_types[0];
+    const TypeDescriptor& arg_type = arg_types[0];
    auto* func = get_aggregate_function(func_name, return_type, arg_types, is_result_nullable, fn.binary_type,
                                        state->func_version());
    if (func == nullptr) {
@@ -1287,19 +1354,76 @@ Status Aggregator::evaluate_agg_fn_exprs(Chunk* chunk, bool use_intermediate) {
    return Status::OK();
}

-bool is_group_columns_fixed_size(std::vector<ExprContext*>& group_by_expr_ctxs, std::vector<ColumnType>& group_by_types,
-                                 size_t* max_size, bool* has_null) {
+bool could_apply_bitcompress_opt(
+        const std::vector<ColumnType>& group_by_types,
+        const std::vector<std::optional<std::pair<VectorizedLiteral*, VectorizedLiteral*>>>& ranges,
+        std::vector<std::any>& base, std::vector<int>& used_bytes, size_t* max_size, bool* has_null) {
    size_t accumulated = 0;
    size_t accumulated_fixed_length_bits = 0;
    for (size_t i = 0; i < group_by_types.size(); i++) {
        size_t size = 0;
        // 1 byte for the null flag.
        if (group_by_types[i].is_nullable) {
            *has_null = true;
            size += 1;
        }
        if (group_by_types[i].result_type.is_complex_type()) {
            return false;
        }
        LogicalType ltype = group_by_types[i].result_type.type;

        size_t fixed_base_size = get_size_of_fixed_length_type(ltype);
        if (fixed_base_size == 0) return false;
        accumulated_fixed_length_bits += fixed_base_size * 8;

        if (!ranges[i].has_value()) {
            return false;
        }
        auto used_bits = get_used_bits(ltype, *ranges[i]->first, *ranges[i]->second, base[i]);
        if (!used_bits.has_value()) {
            return false;
        }
        size += used_bits.value();

        accumulated += size;
        used_bytes[i] = accumulated;
    }
    auto get_level = [](size_t used_bits) {
        if (used_bits <= sizeof(uint8_t) * 8)
            return 1;
        else if (used_bits <= sizeof(uint16_t) * 8)
            return 2;
        else if (used_bits <= sizeof(uint32_t) * 8)
            return 3;
        else if (used_bits <= sizeof(uint64_t) * 8)
            return 4;
        else if (used_bits <= sizeof(int128_t) * 8)
            return 5;
        else
            return 6;
    };
    // If the compressed key lands at the same width level as the uncompressed fixed-size layout,
    // grouping by the compressed key brings no performance benefit, so we disable it.
    // For example, two int32 columns whose ranges both span the full 0..2^32 domain would still
    // need an int64 group-by key, so there would be no optimization effect.
    if (get_level(accumulated_fixed_length_bits) > get_level(accumulated)) {
        *max_size = accumulated;
        return true;
    }
    return false;
}
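// [Illustrative example, not part of the patch] A worked instance of the level
// check: two non-nullable int32 group-by columns with planner-provided ranges
// of [0, 999] each need 10 bits, so accumulated = 20 bits, while the
// uncompressed layout needs 2 * 32 = 64 bits:
//   get_level(64) == 4 > get_level(20) == 3
// so *max_size = 20 and could_apply_bitcompress_opt returns true. If both
// ranges spanned the full int32 domain, both sides would land at level 4 and
// the optimization would be rejected.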
bool is_group_columns_fixed_size(std::vector<ColumnType>& group_by_types, size_t* max_size, bool* has_null) {
    size_t size = 0;
    *has_null = false;

-    for (size_t i = 0; i < group_by_expr_ctxs.size(); i++) {
-        ExprContext* ctx = group_by_expr_ctxs[i];
+    for (size_t i = 0; i < group_by_types.size(); i++) {
+        // 1 byte for the null flag.
        if (group_by_types[i].is_nullable) {
            *has_null = true;
-            size += 1; // 1 bytes for null flag.
+            size += 1;
        }
-        LogicalType ltype = ctx->root()->type().type;
-        if (ctx->root()->type().is_complex_type()) {
+        LogicalType ltype = group_by_types[i].result_type.type;
+        if (group_by_types[i].result_type.is_complex_type()) {
            return false;
        }
        size_t byte_size = get_size_of_fixed_length_type(ltype);
@@ -1311,20 +1435,30 @@ bool is_group_columns_fixed_size(std::vector<ExprContext*>& group_by_expr_ctxs,
}

template <typename HashVariantType>
-void Aggregator::_init_agg_hash_variant(HashVariantType& hash_variant) {
+typename HashVariantType::Type Aggregator::_get_hash_table_type() {
    auto type = _aggr_phase == AggrPhase1 ? HashVariantType::Type::phase1_slice : HashVariantType::Type::phase2_slice;
-    if (_group_by_expr_ctxs.size() == 1) {
-        type = HashVariantResolver<HashVariantType>::instance().get_unary_type(
-                _aggr_phase, _group_by_types[0].result_type.type, _has_nullable_key);
+    if (_group_by_types.empty()) {
+        return type;
    }
+    // use the one-key hash table
+    if (_group_by_types.size() == 1) {
+        bool nullable = _group_by_types[0].is_nullable;
+        LogicalType type = _group_by_types[0].result_type.type;
+        return HashVariantResolver<HashVariantType>::instance().get_unary_type(_aggr_phase, type, nullable);
+    }
+    return type;
+}

+template <typename HashVariantType>
+typename HashVariantType::Type Aggregator::_try_to_apply_fixed_size_opt(typename HashVariantType::Type type,
+                                                                        bool* has_null, int* fixed_size) {
    bool has_null_column = false;
    int fixed_byte_size = 0;
    // this optimization doesn't need to be limited to multi-column group by;
    // single columns like float/double/decimal/largeint can also benefit from it.
    if (type == HashVariantType::Type::phase1_slice || type == HashVariantType::Type::phase2_slice) {
        size_t max_size = 0;
-        if (is_group_columns_fixed_size(_group_by_expr_ctxs, _group_by_types, &max_size, &has_null_column)) {
+        if (is_group_columns_fixed_size(_group_by_types, &max_size, &has_null_column)) {
            // we need to reserve a byte for the serialization length of nullable columns
            if (max_size < 4 || (!has_null_column && max_size == 4)) {
                type = _aggr_phase == AggrPhase1 ? HashVariantType::Type::phase1_slice_fx4
|
@ -1341,6 +1475,99 @@ void Aggregator::_init_agg_hash_variant(HashVariantType& hash_variant) {
|
|||
}
|
||||
}
|
||||
}
|
||||
*has_null = has_null_column;
|
||||
*fixed_size = fixed_byte_size;
|
||||
return type;
|
||||
}
|
||||
|
||||
template <typename HashVariantType>
|
||||
typename HashVariantType::Type Aggregator::_try_to_apply_compressed_key_opt(typename HashVariantType::Type input_type,
|
||||
CompressKeyContext* ctx) {
|
||||
typename HashVariantType::Type type = input_type;
|
||||
if (_group_by_types.empty()) {
|
||||
return type;
|
||||
}
|
||||
for (size_t i = 0; i < _ranges.size(); ++i) {
|
||||
if (!_ranges[i].has_value()) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
||||
// check apply bit compress opt
|
||||
{
|
||||
bool has_null_column;
|
||||
size_t new_max_bit_size = 0;
|
||||
std::vector<int>& offsets = ctx->offsets;
|
||||
std::vector<int>& used_bits = ctx->used_bits;
|
||||
std::vector<std::any>& bases = ctx->bases;
|
||||
|
||||
size_t group_by_keys = _group_by_types.size();
|
||||
used_bits.resize(group_by_keys);
|
||||
offsets.resize(group_by_keys);
|
||||
bases.resize(group_by_keys);
|
||||
|
||||
if (could_apply_bitcompress_opt(_group_by_types, _ranges, bases, used_bits, &new_max_bit_size,
|
||||
&has_null_column)) {
|
||||
if (_group_by_types.size() > 0) {
|
||||
if (new_max_bit_size <= 8) {
|
||||
type = _aggr_phase == AggrPhase1 ? HashVariantType::Type::phase1_slice_cx1
|
||||
: HashVariantType::Type::phase2_slice_cx1;
|
||||
} else if (new_max_bit_size <= 4 * 8) {
|
||||
type = _aggr_phase == AggrPhase1 ? HashVariantType::Type::phase1_slice_cx4
|
||||
: HashVariantType::Type::phase2_slice_cx4;
|
||||
} else if (new_max_bit_size <= 8 * 8) {
|
||||
type = _aggr_phase == AggrPhase1 ? HashVariantType::Type::phase1_slice_cx8
|
||||
: HashVariantType::Type::phase2_slice_cx8;
|
||||
} else if (new_max_bit_size <= 16 * 8) {
|
||||
type = _aggr_phase == AggrPhase1 ? HashVariantType::Type::phase1_slice_cx16
|
||||
: HashVariantType::Type::phase2_slice_cx16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
offsets[0] = 0;
|
||||
for (size_t i = 1; i < group_by_keys; ++i) {
|
||||
offsets[i] = used_bits[i - 1];
|
||||
}
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
template <typename HashVariantType>
|
||||
void Aggregator::_build_hash_variant(HashVariantType& hash_variant, typename HashVariantType::Type type,
|
||||
CompressKeyContext&& context) {
|
||||
hash_variant.init(_state, type, _agg_stat);
|
||||
hash_variant.visit([&](auto& variant) {
|
||||
if constexpr (is_compressed_fixed_size_key<std::decay_t<decltype(*variant)>>) {
|
||||
variant->offsets = std::move(context.offsets);
|
||||
variant->used_bits = std::move(context.used_bits);
|
||||
variant->bases = std::move(context.bases);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <typename HashVariantType>
|
||||
void Aggregator::_init_agg_hash_variant(HashVariantType& hash_variant) {
|
||||
auto type = _get_hash_table_type<HashVariantType>();
|
||||
|
||||
CompressKeyContext compress_key_ctx;
|
||||
bool apply_compress_key_opt = false;
|
||||
typename HashVariantType::Type prev_type = type;
|
||||
type = _try_to_apply_compressed_key_opt<HashVariantType>(type, &compress_key_ctx);
|
||||
apply_compress_key_opt = prev_type != type;
|
||||
if (apply_compress_key_opt) {
|
||||
// build with compressed key
|
||||
VLOG_ROW << "apply compressed key";
|
||||
_build_hash_variant<HashVariantType>(hash_variant, type, std::move(compress_key_ctx));
|
||||
return;
|
||||
}
|
||||
|
||||
bool has_null_column = false;
|
||||
int fixed_byte_size = 0;
|
||||
|
||||
if (_group_by_types.size() > 1) {
|
||||
type = _try_to_apply_fixed_size_opt<HashVariantType>(type, &has_null_column, &fixed_byte_size);
|
||||
}
|
||||
|
||||
VLOG_ROW << "hash type is "
|
||||
<< static_cast<typename std::underlying_type<typename HashVariantType::Type>::type>(type);
|
||||
|
|
|
|||
|
|
@@ -19,40 +19,34 @@
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <new>
#include <queue>
#include <utility>

#include "column/chunk.h"
#include "column/column_helper.h"
#include "column/type_traits.h"
#include "column/vectorized_fwd.h"
#include "common/object_pool.h"
#include "common/statusor.h"
#include "exec/aggregate/agg_hash_variant.h"
#include "exec/aggregate/agg_profile.h"
-#include "exec/chunk_buffer_memory_manager.h"
+#include "exec/aggregator_fwd.h"
#include "exec/limited_pipeline_chunk_buffer.h"
#include "exec/pipeline/context_with_dependency.h"
#include "exec/pipeline/schedule/observer.h"
#include "exec/pipeline/spill_process_channel.h"
-#include "exprs/agg/aggregate_factory.h"
+#include "exprs/agg/aggregate.h"
#include "exprs/expr.h"
#include "gen_cpp/QueryPlanExtra_types.h"
#include "gutil/strings/substitute.h"
#include "runtime/current_thread.h"
#include "runtime/descriptors.h"
#include "runtime/mem_pool.h"
#include "runtime/memory/counting_allocator.h"
#include "runtime/runtime_state.h"
#include "runtime/types.h"
#include "util/defer_op.h"

namespace starrocks {
class RuntimeFilter;
class AggInRuntimeFilterMerger;
struct HashTableKeyAllocator;
class VectorizedLiteral;

struct RawHashTableIterator {
    RawHashTableIterator(HashTableKeyAllocator* alloc_, size_t x_, int y_) : alloc(alloc_), x(x_), y(y_) {}
@@ -117,19 +111,6 @@ inline uint8_t* RawHashTableIterator::value() {
    return static_cast<uint8_t*>(alloc->vecs[x].first) + alloc->aggregate_key_size * y;
}

-class Aggregator;
-class SortedStreamingAggregator;
-
-template <class HashMapWithKey>
-struct AllocateState {
-    AllocateState(Aggregator* aggregator_) : aggregator(aggregator_) {}
-    inline AggDataPtr operator()(const typename HashMapWithKey::KeyType& key);
-    inline AggDataPtr operator()(std::nullptr_t);
-
-private:
-    Aggregator* aggregator;
-};
-
struct AggFunctionTypes {
    TypeDescriptor result_type;
    TypeDescriptor serde_type; // for serialize
@@ -227,6 +208,7 @@ struct AggregatorParams {
    std::vector<TExpr> grouping_exprs;
    std::vector<TExpr> aggregate_functions;
    std::vector<TExpr> intermediate_aggr_exprs;
    std::vector<TExpr> grouping_min_max;

    // Incremental MV
    // Whether it's under test: tests use MemStateTable, otherwise IMTStateTable is used.
@@ -255,12 +237,6 @@ AggregatorParamsPtr convert_to_aggregator_params(const TPlanNode& tnode);
// it contains the common data structures and algorithms of aggregation
class Aggregator : public pipeline::ContextWithDependency {
public:
-#ifdef NDEBUG
-    static constexpr size_t two_level_memory_threshold = 33554432; // 32M, L3 Cache
-#else
-    static constexpr size_t two_level_memory_threshold = 64;
-#endif
-
    Aggregator(AggregatorParamsPtr params);

    ~Aggregator() noexcept override {
@@ -414,7 +390,7 @@ public:

    bool is_streaming_all_states() const { return _streaming_all_states; }

-    HashTableKeyAllocator _state_allocator;
+    HashTableKeyAllocator& state_allocator() { return _state_allocator; }

    void attach_sink_observer(RuntimeState* state, pipeline::PipelineObserver* observer) {
        _pip_observable.attach_sink_observer(state, observer);

@@ -435,6 +411,8 @@ protected:
    std::unique_ptr<MemPool> _mem_pool;
    // used to count heap memory usage of agg states
    std::unique_ptr<CountingAllocatorWithHook> _allocator;

    HashTableKeyAllocator _state_allocator;
    // The open phase still relies on the TFunction object for some initialization operations
    std::vector<TFunction> _fns;
@@ -501,6 +479,8 @@ protected:

    // Exprs used to evaluate group by columns
    std::vector<ExprContext*> _group_by_expr_ctxs;
    std::vector<ExprContext*> _group_by_min_max;
    std::vector<std::optional<std::pair<VectorizedLiteral*, VectorizedLiteral*>>> _ranges;
    Columns _group_by_columns;
    std::vector<ColumnType> _group_by_types;
@@ -598,6 +578,24 @@ protected:
    // Choose a different agg hash map/set based on the group-by columns' count, types, and nullability.
    template <typename HashVariantType>
    void _init_agg_hash_variant(HashVariantType& hash_variant);
+   // Get the specific hash table/set type.
+   template <typename HashVariantType>
+   typename HashVariantType::Type _get_hash_table_type();
+
+   template <typename HashVariantType>
+   typename HashVariantType::Type _try_to_apply_fixed_size_opt(typename HashVariantType::Type type,
+                                                               bool* has_null_column, int* fixed_byte_size);
+   struct CompressKeyContext {
+       std::vector<int> offsets;
+       std::vector<int> used_bits;
+       std::vector<std::any> bases;
+   };
+   template <typename HashVariantType>
+   typename HashVariantType::Type _try_to_apply_compressed_key_opt(typename HashVariantType::Type input_type,
+                                                                   CompressKeyContext* ctx);
+   template <typename HashVariantType>
+   void _build_hash_variant(HashVariantType& hash_variant, typename HashVariantType::Type type,
+                            CompressKeyContext&& context);

    void _release_agg_memory();
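The `CompressKeyContext` declared above (per-column bit `offsets`, `used_bits`, and value `bases`) suggests packing several narrow group-by keys into one fixed-size integer key. The following is a minimal, hypothetical sketch of that idea only, not the actual StarRocks implementation: each column value is rebased against its minimum and shifted into its own disjoint bit range.

#include <cstdint>
#include <vector>

// Hypothetical illustration: pack one row's values from several integer
// group-by columns into a single 64-bit key.
struct PackSpec {
    std::vector<int> offsets;   // bit position of each column in the key
    std::vector<int> used_bits; // bit width reserved for each column
    std::vector<int64_t> bases; // minimum value of each column, used for rebasing
};

uint64_t pack_row(const std::vector<int64_t>& row, const PackSpec& spec) {
    uint64_t key = 0;
    for (size_t c = 0; c < row.size(); ++c) {
        // The caller must guarantee the rebased value fits in used_bits[c].
        const uint64_t rebased = static_cast<uint64_t>(row[c] - spec.bases[c]);
        key |= rebased << spec.offsets[c];
    }
    return key;
}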
@@ -608,7 +606,7 @@ protected:
    int64_t get_two_level_threahold() {
        if (config::two_level_memory_threshold < 0) {
-           return two_level_memory_threshold;
+           return agg::two_level_memory_threshold;
        }
        return config::two_level_memory_threshold;
    }
@@ -617,50 +615,6 @@ protected:
    friend struct AllocateState;
};

-template <class HashMapWithKey>
-inline AggDataPtr AllocateState<HashMapWithKey>::operator()(const typename HashMapWithKey::KeyType& key) {
-    AggDataPtr agg_state = aggregator->_state_allocator.allocate();
-    *reinterpret_cast<typename HashMapWithKey::KeyType*>(agg_state) = key;
-    size_t created = 0;
-    size_t aggregate_function_sz = aggregator->_agg_fn_ctxs.size();
-    try {
-        for (int i = 0; i < aggregate_function_sz; i++) {
-            aggregator->_agg_functions[i]->create(aggregator->_agg_fn_ctxs[i],
-                                                  agg_state + aggregator->_agg_states_offsets[i]);
-            created++;
-        }
-        return agg_state;
-    } catch (std::bad_alloc& e) {
-        for (size_t i = 0; i < created; ++i) {
-            aggregator->_agg_functions[i]->destroy(aggregator->_agg_fn_ctxs[i],
-                                                   agg_state + aggregator->_agg_states_offsets[i]);
-        }
-        aggregator->_state_allocator.rollback();
-        throw;
-    }
-}
-
-template <class HashMapWithKey>
-inline AggDataPtr AllocateState<HashMapWithKey>::operator()(std::nullptr_t) {
-    AggDataPtr agg_state = aggregator->_state_allocator.allocate_null_key_data();
-    size_t created = 0;
-    size_t aggregate_function_sz = aggregator->_agg_fn_ctxs.size();
-    try {
-        for (int i = 0; i < aggregate_function_sz; i++) {
-            aggregator->_agg_functions[i]->create(aggregator->_agg_fn_ctxs[i],
-                                                  agg_state + aggregator->_agg_states_offsets[i]);
-            created++;
-        }
-        return agg_state;
-    } catch (std::bad_alloc& e) {
-        for (int i = 0; i < created; i++) {
-            aggregator->_agg_functions[i]->destroy(aggregator->_agg_fn_ctxs[i],
-                                                   agg_state + aggregator->_agg_states_offsets[i]);
-        }
-        throw;
-    }
-}
-
inline bool LimitedMemAggState::has_limited(const Aggregator& aggregator) const {
    return limited_memory_size > 0 && aggregator.memory_usage() >= limited_memory_size;
}
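The removed `AllocateState` definitions above illustrate the exception-safety pattern used when creating per-group aggregate states: count how many function states were created, and on `std::bad_alloc` destroy exactly those before rethrowing (the keyed overload additionally rolls back the key allocation). A generic sketch of the same create-all-or-tear-down pattern, written here only as an illustration:

#include <cstddef>

// Create n sub-states via create(i); if any creation throws, destroy only
// the ones already created so nothing leaks, then rethrow.
template <class CreateFn, class DestroyFn>
void create_all_or_none(size_t n, CreateFn create, DestroyFn destroy) {
    size_t created = 0;
    try {
        for (size_t i = 0; i < n; ++i) {
            create(i); // may throw, e.g. std::bad_alloc
            ++created;
        }
    } catch (...) {
        for (size_t i = 0; i < created; ++i) {
            destroy(i);
        }
        throw;
    }
}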
@@ -702,11 +656,4 @@ private:
    std::atomic<int64_t> _shared_limit_countdown;
};

-using AggregatorFactory = AggregatorFactoryBase<Aggregator>;
-using AggregatorFactoryPtr = std::shared_ptr<AggregatorFactory>;
-
-using SortedStreamingAggregatorPtr = std::shared_ptr<SortedStreamingAggregator>;
-using StreamingAggregatorFactory = AggregatorFactoryBase<SortedStreamingAggregator>;
-using StreamingAggregatorFactoryPtr = std::shared_ptr<StreamingAggregatorFactory>;
-
} // namespace starrocks
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <cstddef>
+#include <memory>
+
+namespace starrocks {
+namespace agg {
+#ifdef NDEBUG
+constexpr size_t two_level_memory_threshold = 33554432; // 32MB, L3 cache
+#else
+constexpr size_t two_level_memory_threshold = 64;
+#endif
+} // namespace agg
+
+class Aggregator;
+class SortedStreamingAggregator;
+using AggregatorPtr = std::shared_ptr<Aggregator>;
+using SortedStreamingAggregatorPtr = std::shared_ptr<SortedStreamingAggregator>;
+
+template <class HashMapWithKey>
+struct AllocateState;
+
+template <class T>
+class AggregatorFactoryBase;
+
+using AggregatorFactory = AggregatorFactoryBase<Aggregator>;
+using AggregatorFactoryPtr = std::shared_ptr<AggregatorFactory>;
+
+using StreamingAggregatorFactory = AggregatorFactoryBase<SortedStreamingAggregator>;
+using StreamingAggregatorFactoryPtr = std::shared_ptr<StreamingAggregatorFactory>;
+
+} // namespace starrocks
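This new header keeps only constants, forward declarations, and aliases, so translation units that merely pass around `AggregatorPtr` or the factory aliases no longer need the full aggregator definition, which trims include graphs and rebuild times. A hypothetical consumer (the header's path is assumed, since it is not shown in this view):

// some_operator.h -- hypothetical consumer of the forward-declaration header
#include "exec/aggregator_fwd.h" // assumed name; the actual path is not shown here

namespace starrocks {
class SomeAggOperator {
    // std::shared_ptr works with an incomplete type: the deleter is
    // type-erased at the construction site, where Aggregator is complete.
    AggregatorPtr _aggregator;
};
} // namespace starrocks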
@@ -35,7 +35,6 @@
#pragma once

#include <functional>
#include <mutex>
#include <sstream>
#include <vector>

@@ -48,10 +47,7 @@
#include "runtime/descriptors.h"
#include "runtime/mem_pool.h"
#include "runtime/query_statistics.h"
#include "service/backend_options.h"
#include "util/blocking_queue.hpp"
#include "util/runtime_profile.h"
#include "util/uid_util.h" // for print_id

namespace starrocks {
@@ -120,6 +120,16 @@ public:
    const ChunkPtr& back() { return _chunks.back(); }

+   void append_selective_to_back(const Chunk& src, const uint32_t* indexes, uint32_t from, uint32_t size) {
+       auto& chunk = _chunks.back();
+       const size_t prev_bytes = chunk->memory_usage();
+
+       chunk->append_selective(src, indexes, from, size);
+       const size_t new_bytes = chunk->memory_usage();
+
+       _tracker->consume(new_bytes - prev_bytes);
+   }

    bool is_full() const {
        return _chunks.size() >= 4 || _tracker->consumption() > config::partition_hash_join_probe_limit_size;
    }
@@ -213,10 +223,10 @@ Status PartitionedHashJoinProberImpl::push_probe_chunk(RuntimeState* state, Chun
    }
    std::vector<uint32_t> hash_values;
    {
-       hash_values.assign(num_rows, HashUtil::FNV_SEED);
+       hash_values.assign(num_rows, 0);

        for (const ColumnPtr& column : partition_columns) {
-           column->fnv_hash(hash_values.data(), 0, num_rows);
+           column->crc32_hash(hash_values.data(), 0, num_rows);
        }
        // find partition id
        for (size_t i = 0; i < hash_values.size(); ++i) {
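Note that the builder's `_append_chunk_to_partitions` (changed the same way further down) must use the identical hash family, otherwise probe rows would be routed to a different partition than the matching build rows; this commit switches both sides from FNV to CRC32 together. A minimal sketch of the routing step that follows the hashing, assuming a power-of-two partition count:

#include <cstdint>
#include <vector>

// Map per-row hash values to partition ids with a mask; num_partitions must
// be a power of two for the mask to be equivalent to a modulo.
std::vector<uint32_t> route_to_partitions(const std::vector<uint32_t>& hash_values, uint32_t num_partitions) {
    std::vector<uint32_t> partition_ids(hash_values.size());
    const uint32_t mask = num_partitions - 1;
    for (size_t i = 0; i < hash_values.size(); ++i) {
        partition_ids[i] = hash_values[i] & mask;
    }
    return partition_ids;
}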
@@ -362,7 +372,7 @@ bool SingleHashJoinBuilder::anti_join_key_column_has_null() const {
    return false;
}

-Status SingleHashJoinBuilder::do_append_chunk(const ChunkPtr& chunk) {
+Status SingleHashJoinBuilder::do_append_chunk(RuntimeState* state, const ChunkPtr& chunk) {
    if (UNLIKELY(_ht.get_row_count() + chunk->num_rows() >= max_hash_table_element_size)) {
        return Status::NotSupported(strings::Substitute("row count of right table in hash join > $0", UINT32_MAX));
    }
@@ -404,7 +414,7 @@ enum class CacheLevel { L2, L3, MEMORY };

class AdaptivePartitionHashJoinBuilder final : public HashJoinBuilder {
public:
-   AdaptivePartitionHashJoinBuilder(HashJoiner& hash_joiner);
+   explicit AdaptivePartitionHashJoinBuilder(HashJoiner& hash_joiner);
    ~AdaptivePartitionHashJoinBuilder() override = default;

    void create(const HashTableParam& param) override;
@@ -413,7 +423,7 @@ public:

    void reset(const HashTableParam& param) override;

-   Status do_append_chunk(const ChunkPtr& chunk) override;
+   Status do_append_chunk(RuntimeState* state, const ChunkPtr& chunk) override;

    Status build(RuntimeState* state) override;
@@ -432,27 +442,53 @@ public:

    void clone_readable(HashJoinBuilder* builder) override;

+   Status prepare_for_spill_start(RuntimeState* state) override;
+   ChunkPtr convert_to_spill_schema(const ChunkPtr& chunk) const override;

private:
-   size_t _estimated_row_size(const HashTableParam& param) const;
-   size_t _estimated_probe_cost(const HashTableParam& param) const;
+   static double _calculate_cache_miss_factor(const HashJoiner& hash_joiner);
+
+   size_t _estimate_hash_table_probing_bytes_per_row(const HashTableParam& param) const;
+   size_t _estimate_probe_row_bytes(const HashTableParam& param) const;
    template <CacheLevel T>
-   size_t _estimated_build_cost(size_t build_row_size) const;
-   void _adjust_partition_rows(size_t build_row_size);
+   size_t _estimate_cost_by_bytes(size_t row_bytes) const;

    void _init_partition_nums(const HashTableParam& param);
-   Status _convert_to_single_partition();
-   Status _append_chunk_to_partitions(const ChunkPtr& chunk);
+   void _adjust_partition_rows(size_t hash_table_bytes_per_row, size_t hash_table_probing_bytes_per_row);
+
+   Status _do_append_chunk(RuntimeState* state, const ChunkPtr& chunk);
+   Status _append_chunk_to_partitions(RuntimeState* state, const ChunkPtr& chunk);
+   Status _transfer_to_appending_stage(RuntimeState* state);
+   Status _convert_to_single_partition(RuntimeState* state);
+   Status _flush_buffer_chunks(RuntimeState* state);
+
+   bool _need_partition_join_for_build(size_t ht_num_rows) const;
+   bool _need_partition_join_for_append(size_t ht_num_rows) const;

private:
    std::vector<std::unique_ptr<SingleHashJoinBuilder>> _builders;

-   size_t _partition_num = 0;
-   size_t _partition_join_min_rows = 0;
-   size_t _partition_join_max_rows = 0;
+   // Split chunk appending into two stages:
+   // - BUFFERING: buffers chunks without partitioning until the number of rows exceeds
+   //   _partition_join_l2_max_rows or _partition_join_l3_max_rows.
+   // - APPENDING: partitions all incoming chunks.
+   enum class Stage { BUFFERING, APPENDING };
+   Stage _stage = Stage::BUFFERING;
+   MemTracker _mem_tracker;
+   std::vector<PartitionChunkChannel> _partition_input_channels;
+   std::vector<ChunkPtr> _unpartition_chunks;

-   size_t _probe_estimated_costs = 0;
+   size_t _partition_num = 0;
+
+   size_t _hash_table_probing_bytes_per_row = 0;
+   size_t _hash_table_bytes_per_row = 0;
+   size_t _partition_join_l2_min_rows = 0;
+   size_t _partition_join_l2_max_rows = 0;
+   size_t _partition_join_l3_min_rows = 0;
+   size_t _partition_join_l3_max_rows = 0;
+
+   size_t _probe_row_shuffle_cost = 0;
+   size_t _l2_benefit = 0;
+   size_t _l3_benefit = 0;

    size_t _fit_L2_cache_max_rows = 0;
    size_t _fit_L3_cache_max_rows = 0;
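The BUFFERING/APPENDING members above defer the partitioning work: chunks are buffered whole until the row count shows partitioning is likely to pay off, and only then is the backlog re-partitioned. A schematic, hypothetical reduction of that control flow (not the actual class):

#include <cstddef>

// Hypothetical sketch of the two-stage append protocol.
struct TwoStageAppender {
    enum class Stage { BUFFERING, APPENDING };
    Stage stage = Stage::BUFFERING;
    size_t threshold_rows = 0; // e.g. the smaller of the l2/l3 min-rows bounds
    size_t buffered_rows = 0;

    template <class Chunk, class BufferFn, class PartitionFn, class DrainFn>
    void append(const Chunk& chunk, size_t chunk_rows, BufferFn buffer, PartitionFn partition, DrainFn drain) {
        if (stage == Stage::BUFFERING) {
            buffer(chunk);
            buffered_rows += chunk_rows;
            if (buffered_rows >= threshold_rows) {
                stage = Stage::APPENDING;
                drain(); // re-partition everything buffered so far
            }
        } else {
            partition(chunk); // steady state: partition each incoming chunk
        }
    }
};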
@@ -461,10 +497,15 @@ private:
    size_t _L3_cache_size = 0;

    size_t _pushed_chunks = 0;

+   // Shared read-only data accessed concurrently by threads can lead to better cache performance.
+   // Therefore, for broadcast joins, this factor is used to reduce the benefit of partitioned hash joins as the
+   // number of prober threads (DOP) increases.
+   const double _cache_miss_factor;
};

AdaptivePartitionHashJoinBuilder::AdaptivePartitionHashJoinBuilder(HashJoiner& hash_joiner)
-       : HashJoinBuilder(hash_joiner) {
+       : HashJoinBuilder(hash_joiner), _cache_miss_factor(_calculate_cache_miss_factor(hash_joiner)) {
    static constexpr size_t DEFAULT_L2_CACHE_SIZE = 1 * 1024 * 1024;
    static constexpr size_t DEFAULT_L3_CACHE_SIZE = 32 * 1024 * 1024;
    const auto& cache_sizes = CpuInfo::get_cache_sizes();
@@ -474,100 +515,173 @@ AdaptivePartitionHashJoinBuilder::AdaptivePartitionHashJoinBuilder(HashJoiner& h
    _L3_cache_size = _L3_cache_size ? _L3_cache_size : DEFAULT_L3_CACHE_SIZE;
}

-size_t AdaptivePartitionHashJoinBuilder::_estimated_row_size(const HashTableParam& param) const {
+double AdaptivePartitionHashJoinBuilder::_calculate_cache_miss_factor(const HashJoiner& hash_joiner) {
+   if (hash_joiner.distribution_mode() != TJoinDistributionMode::BROADCAST) {
+       return 1.0; // Not a broadcast join, so there is no cache reuse between different probers.
+   }
+
+   const size_t max_prober_dop = hash_joiner.max_dop();
+   if (max_prober_dop <= 1) {
+       return 1.0;
+   }
+   if (max_prober_dop > 8) {
+       return 0.1;
+   }
+   return 1 - (max_prober_dop - 1) * 0.1;
+}
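Plugging sample DOP values into `_calculate_cache_miss_factor` above: DOP 2 yields 0.9, DOP 4 yields 0.7, DOP 8 yields 0.3, and anything above 8 is clamped to 0.1. So for a broadcast join probed by many threads, the estimated per-row probing footprint, and therefore the estimated benefit of partitioning, shrinks to as little as a tenth.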
size_t AdaptivePartitionHashJoinBuilder::_estimate_hash_table_probing_bytes_per_row(const HashTableParam& param) const {
    size_t estimated_each_row = 0;

    // Probing a row needs to:
    // 1. touch the `first` and `next` vectors,
    // 2. compare join keys between the build and probe sides, and
    // 3. output columns from the build side.

    // 1. `first` and `next` bytes
    estimated_each_row += 8;

    // 2. key bytes
    for (const auto& join_key : param.join_keys) {
        if (join_key.type != nullptr) {
            estimated_each_row += get_size_of_fixed_length_type(join_key.type->type);
            // The benefit from non-fixed key columns is smaller than that from fixed key columns,
            // so a penalty (/4) is applied here.
            estimated_each_row += type_estimated_overhead_bytes(join_key.type->type) / 4;
        }
    }

    // 3. output bytes
    for (auto* tuple : param.build_row_desc->tuple_descriptors()) {
-       for (auto slot : tuple->slots()) {
-           if (param.build_output_slots.contains(slot->id())) {
+       for (const auto* slot : tuple->slots()) {
+           if (param.build_output_slots.empty() || param.build_output_slots.contains(slot->id())) {
                estimated_each_row += get_size_of_fixed_length_type(slot->type().type);
                estimated_each_row += type_estimated_overhead_bytes(slot->type().type);
            }
        }
    }

    // for the hash table bucket
    estimated_each_row += 4;

-   return estimated_each_row;
+   return std::max<size_t>(estimated_each_row * _cache_miss_factor, 1);
}
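As a rough worked example of the estimate above, under assumed sizes: a single BIGINT join key with one BIGINT output column contributes 8 bytes for `first`/`next`, 8 key bytes (plus a quarter of whatever `type_estimated_overhead_bytes` reports), 8 output bytes plus its overhead, and 4 bucket bytes, i.e. on the order of 28+ bytes per probed row before the `_cache_miss_factor` scaling is applied for broadcast joins.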
// We could use a better estimation model.
-size_t AdaptivePartitionHashJoinBuilder::_estimated_probe_cost(const HashTableParam& param) const {
+size_t AdaptivePartitionHashJoinBuilder::_estimate_probe_row_bytes(const HashTableParam& param) const {
    size_t size = 0;

    // shuffling probe bytes
    for (auto* tuple : param.probe_row_desc->tuple_descriptors()) {
-       for (auto slot : tuple->slots()) {
-           if (param.probe_output_slots.contains(slot->id())) {
-               size += get_size_of_fixed_length_type(slot->type().type);
-               size += type_estimated_overhead_bytes(slot->type().type);
-           }
-       }
+       for (const auto* slot : tuple->slots()) {
+           size += get_size_of_fixed_length_type(slot->type().type);
+           size += type_estimated_overhead_bytes(slot->type().type);
+       }
    }
-   // we define probe cost is bytes size * 6
-   return size * 6;
+
+   return std::max<size_t>(size, 1);
}
template <>
-size_t AdaptivePartitionHashJoinBuilder::_estimated_build_cost<CacheLevel::L2>(size_t build_row_size) const {
-    return build_row_size / 2;
+size_t AdaptivePartitionHashJoinBuilder::_estimate_cost_by_bytes<CacheLevel::L2>(size_t row_bytes) const {
+    return row_bytes / 2;
}

template <>
-size_t AdaptivePartitionHashJoinBuilder::_estimated_build_cost<CacheLevel::L3>(size_t build_row_size) const {
-    return build_row_size;
+size_t AdaptivePartitionHashJoinBuilder::_estimate_cost_by_bytes<CacheLevel::L3>(size_t row_bytes) const {
+    return row_bytes;
}

template <>
-size_t AdaptivePartitionHashJoinBuilder::_estimated_build_cost<CacheLevel::MEMORY>(size_t build_row_size) const {
-    return build_row_size * 2;
+size_t AdaptivePartitionHashJoinBuilder::_estimate_cost_by_bytes<CacheLevel::MEMORY>(size_t row_bytes) const {
+    return row_bytes * 2;
}
-void AdaptivePartitionHashJoinBuilder::_adjust_partition_rows(size_t build_row_size) {
-    build_row_size = std::max(build_row_size, 4UL);
-    _fit_L2_cache_max_rows = _L2_cache_size / build_row_size;
-    _fit_L3_cache_max_rows = _L3_cache_size / build_row_size;
-
-    // If the hash table is smaller than the L2 cache, we don't think partition hash join is needed.
-    _partition_join_min_rows = _fit_L2_cache_max_rows;
-    // If the hash table after partitioning can't be loaded into L3, we don't think partition hash join is needed.
-    _partition_join_max_rows = _fit_L3_cache_max_rows * _partition_num;
-
-    if (_probe_estimated_costs + _estimated_build_cost<CacheLevel::L2>(build_row_size) <
-        _estimated_build_cost<CacheLevel::L3>(build_row_size)) {
-        // overhead after hash table partitioning + probe extra cost < cost before partitioning
-        // nothing to do
-    } else if (_probe_estimated_costs + _estimated_build_cost<CacheLevel::L3>(build_row_size) <
-               _estimated_build_cost<CacheLevel::MEMORY>(build_row_size)) {
-        // It is only after this point that performance gains can be realized beyond the L3 cache.
-        _partition_join_min_rows = _fit_L3_cache_max_rows;
-    }
-
-    VLOG_OPERATOR << "TRACE:"
-                  << "partition_num=" << _partition_num << " partition_join_min_rows=" << _partition_join_min_rows
-                  << " partition_join_max_rows=" << _partition_join_max_rows << " probe cost=" << _probe_estimated_costs
-                  << " build cost L2=" << _estimated_build_cost<CacheLevel::L2>(build_row_size)
-                  << " build cost L3=" << _estimated_build_cost<CacheLevel::L3>(build_row_size)
-                  << " build cost Mem=" << _estimated_build_cost<CacheLevel::MEMORY>(build_row_size);
-}
+bool AdaptivePartitionHashJoinBuilder::_need_partition_join_for_build(size_t ht_num_rows) const {
+    return (_partition_join_l2_min_rows < ht_num_rows && ht_num_rows <= _partition_join_l2_max_rows) ||
+           (_partition_join_l3_min_rows < ht_num_rows && ht_num_rows <= _partition_join_l3_max_rows);
+}
+
+bool AdaptivePartitionHashJoinBuilder::_need_partition_join_for_append(size_t ht_num_rows) const {
+    return ht_num_rows <= _partition_join_l2_max_rows || ht_num_rows <= _partition_join_l3_max_rows;
+}
+
+void AdaptivePartitionHashJoinBuilder::_adjust_partition_rows(size_t hash_table_bytes_per_row,
+                                                              size_t hash_table_probing_bytes_per_row) {
+    if (hash_table_bytes_per_row == _hash_table_bytes_per_row &&
+        hash_table_probing_bytes_per_row == _hash_table_probing_bytes_per_row) {
+        return; // No need to adjust partition rows.
+    }
+
+    _hash_table_bytes_per_row = hash_table_bytes_per_row;
+    _hash_table_probing_bytes_per_row = hash_table_probing_bytes_per_row;
+
+    hash_table_bytes_per_row = std::max<size_t>(hash_table_bytes_per_row, 1);
+
+    _fit_L2_cache_max_rows = _L2_cache_size / hash_table_bytes_per_row;
+    _fit_L3_cache_max_rows = _L3_cache_size / hash_table_bytes_per_row;
+
+    _partition_join_l2_min_rows = -1;
+    _partition_join_l2_max_rows = 0;
+    _partition_join_l3_min_rows = -1;
+    _partition_join_l3_max_rows = 0;
+
+    const auto l2_benefit = _estimate_cost_by_bytes<CacheLevel::L3>(hash_table_probing_bytes_per_row) -
+                            _estimate_cost_by_bytes<CacheLevel::L2>(hash_table_probing_bytes_per_row);
+    const auto l3_benefit = _estimate_cost_by_bytes<CacheLevel::MEMORY>(hash_table_probing_bytes_per_row) -
+                            _estimate_cost_by_bytes<CacheLevel::L3>(hash_table_probing_bytes_per_row);
+
+    if (_probe_row_shuffle_cost < l3_benefit) { // Partitioned joins benefit from the L3 cache.
+        // Partitioned joins benefit from the L3 cache when probing a row misses the cache in a non-partitioned
+        // join but not in a partitioned join.
+        // 1. min_rows > (l3_cache_size/hash_table_bytes_per_row)*(l3_benefit/(l3_benefit-_probe_row_shuffle_cost)), because:
+        //    - l3_benefit * non_partition_cache_miss_rate > _probe_row_shuffle_cost
+        //    - non_partition_cache_miss_rate = 1 - l3_cache_size/(min_rows*hash_table_bytes_per_row)
+        // 2. max_rows < (l3_cache_size/hash_table_bytes_per_row)*(l3_benefit/_probe_row_shuffle_cost)*num_partitions, because:
+        //    - l3_benefit * partition_cache_hit_rate > _probe_row_shuffle_cost
+        //    - partition_cache_hit_rate = l3_cache_size/(max_rows_per_partition*hash_table_bytes_per_row)
+        _partition_join_l3_min_rows = _fit_L3_cache_max_rows * l3_benefit / (l3_benefit - _probe_row_shuffle_cost);
+        _partition_join_l3_max_rows = _fit_L3_cache_max_rows * _partition_num * l3_benefit / _probe_row_shuffle_cost;
+        _partition_join_l3_max_rows *= 2; // Relax the restriction.
+
+        if (_probe_row_shuffle_cost < l2_benefit) { // Partitioned joins benefit from the L2 cache.
+            _partition_join_l2_min_rows = _fit_L2_cache_max_rows * l2_benefit / (l2_benefit - _probe_row_shuffle_cost);
+            _partition_join_l2_min_rows *= 2; // Make the restriction more stringent.
+            _partition_join_l2_max_rows =
+                    (_fit_L2_cache_max_rows * _partition_num) * l2_benefit / _probe_row_shuffle_cost;
+        }
+    } else {
+        // Partitioned joins don't bring performance gains here, so don't use the partitioned hash join.
+        _partition_num = 1;
+    }
+
+    _l2_benefit = l2_benefit;
+    _l3_benefit = l3_benefit;
+
+    VLOG_OPERATOR << "TRACE: _adjust_partition_rows "
+                  << "[partition_num=" << _partition_num << "] "
+                  << "[partition_join_l2_min_rows=" << _partition_join_l2_min_rows << "] "
+                  << "[partition_join_l2_max_rows=" << _partition_join_l2_max_rows << "] "
+                  << "[partition_join_l3_min_rows=" << _partition_join_l3_min_rows << "] "
+                  << "[partition_join_l3_max_rows=" << _partition_join_l3_max_rows << "] "
+                  << "[hash_table_probing_bytes_per_row=" << hash_table_probing_bytes_per_row << "] "
+                  << "[hash_table_bytes_per_row=" << hash_table_bytes_per_row << "] "
+                  << "[l2_benefit=" << l2_benefit << "] "
+                  << "[l3_benefit=" << l3_benefit << "] "
+                  << "[probe_shuffle_cost=" << _probe_row_shuffle_cost << "] ";
+}
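To make the L3 bounds concrete with assumed numbers: with a 32 MB L3, `hash_table_bytes_per_row` = 64, `l3_benefit` = 64, and `_probe_row_shuffle_cost` = 16, `_fit_L3_cache_max_rows` is 512K; the lower bound works out to 512K * 64 / (64 - 16) ≈ 700K rows, and with 16 partitions the upper bound is 512K * 16 * 64 / 16 ≈ 33.5M rows, doubled to about 67M by the relaxation. Partitioning would therefore be considered for build sides between roughly 0.7M and 67M rows under these assumptions.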
void AdaptivePartitionHashJoinBuilder::_init_partition_nums(const HashTableParam& param) {
    _partition_num = 16;

-   size_t estimated_bytes_each_row = _estimated_row_size(param);
-   _probe_estimated_costs = _estimated_probe_cost(param);
-   _adjust_partition_rows(estimated_bytes_each_row);
+   _probe_row_shuffle_cost =
+           std::max<size_t>(_estimate_cost_by_bytes<CacheLevel::L3>(_estimate_probe_row_bytes(param)), 1);
+
+   const size_t hash_table_probing_bytes_per_row = _estimate_hash_table_probing_bytes_per_row(param);
+   _adjust_partition_rows(1, hash_table_probing_bytes_per_row);

-   COUNTER_SET(_hash_joiner.build_metrics().partition_nums, (int64_t)_partition_num);
+   COUNTER_SET(_hash_joiner.build_metrics().partition_nums, static_cast<int64_t>(_partition_num));
}

void AdaptivePartitionHashJoinBuilder::create(const HashTableParam& param) {
    _init_partition_nums(param);

+   if (_partition_num > 1) {
+       _partition_input_channels.resize(_partition_num, PartitionChunkChannel(&_mem_tracker));
+   }
    for (size_t i = 0; i < _partition_num; ++i) {
        _builders.emplace_back(std::make_unique<SingleHashJoinBuilder>(_hash_joiner));
        _builders.back()->create(param);
@@ -579,10 +693,14 @@ void AdaptivePartitionHashJoinBuilder::close() {
        builder->close();
    }
    _builders.clear();
+   _partition_input_channels.clear();
    _partition_num = 0;
-   _partition_join_min_rows = 0;
-   _partition_join_max_rows = 0;
-   _probe_estimated_costs = 0;
+   _partition_join_l2_min_rows = 0;
+   _partition_join_l2_max_rows = 0;
+   _partition_join_l3_min_rows = 0;
+   _partition_join_l3_max_rows = 0;
+   _probe_row_shuffle_cost = 0;
+   _hash_table_probing_bytes_per_row = 0;
    _fit_L2_cache_max_rows = 0;
    _fit_L3_cache_max_rows = 0;
    _pushed_chunks = 0;
@@ -637,17 +755,70 @@ int64_t AdaptivePartitionHashJoinBuilder::ht_mem_usage() const {
                           [](int64_t sum, const auto& builder) { return sum + builder->ht_mem_usage(); });
}

-Status AdaptivePartitionHashJoinBuilder::_convert_to_single_partition() {
-    // merge all partition data to the first partition
-    for (size_t i = 1; i < _builders.size(); ++i) {
-        _builders[0]->hash_table().merge_ht(_builders[i]->hash_table());
-    }
+Status AdaptivePartitionHashJoinBuilder::_convert_to_single_partition(RuntimeState* state) {
+    VLOG_OPERATOR << "TRACE: convert_to_single_partition "
+                  << "[partition_num=" << _partition_num << "] "
+                  << "[partition_join_l2_min_rows=" << _partition_join_l2_min_rows << "] "
+                  << "[partition_join_l2_max_rows=" << _partition_join_l2_max_rows << "] "
+                  << "[partition_join_l3_min_rows=" << _partition_join_l3_min_rows << "] "
+                  << "[partition_join_l3_max_rows=" << _partition_join_l3_max_rows << "] "
+                  << "[hash_table_row_count=" << hash_table_row_count() << "] ";
+
+    // Merge the data of all partitions into the first partition.
+    if (_stage == Stage::BUFFERING) {
+        _mem_tracker.set(0);
+        for (const auto& unpartition_chunk : _unpartition_chunks) {
+            RETURN_IF_ERROR(_builders[0]->do_append_chunk(state, unpartition_chunk));
+        }
+        _unpartition_chunks.clear();
+    } else {
+        for (size_t i = 0; i < _builders.size(); ++i) {
+            if (i != 0) {
+                _builders[0]->hash_table().merge_ht(_builders[i]->hash_table());
+            }
+            auto& channel = _partition_input_channels[i];
+            while (!channel.is_empty()) {
+                RETURN_IF_ERROR(_builders[0]->do_append_chunk(state, channel.pull()));
+            }
+        }
+        _partition_input_channels.clear();
+    }
    _builders.resize(1);

    _partition_num = 1;
+   COUNTER_SET(_hash_joiner.build_metrics().partition_nums, static_cast<int64_t>(1));

    return Status::OK();
}

-Status AdaptivePartitionHashJoinBuilder::_append_chunk_to_partitions(const ChunkPtr& chunk) {
+Status AdaptivePartitionHashJoinBuilder::_transfer_to_appending_stage(RuntimeState* state) {
+    _stage = Stage::APPENDING;
+    _mem_tracker.set(0); // All the buffered chunks are moved to the partition builders, so clear the memory tracker.
+    for (const auto& unpartition_chunk : _unpartition_chunks) {
+        RETURN_IF_ERROR(_append_chunk_to_partitions(state, unpartition_chunk));
+    }
+    _unpartition_chunks.clear();
+
+    return Status::OK();
+}
+
+Status AdaptivePartitionHashJoinBuilder::_do_append_chunk(RuntimeState* state, const ChunkPtr& chunk) {
+    if (_stage == Stage::BUFFERING) {
+        _mem_tracker.consume(chunk->memory_usage());
+        _unpartition_chunks.push_back(chunk);
+
+        const size_t num_rows = hash_table_row_count();
+        if (num_rows >= _partition_join_l2_min_rows || num_rows >= _partition_join_l3_min_rows) {
+            RETURN_IF_ERROR(_transfer_to_appending_stage(state));
+        }
+
+        return Status::OK();
+    } else {
+        return _append_chunk_to_partitions(state, chunk);
+    }
+}
+
+Status AdaptivePartitionHashJoinBuilder::_append_chunk_to_partitions(RuntimeState* state, const ChunkPtr& chunk) {
    const std::vector<ExprContext*>& build_partition_keys = _hash_joiner.build_expr_ctxs();

    size_t num_rows = chunk->num_rows();
@@ -660,10 +831,10 @@ Status AdaptivePartitionHashJoinBuilder::_append_chunk_to_partitions(const Chunk
    }
    std::vector<uint32_t> hash_values;
    {
-       hash_values.assign(num_rows, HashUtil::FNV_SEED);
+       hash_values.assign(num_rows, 0);

        for (const ColumnPtr& column : partition_columns) {
-           column->fnv_hash(hash_values.data(), 0, num_rows);
+           column->crc32_hash(hash_values.data(), 0, num_rows);
        }
        // find partition id
        for (size_t i = 0; i < hash_values.size(); ++i) {
@@ -698,45 +869,83 @@ Status AdaptivePartitionHashJoinBuilder::_append_chunk_to_partitions(const Chunk
        if (size == 0) {
            continue;
        }
-       // TODO: make builder implements append with selective
-       auto partition_chunk = chunk->clone_empty();
-       partition_chunk->append_selective(*chunk, selection.data(), from, size);
-       RETURN_IF_ERROR(_builders[i]->append_chunk(std::move(partition_chunk)));
+
+       auto& channel = _partition_input_channels[i];
+
+       if (channel.is_empty()) {
+           channel.push(chunk->clone_empty());
+       }
+
+       if (channel.back()->num_rows() + size <= state->chunk_size()) {
+           channel.append_selective_to_back(*chunk, selection.data(), from, size);
+       } else {
+           channel.push(chunk->clone_empty());
+           channel.append_selective_to_back(*chunk, selection.data(), from, size);
+       }
+
+       while (channel.is_full()) {
+           RETURN_IF_ERROR(_builders[i]->append_chunk(state, channel.pull()));
+       }
    }
    return Status::OK();
}

-Status AdaptivePartitionHashJoinBuilder::do_append_chunk(const ChunkPtr& chunk) {
-    if (_partition_num > 1 && hash_table_row_count() > _partition_join_max_rows) {
-        RETURN_IF_ERROR(_convert_to_single_partition());
+Status AdaptivePartitionHashJoinBuilder::do_append_chunk(RuntimeState* state, const ChunkPtr& chunk) {
+    if (_partition_num > 1 && !_need_partition_join_for_append(hash_table_row_count())) {
+        RETURN_IF_ERROR(_convert_to_single_partition(state));
    }

    if (_partition_num > 1 && ++_pushed_chunks % 8 == 0) {
-        size_t build_row_size = ht_mem_usage() / hash_table_row_count();
-        _adjust_partition_rows(build_row_size);
+        const size_t build_row_size = (ht_mem_usage() + _mem_tracker.consumption()) / hash_table_row_count();
+        _adjust_partition_rows(build_row_size, _hash_table_probing_bytes_per_row);
        if (_partition_num == 1) {
-            RETURN_IF_ERROR(_convert_to_single_partition());
+            RETURN_IF_ERROR(_convert_to_single_partition(state));
        }
    }

    if (_partition_num > 1) {
-        RETURN_IF_ERROR(_append_chunk_to_partitions(chunk));
+        RETURN_IF_ERROR(_do_append_chunk(state, chunk));
    } else {
-        RETURN_IF_ERROR(_builders[0]->do_append_chunk(chunk));
+        RETURN_IF_ERROR(_builders[0]->do_append_chunk(state, chunk));
    }

    return Status::OK();
}

+Status AdaptivePartitionHashJoinBuilder::prepare_for_spill_start(RuntimeState* state) {
+    if (_partition_num > 1) {
+        return _flush_buffer_chunks(state);
+    }
+    return Status::OK();
+}
+
+ChunkPtr AdaptivePartitionHashJoinBuilder::convert_to_spill_schema(const ChunkPtr& chunk) const {
+    return _builders[0]->convert_to_spill_schema(chunk);
+}
+
+Status AdaptivePartitionHashJoinBuilder::_flush_buffer_chunks(RuntimeState* state) {
+    if (_stage == Stage::BUFFERING) {
+        RETURN_IF_ERROR(_transfer_to_appending_stage(state));
+    }
+    for (size_t i = 0; i < _partition_input_channels.size(); ++i) {
+        auto& channel = _partition_input_channels[i];
+        while (!channel.is_empty()) {
+            RETURN_IF_ERROR(_builders[i]->do_append_chunk(state, channel.pull()));
+        }
+    }
+
+    return Status::OK();
+}
+
Status AdaptivePartitionHashJoinBuilder::build(RuntimeState* state) {
    DCHECK_EQ(_partition_num, _builders.size());

-   if (_partition_num > 1 && hash_table_row_count() < _partition_join_min_rows) {
-       RETURN_IF_ERROR(_convert_to_single_partition());
+   if (_partition_num > 1) {
+       if (!_need_partition_join_for_build(hash_table_row_count())) {
+           RETURN_IF_ERROR(_convert_to_single_partition(state));
+       } else {
+           RETURN_IF_ERROR(_flush_buffer_chunks(state));
+       }
    }

    for (auto& builder : _builders) {
@@ -769,17 +978,20 @@ std::unique_ptr<HashJoinProberImpl> AdaptivePartitionHashJoinBuilder::create_pro
    }
}

-void AdaptivePartitionHashJoinBuilder::clone_readable(HashJoinBuilder* builder) {
+void AdaptivePartitionHashJoinBuilder::clone_readable(HashJoinBuilder* other_builder) {
    for (auto& builder : _builders) {
        DCHECK(builder->ready());
    }
    DCHECK(_ready);
    DCHECK_EQ(_partition_num, _builders.size());
-   auto other = down_cast<AdaptivePartitionHashJoinBuilder*>(builder);
+   auto other = down_cast<AdaptivePartitionHashJoinBuilder*>(other_builder);
    other->_builders.clear();
    other->_partition_num = _partition_num;
-   other->_partition_join_max_rows = _partition_join_max_rows;
-   other->_partition_join_min_rows = _partition_join_min_rows;
+   other->_partition_join_l2_min_rows = _partition_join_l2_min_rows;
+   other->_partition_join_l2_max_rows = _partition_join_l2_max_rows;
+   other->_partition_join_l3_min_rows = _partition_join_l3_min_rows;
+   other->_partition_join_l3_max_rows = _partition_join_l3_max_rows;
    other->_ready = _ready;
    for (size_t i = 0; i < _partition_num; ++i) {
        other->_builders.emplace_back(std::make_unique<SingleHashJoinBuilder>(_hash_joiner));
@@ -92,11 +92,11 @@ public:
    virtual void create(const HashTableParam& param) = 0;

    // append chunk to hash table
-   Status append_chunk(const ChunkPtr& chunk) {
+   Status append_chunk(RuntimeState* state, const ChunkPtr& chunk) {
        _inc_row_count(chunk->num_rows());
-       return do_append_chunk(chunk);
+       return do_append_chunk(state, chunk);
    }
-   virtual Status do_append_chunk(const ChunkPtr& chunk) = 0;
+   virtual Status do_append_chunk(RuntimeState* state, const ChunkPtr& chunk) = 0;

    virtual Status build(RuntimeState* state) = 0;
@@ -125,6 +125,7 @@ public:
    // clone readable state to the given builder
    virtual void clone_readable(HashJoinBuilder* builder) = 0;

+   virtual Status prepare_for_spill_start(RuntimeState* state) { return Status::OK(); }
    virtual ChunkPtr convert_to_spill_schema(const ChunkPtr& chunk) const = 0;

protected:
@@ -149,7 +150,7 @@ public:

    void reset(const HashTableParam& param) override;

-   Status do_append_chunk(const ChunkPtr& chunk) override;
+   Status do_append_chunk(RuntimeState* state, const ChunkPtr& chunk) override;

    Status build(RuntimeState* state) override;
@@ -483,8 +483,8 @@ pipeline::OpFactories HashJoinNode::_decompose_to_pipeline(pipeline::PipelineBui
    HashJoinerParam param(pool, _hash_join_node, _is_null_safes, _build_expr_ctxs, _probe_expr_ctxs,
                          _other_join_conjunct_ctxs, _conjunct_ctxs, child(1)->row_desc(), child(0)->row_desc(),
                          child(1)->type(), child(0)->type(), child(1)->conjunct_ctxs().empty(), _build_runtime_filters,
-                         _output_slots, _output_slots, _distribution_mode, _enable_late_materialization,
-                         _enable_partition_hash_join, _is_skew_join);
+                         _output_slots, _output_slots, context->degree_of_parallelism(), _distribution_mode,
+                         _enable_late_materialization, _enable_partition_hash_join, _is_skew_join);
    auto hash_joiner_factory = std::make_shared<starrocks::pipeline::HashJoinerFactory>(param);

    // Create a shared RefCountedRuntimeFilterCollector
@@ -82,6 +82,7 @@ HashJoiner::HashJoiner(const HashJoinerParam& param)
          _probe_output_slots(param._probe_output_slots),
          _build_runtime_filters(param._build_runtime_filters.begin(), param._build_runtime_filters.end()),
          _enable_late_materialization(param._enable_late_materialization),
+         _max_dop(param._max_dop),
          _is_skew_join(param._is_skew_join) {
    _is_push_down = param._hash_join_node.is_push_down;
    if (_join_type == TJoinOp::LEFT_ANTI_JOIN && param._hash_join_node.is_rewritten_from_not_in) {
@@ -178,7 +179,7 @@ void HashJoiner::_init_hash_table_param(HashTableParam* param, RuntimeState* sta
        }
    }
}

-Status HashJoiner::append_chunk_to_ht(const ChunkPtr& chunk) {
+Status HashJoiner::append_chunk_to_ht(RuntimeState* state, const ChunkPtr& chunk) {
    if (_phase != HashJoinPhase::BUILD) {
        return Status::OK();
    }

@@ -187,7 +188,7 @@ Status HashJoiner::append_chunk_to_ht(const ChunkPtr& chunk) {
    }

    update_build_rows(chunk->num_rows());
-   return _hash_join_builder->append_chunk(chunk);
+   return _hash_join_builder->append_chunk(state, chunk);
}

Status HashJoiner::append_chunk_to_spill_buffer(RuntimeState* state, const ChunkPtr& chunk) {
@@ -70,7 +70,7 @@ struct HashJoinerParam {
                     const RowDescriptor& build_row_descriptor, const RowDescriptor& probe_row_descriptor,
                     TPlanNodeType::type build_node_type, TPlanNodeType::type probe_node_type,
                     bool build_conjunct_ctxs_is_empty, std::list<RuntimeFilterBuildDescriptor*> build_runtime_filters,
-                    std::set<SlotId> build_output_slots, std::set<SlotId> probe_output_slots,
+                    std::set<SlotId> build_output_slots, std::set<SlotId> probe_output_slots, size_t max_dop,
                     const TJoinDistributionMode::type distribution_mode, bool enable_late_materialization,
                     bool enable_partition_hash_join, bool is_skew_join)
            : _pool(pool),

@@ -88,6 +88,7 @@ struct HashJoinerParam {
              _build_runtime_filters(std::move(build_runtime_filters)),
              _build_output_slots(std::move(build_output_slots)),
              _probe_output_slots(std::move(probe_output_slots)),
+             _max_dop(max_dop),
              _distribution_mode(distribution_mode),
              _enable_late_materialization(enable_late_materialization),
              _enable_partition_hash_join(enable_partition_hash_join),

@@ -113,6 +114,8 @@ struct HashJoinerParam {
    std::set<SlotId> _build_output_slots;
    std::set<SlotId> _probe_output_slots;

+   size_t _max_dop;

    const TJoinDistributionMode::type _distribution_mode;
    const bool _enable_late_materialization;
    const bool _enable_partition_hash_join;
@@ -205,7 +208,7 @@ public:

    void enter_eos_phase() { _phase = HashJoinPhase::EOS; }
    // build phase
-   Status append_chunk_to_ht(const ChunkPtr& chunk);
+   Status append_chunk_to_ht(RuntimeState* state, const ChunkPtr& chunk);

    Status append_chunk_to_spill_buffer(RuntimeState* state, const ChunkPtr& chunk);

@@ -343,6 +346,9 @@ public:
        return DeferOp([this]() { _probe_observable.notify_source_observers(); });
    }

+   size_t max_dop() const { return _max_dop; }
+   TJoinDistributionMode::type distribution_mode() const { return _hash_join_node.distribution_mode; }

private:
    static bool _has_null(const ColumnPtr& column);
@@ -361,7 +367,7 @@ private:
            const_column->data_column()->assign(chunk->num_rows(), 0);
            key_columns.emplace_back(const_column->data_column());
        } else {
-           key_columns.emplace_back(column_ptr);
+           key_columns.emplace_back(std::move(column_ptr));
        }
    }
    return Status::OK();

@@ -483,6 +489,8 @@ private:
    pipeline::Observable _builder_observable;
    pipeline::Observable _probe_observable;

+   size_t _max_dop = 0;

    bool _is_skew_join = false;
};
@@ -237,8 +237,9 @@ Status HdfsScanner::get_next(RuntimeState* runtime_state, ChunkPtr* chunk) {
    // short circuit for min/max optimization.
    if (_scanner_ctx.can_use_min_max_optimization()) {
-       _scanner_ctx.append_or_update_min_max_column_to_chunk(chunk, 3);
-       size_t row_count = (*chunk)->num_rows();
+       // 3 means we output 3 values: min, max, and null.
+       const size_t row_count = 3;
+       (*chunk)->set_num_rows(row_count);
+       _scanner_ctx.append_or_update_min_max_column_to_chunk(chunk, row_count);
        _scanner_ctx.append_or_update_partition_column_to_chunk(chunk, row_count);
        _scanner_ctx.append_or_update_extended_column_to_chunk(chunk, row_count);
        _scanner_ctx.no_more_chunks = true;
@@ -26,6 +26,7 @@
#include "simd/simd.h"
#include "types/logical_type_infra.h"
#include "util/runtime_profile.h"
+#include "util/stack_util.h"

namespace starrocks {
@@ -47,6 +48,10 @@ private:
    template <LogicalType LT>
    static std::pair<bool, JoinHashMapMethodUnaryType> _try_use_range_direct_mapping(RuntimeState* state,
                                                                                     JoinHashTableItems* table_items);
+   // @return: <can_use, JoinHashMapMethodUnaryType>, where `JoinHashMapMethodUnaryType` is effective only when `can_use` is true.
+   template <LogicalType LT>
+   static std::pair<bool, JoinHashMapMethodUnaryType> _try_use_linear_chained(RuntimeState* state,
+                                                                              JoinHashTableItems* table_items);
};

std::tuple<JoinKeyConstructorUnaryType, JoinHashMapMethodUnaryType>
@@ -152,6 +157,10 @@ JoinHashMapMethodUnaryType JoinHashMapSelector::_determine_hash_map_method(
            }
        }

+       if (const auto [can_use, hash_map_type] = _try_use_linear_chained<LT>(state, table_items); can_use) {
+           return hash_map_type;
+       }

        return JoinHashMapMethodTypeTraits<JoinHashMapMethodType::BUCKET_CHAINED, LT>::unary_type;
    }
});
@@ -220,6 +229,28 @@ std::pair<bool, JoinHashMapMethodUnaryType> JoinHashMapSelector::_try_use_range_
    return {false, JoinHashMapMethodUnaryType::BUCKET_CHAINED_INT};
}

+template <LogicalType LT>
+std::pair<bool, JoinHashMapMethodUnaryType> JoinHashMapSelector::_try_use_linear_chained(
+        RuntimeState* state, JoinHashTableItems* table_items) {
+    if (!state->enable_hash_join_linear_chained_opt()) {
+        return {false, JoinHashMapMethodTypeTraits<JoinHashMapMethodType::BUCKET_CHAINED, LT>::unary_type};
+    }
+
+    const uint64_t bucket_size = JoinHashMapHelper::calc_bucket_size(table_items->row_count + 1);
+    if (bucket_size > LinearChainedJoinHashMap<LT>::max_supported_bucket_size()) {
+        return {false, JoinHashMapMethodTypeTraits<JoinHashMapMethodType::BUCKET_CHAINED, LT>::unary_type};
+    }
+
+    const bool is_left_anti_join_without_other_conjunct =
+            (table_items->join_type == TJoinOp::LEFT_ANTI_JOIN || table_items->join_type == TJoinOp::LEFT_SEMI_JOIN) &&
+            !table_items->with_other_conjunct;
+    if (is_left_anti_join_without_other_conjunct) {
+        return {true, JoinHashMapMethodTypeTraits<JoinHashMapMethodType::LINEAR_CHAINED_SET, LT>::unary_type};
+    } else {
+        return {true, JoinHashMapMethodTypeTraits<JoinHashMapMethodType::LINEAR_CHAINED, LT>::unary_type};
+    }
+}

// ------------------------------------------------------------------------------------
// JoinHashMap
// ------------------------------------------------------------------------------------
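`LinearChainedJoinHashMap` itself is not shown in this hunk, so the following is only a generic illustration of why a linear-chained layout is attractive: collisions are resolved by scanning adjacent buckets instead of chasing a `next` pointer chain, which keeps successive probes on neighboring cache lines.

#include <cstdint>
#include <vector>

// Generic linear-probing lookup sketch (not the StarRocks implementation).
// buckets holds 1-based build-row indexes; 0 means empty; bucket_count is a
// power of two.
uint32_t linear_probe_find(const std::vector<uint32_t>& buckets, const std::vector<int64_t>& build_keys,
                           uint32_t bucket_count, uint32_t hash, int64_t probe_key) {
    uint32_t slot = hash & (bucket_count - 1);
    while (buckets[slot] != 0) {
        const uint32_t row = buckets[slot] - 1;
        if (build_keys[row] == probe_key) {
            return buckets[slot]; // found: return the 1-based row index
        }
        slot = (slot + 1) & (bucket_count - 1); // step to the adjacent bucket
    }
    return 0; // not found
}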
@@ -483,6 +514,15 @@ void JoinHashTable::_init_join_keys() {
}

int64_t JoinHashTable::mem_usage() const {
+   // Theoretically, `_table_items` may be a nullptr after a cancel, even though in practice we haven't observed any
+   // cases where `_table_items` was unexpectedly cleared or left uninitialized.
+   // To prevent potential null pointer exceptions, we add a defensive check here.
+   if (_table_items == nullptr) {
+       LOG(WARNING) << "table_items is nullptr in mem_usage, stack:" << get_stack_trace();
+       DCHECK(false);
+       return 0;
+   }
+
    int64_t usage = 0;
    if (_table_items->build_chunk != nullptr) {
        usage += _table_items->build_chunk->memory_usage();
@@ -617,6 +657,21 @@ void JoinHashTable::merge_ht(const JoinHashTable& ht) {
        }
        columns[i]->append(*other_columns[i], 1, other_columns[i]->size() - 1);
    }

+   auto& key_columns = _table_items->key_columns;
+   auto& other_key_columns = ht._table_items->key_columns;
+   for (size_t i = 0; i < key_columns.size(); i++) {
+       // If the join key is a slot ref, it is fetched from the build chunk directly;
+       // otherwise it is appended from the key_column of the input.
+       if (_table_items->join_keys[i].col_ref == nullptr) {
+           // Upgrade to a nullable column.
+           if (!key_columns[i]->is_nullable() && other_key_columns[i]->is_nullable()) {
+               const size_t row_count = key_columns[i]->size();
+               key_columns[i] = NullableColumn::create(key_columns[i], NullColumn::create(row_count, 0));
+           }
+           key_columns[i]->append(*other_key_columns[i]);
+       }
+   }
}

ChunkPtr JoinHashTable::convert_to_spill_schema(const ChunkPtr& chunk) const {
@@ -327,26 +327,6 @@ private:
    HashTableProbeState* _probe_state = nullptr;
};

-#define JoinHashMapForOneKey(LT) JoinHashMap<LT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::BUCKET_CHAINED>
-#define JoinHashMapForDirectMapping(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::DIRECT_MAPPING>
-#define JoinHashMapForFixedSizeKey(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE, JoinHashMapMethodType::BUCKET_CHAINED>
-#define JoinHashMapForSerializedKey(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::SERIALIZED, JoinHashMapMethodType::BUCKET_CHAINED>
-#define JoinHashMapForOneKeyRangeDirectMapping(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::RANGE_DIRECT_MAPPING>
-#define JoinHashSetForOneKeyRangeDirectMapping(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::RANGE_DIRECT_MAPPING_SET>
-#define JoinHashMapForOneKeyDenseRangeDirectMapping(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::DENSE_RANGE_DIRECT_MAPPING>
-#define JoinHashMapForFixedSizeKeyRangeDirectMapping(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE, JoinHashMapMethodType::RANGE_DIRECT_MAPPING>
-#define JoinHashSetForFixedSizeKeyRangeDirectMapping(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE, JoinHashMapMethodType::RANGE_DIRECT_MAPPING_SET>
-#define JoinHashMapForFixedSizeKeyDenseRangeDirectMapping(LT) \
-    JoinHashMap<LT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE, JoinHashMapMethodType::DENSE_RANGE_DIRECT_MAPPING>

// ------------------------------------------------------------------------------------
// JoinHashTable
// ------------------------------------------------------------------------------------
@@ -420,42 +400,55 @@ private:
    void _remove_duplicate_index_for_right_anti_join(Filter* filter);
    void _remove_duplicate_index_for_full_outer_join(Filter* filter);

-   using JoinHashMapVariant =
-           std::variant<std::unique_ptr<JoinHashMapForEmpty>,
-                        std::unique_ptr<JoinHashMapForDirectMapping(TYPE_BOOLEAN)>,
-                        std::unique_ptr<JoinHashMapForDirectMapping(TYPE_TINYINT)>,
-                        std::unique_ptr<JoinHashMapForDirectMapping(TYPE_SMALLINT)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_INT)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_BIGINT)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_LARGEINT)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_FLOAT)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_DOUBLE)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_VARCHAR)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_DATE)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_DATETIME)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_DECIMALV2)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_DECIMAL32)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_DECIMAL64)>,
-                        std::unique_ptr<JoinHashMapForOneKey(TYPE_DECIMAL128)>,
-                        std::unique_ptr<JoinHashMapForSerializedKey(TYPE_VARCHAR)>,
-                        std::unique_ptr<JoinHashMapForFixedSizeKey(TYPE_INT)>,
-                        std::unique_ptr<JoinHashMapForFixedSizeKey(TYPE_BIGINT)>,
-                        std::unique_ptr<JoinHashMapForFixedSizeKey(TYPE_LARGEINT)>,
-                        std::unique_ptr<JoinHashMapForOneKeyRangeDirectMapping(TYPE_INT)>,
-                        std::unique_ptr<JoinHashMapForOneKeyRangeDirectMapping(TYPE_BIGINT)>,
-                        std::unique_ptr<JoinHashSetForOneKeyRangeDirectMapping(TYPE_INT)>,
-                        std::unique_ptr<JoinHashSetForOneKeyRangeDirectMapping(TYPE_BIGINT)>,
-                        std::unique_ptr<JoinHashMapForOneKeyDenseRangeDirectMapping(TYPE_INT)>,
-                        std::unique_ptr<JoinHashMapForOneKeyDenseRangeDirectMapping(TYPE_BIGINT)>,
-                        std::unique_ptr<JoinHashMapForFixedSizeKeyRangeDirectMapping(TYPE_INT)>,
-                        std::unique_ptr<JoinHashMapForFixedSizeKeyRangeDirectMapping(TYPE_BIGINT)>,
-                        std::unique_ptr<JoinHashSetForFixedSizeKeyRangeDirectMapping(TYPE_INT)>,
-                        std::unique_ptr<JoinHashSetForFixedSizeKeyRangeDirectMapping(TYPE_BIGINT)>,
-                        std::unique_ptr<JoinHashMapForFixedSizeKeyDenseRangeDirectMapping(TYPE_INT)>,
-                        std::unique_ptr<JoinHashMapForFixedSizeKeyDenseRangeDirectMapping(TYPE_BIGINT)>>;
+#define JoinHashMapForIntBigintKey(MT)                                                                         \
+    std::unique_ptr<JoinHashMap<TYPE_INT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,        \
+    std::unique_ptr<JoinHashMap<TYPE_BIGINT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,     \
+    std::unique_ptr<                                                                                           \
+            JoinHashMap<TYPE_INT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE, JoinHashMapMethodType::MT>>,  \
+    std::unique_ptr<JoinHashMap<TYPE_BIGINT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE,                    \
+                                JoinHashMapMethodType::MT>>
+
+#define JoinHashMapForSmallKey(MT)                                                                             \
+    std::unique_ptr<JoinHashMap<TYPE_BOOLEAN, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,    \
+    std::unique_ptr<JoinHashMap<TYPE_TINYINT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,    \
+    std::unique_ptr<JoinHashMap<TYPE_SMALLINT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>
+
+#define JoinHashMapForNonSmallKey(MT)                                                                          \
+    std::unique_ptr<JoinHashMap<TYPE_INT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,        \
+    std::unique_ptr<JoinHashMap<TYPE_BIGINT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,     \
+    std::unique_ptr<JoinHashMap<TYPE_LARGEINT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,   \
+    std::unique_ptr<JoinHashMap<TYPE_FLOAT, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,      \
+    std::unique_ptr<JoinHashMap<TYPE_DOUBLE, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,     \
+    std::unique_ptr<JoinHashMap<TYPE_DATE, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,       \
+    std::unique_ptr<JoinHashMap<TYPE_DATETIME, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,   \
+    std::unique_ptr<JoinHashMap<TYPE_DECIMALV2, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,  \
+    std::unique_ptr<JoinHashMap<TYPE_DECIMAL32, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,  \
+    std::unique_ptr<JoinHashMap<TYPE_DECIMAL64, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,  \
+    std::unique_ptr<JoinHashMap<TYPE_DECIMAL128, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>, \
+    std::unique_ptr<JoinHashMap<TYPE_VARCHAR, JoinKeyConstructorType::ONE_KEY, JoinHashMapMethodType::MT>>,    \
+                                                                                                               \
+    std::unique_ptr<                                                                                           \
+            JoinHashMap<TYPE_INT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE, JoinHashMapMethodType::MT>>,  \
+    std::unique_ptr<JoinHashMap<TYPE_BIGINT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE,                    \
+                                JoinHashMapMethodType::MT>>,                                                   \
+    std::unique_ptr<JoinHashMap<TYPE_LARGEINT, JoinKeyConstructorType::SERIALIZED_FIXED_SIZE,                  \
+                                JoinHashMapMethodType::MT>>,                                                   \
+                                                                                                               \
+    std::unique_ptr<JoinHashMap<TYPE_VARCHAR, JoinKeyConstructorType::SERIALIZED, JoinHashMapMethodType::MT>>
+
+   using JoinHashMapVariant = std::variant<std::unique_ptr<JoinHashMapForEmpty>,
+                                           JoinHashMapForSmallKey(DIRECT_MAPPING),
+                                           JoinHashMapForNonSmallKey(BUCKET_CHAINED),
+                                           JoinHashMapForNonSmallKey(LINEAR_CHAINED),
+                                           JoinHashMapForNonSmallKey(LINEAR_CHAINED_SET),
+                                           JoinHashMapForIntBigintKey(RANGE_DIRECT_MAPPING),
+                                           JoinHashMapForIntBigintKey(RANGE_DIRECT_MAPPING_SET),
+                                           JoinHashMapForIntBigintKey(DENSE_RANGE_DIRECT_MAPPING)>;
+
+#undef JoinHashMapForNonSmallKey
+#undef JoinHashMapForSmallKey
+#undef JoinHashMapForIntBigintKey

    bool _is_empty_map = true;
    JoinKeyConstructorUnaryType _key_constructor_type;
@@ -400,7 +400,7 @@ void JoinHashMap<LT, CT, MT>::_search_ht(RuntimeState* state, ChunkPtr* probe_ch

    auto& build_data = BuildKeyConstructor().get_key_data(*_table_items);
    auto& probe_data = ProbeKeyConstructor().get_key_data(*_probe_state);
-   HashMapMethod().lookup_init(*_table_items, _probe_state, probe_data, _probe_state->null_array);
+   HashMapMethod().lookup_init(*_table_items, _probe_state, build_data, probe_data, _probe_state->null_array);
    _probe_state->consider_probe_time_locality();

    if (_table_items->is_collision_free_and_unique) {
@@ -629,9 +629,11 @@ void JoinHashMap<LT, CT, MT>::_search_ht_impl(RuntimeState* state, const Buffer<
#define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#endif

-#define PREFETCH_AND_COWAIT(x, y) \
-    XXH_PREFETCH(x);              \
-    XXH_PREFETCH(y);              \
+#define PREFETCH_AND_COWAIT(cur_data, next_index)            \
+    if constexpr (!HashMapMethod::AreKeysInChainIdentical) { \
+        XXH_PREFETCH(cur_data);                              \
+    }                                                        \
+    XXH_PREFETCH(next_index);                                \
    co_await std::suspend_always{};

// When a probe row corresponds to multiple Build rows,
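`PREFETCH_AND_COWAIT` prefetches the data needed by the next step of one probe and then suspends that probe's coroutine, so a batch of lookups advances in interleaved fashion and each resumes after its cache line has (hopefully) arrived; the new `AreKeysInChainIdentical` flag lets instantiations skip the key prefetch when every entry in a chain holds the same key. A stripped-down, hypothetical illustration of the pattern with C++20 coroutines (not the StarRocks types):

#include <coroutine>
#include <cstdint>
#include <vector>

// Minimal manually-resumed coroutine handle.
struct ProbeTask {
    struct promise_type {
        ProbeTask get_return_object() { return {std::coroutine_handle<promise_type>::from_promise(*this)}; }
        std::suspend_always initial_suspend() { return {}; }
        std::suspend_always final_suspend() noexcept { return {}; }
        void return_void() {}
        void unhandled_exception() {}
    };
    std::coroutine_handle<promise_type> handle;
};

// One probe walks its chain; it prefetches the next node, then suspends so
// sibling probes can run while the cache line loads. next[idx] == 0 ends the
// chain, so prefetching keys[0] on the last node is harmless.
ProbeTask probe_chain(const std::vector<uint32_t>& next, const std::vector<int64_t>& keys, uint32_t start,
                      int64_t key, bool* found) {
    for (uint32_t idx = start; idx != 0; idx = next[idx]) {
        __builtin_prefetch(&keys[next[idx]], 0, 3);
        co_await std::suspend_always{};
        if (keys[idx] == key) {
            *found = true;
            co_return;
        }
    }
    *found = false;
}

// Driver: round-robin resume a batch of suspended probes until all finish.
void run_interleaved(std::vector<ProbeTask>& tasks) {
    bool progress = true;
    while (progress) {
        progress = false;
        for (auto& t : tasks) {
            if (!t.handle.done()) {
                t.handle.resume();
                progress = true;
            }
        }
    }
    for (auto& t : tasks) {
        t.handle.destroy();
    }
}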
@@ -994,6 +996,19 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join(RuntimeState* st
        }
    }

+   if (match_count == probe_row_count) {
+       _probe_state->match_flag = JoinMatchFlag::ALL_MATCH_ONE;
+   } else if (match_count * 2 >= probe_row_count) {
+       _probe_state->match_flag = JoinMatchFlag::MOST_MATCH_ONE;
+       uint8_t* match_filter_data = _probe_state->probe_match_filter.data();
+       memset(match_filter_data, 0, sizeof(uint8_t) * probe_row_count);
+       for (uint32_t i = 0; i < match_count; i++) {
+           match_filter_data[_probe_state->probe_index[i]] = 1;
+       }
+   } else {
+       _probe_state->match_flag = JoinMatchFlag::NORMAL;
+   }

    PROBE_OVER()
}
@@ -1001,10 +1016,10 @@ template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_anti_join(RuntimeState* state, const Buffer<CppType>& build_data,
                                                                const Buffer<CppType>& probe_data) {
-   size_t match_count = 0;
-
-   size_t probe_row_count = _probe_state->probe_row_count;
    DCHECK_LT(0, _table_items->row_count);

+   size_t match_count = 0;
+   const size_t probe_row_count = _probe_state->probe_row_count;
    if (_table_items->join_type == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && _probe_state->null_array != nullptr) {
        // process left anti join rewritten from NOT IN
        for (size_t i = 0; i < probe_row_count; i++) {
@@ -1022,6 +1037,19 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_anti_join(RuntimeState* st
        }
    }

+   if (match_count == probe_row_count) {
+       _probe_state->match_flag = JoinMatchFlag::ALL_MATCH_ONE;
+   } else if (match_count * 2 >= probe_row_count) {
+       _probe_state->match_flag = JoinMatchFlag::MOST_MATCH_ONE;
+       uint8_t* match_filter_data = _probe_state->probe_match_filter.data();
+       memset(match_filter_data, 0, sizeof(uint8_t) * probe_row_count);
+       for (uint32_t i = 0; i < match_count; i++) {
+           match_filter_data[_probe_state->probe_index[i]] = 1;
+       }
+   } else {
+       _probe_state->match_flag = JoinMatchFlag::NORMAL;
+   }

    PROBE_OVER()
}
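The classification added to both the semi- and anti-join probes above presumably feeds output fast paths: `ALL_MATCH_ONE` means every probe row matched exactly once, so build-side output can be produced without an index gather; `MOST_MATCH_ONE` (at least half the rows matched) materializes a per-row byte filter so the output can be built with one linear filtered copy per column instead of per-row gathers; anything sparser falls back to `NORMAL`. For instance, 4000 matches out of 4096 probe rows take the filter path rather than 4000 random-access gathers.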
@@ -1122,7 +1150,13 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_outer_join(RuntimeState*
            _probe_state->build_match_index[build_index] = 1;
            match_count++;

-           RETURN_IF_CHUNK_FULL()
+           if constexpr (!is_collision_free_and_unique) {
+               RETURN_IF_CHUNK_FULL()
+           }
        }

+       if constexpr (is_collision_free_and_unique) {
+           break;
+       }
        build_index = _table_items->next[build_index];
    }
@@ -1190,9 +1224,15 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_semi_join(RuntimeState* s
                _probe_state->build_match_index[build_index] = 1;
                match_count++;

-               RETURN_IF_CHUNK_FULL()
+               if constexpr (!is_collision_free_and_unique) {
+                   RETURN_IF_CHUNK_FULL()
+               }
            }
        }

+       if constexpr (is_collision_free_and_unique) {
+           break;
+       }
        build_index = _table_items->next[build_index];
    }
}
@@ -1247,6 +1287,10 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_anti_join(RuntimeState* s
             if (HashMapMethod().equal(build_data[index], probe_data[i])) {
                 _probe_state->build_match_index[index] = 1;
             }
+
+            if constexpr (is_collision_free_and_unique) {
+                break;
+            }
             index = _table_items->next[index];
         }
     }
@@ -1311,7 +1355,13 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_full_outer_join(RuntimeState* s
                 _probe_state->cur_row_match_count++;
                 match_count++;

-                RETURN_IF_CHUNK_FULL()
+                if constexpr (!is_collision_free_and_unique) {
+                    RETURN_IF_CHUNK_FULL()
+                }
             }

+            if constexpr (is_collision_free_and_unique) {
+                break;
+            }
             build_index = _table_items->next[build_index];
         }
@@ -1399,8 +1449,15 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join_with_other_conju
                 _probe_state->build_index[match_count] = build_index;
                 match_count++;

-                RETURN_IF_CHUNK_FULL()
+                if constexpr (!is_collision_free_and_unique) {
+                    RETURN_IF_CHUNK_FULL()
+                }
             }

+            if constexpr (is_collision_free_and_unique) {
+                break;
+            }
+
             build_index = _table_items->next[build_index];
         }
     }
@@ -1463,8 +1520,15 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_null_aware_anti_join_with_other
                 match_count++;
                 _probe_state->cur_row_match_count++;

-                RETURN_IF_CHUNK_FULL()
+                if constexpr (!is_collision_free_and_unique) {
+                    RETURN_IF_CHUNK_FULL()
+                }
             }

+            if constexpr (is_collision_free_and_unique) {
+                break;
+            }
+
             build_index = _table_items->next[build_index];
         }
@@ -1503,7 +1567,13 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_outer_right_semi_right_an
                 _probe_state->build_index[match_count] = build_index;
                 match_count++;

-                RETURN_IF_CHUNK_FULL()
+                if constexpr (!is_collision_free_and_unique) {
+                    RETURN_IF_CHUNK_FULL()
+                }
             }

+            if constexpr (is_collision_free_and_unique) {
+                break;
+            }
             build_index = _table_items->next[build_index];
         }
@@ -1552,7 +1622,13 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_left_anti_full_outer
                 _probe_state->cur_row_match_count++;
                 match_count++;

-                RETURN_IF_CHUNK_FULL()
+                if constexpr (!is_collision_free_and_unique) {
+                    RETURN_IF_CHUNK_FULL()
+                }
             }

+            if constexpr (is_collision_free_and_unique) {
+                break;
+            }
             build_index = _table_items->next[build_index];
         }
@@ -56,16 +56,101 @@ public:
     using CppType = typename RunTimeTypeTraits<LT>::CppType;
     using ColumnType = typename RunTimeTypeTraits<LT>::ColumnType;

+    static constexpr bool AreKeysInChainIdentical = false;
+
     static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
     static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
                                      const Buffer<uint8_t>* is_nulls);

     static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
-                            const Buffer<CppType>& keys, const Buffer<uint8_t>* is_nulls);
+                            const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
+                            const Buffer<uint8_t>* is_nulls);

     static bool equal(const CppType& x, const CppType& y) { return x == y; }
 };

+// The `LinearChainedJoinHashMap` uses linear probing to store distinct keys and chaining to store the
+// linked lists of identical keys.
+// - `first` stores the build index of the header of the linked list for each distinct key.
+// - `next` maintains the linked-list structure for each distinct key.
+//
+// Fingerprint
+// - Each `first` entry uses the highest byte to store the fingerprint and the lower 3 bytes for the build index,
+//   thus supporting up to 0xFFFFFF buckets.
+// - The fingerprint is generated via hashing.
+//   During hashing, `bucket_num_with_fp = hash % (bucket_size * 256)` is computed instead of `hash % bucket_size`.
+//   The lower 8 bits of `bucket_num_with_fp` represent the fingerprint (`fp`),
+//   while `bucket_num_with_fp >> 8` yields the bucket number.
+//
+// Insert and probe
+// - During insertion, linear probing is used in `first` to locate either the first empty bucket or an existing
+//   matching key. The new build index is then inserted into the corresponding linked list in `next`.
+// - During probing, linear probing is used in `first` to locate either an empty bucket or the bucket_num for a
+//   matching key.
+//   - If an empty bucket is found, it indicates that no matching key exists.
+//   - If a matching key exists, the entire linked list (with `first[bucket_num]` as its header) in `next` stores
+//     the build indexes for all the identical keys.
+//
+// The following diagram illustrates the structure of `LinearChainedJoinHashMap`:
+//
+// build keys first next
+// ┌──────────────┐ ┌───┐
+// │FP|build_index│ │ │◄───┐
+// │1B 3B │ │ │◄┐ │
+// ├──────────────┤ ├───┤ │ │
+// ┌───────►│ │ │ │ │ │
+// ┌────┐ │ ┌──┤ │ │ │ │ │
+// ┌──────┐ │ │ │ │ ├──────────────┤ ├───┤ │ │
+// │ key ├─►│hash├───┘ └─►│ │ │ ├─┘ │
+// └──────┘ │ │ ┌──┤ │ │ │◄─┐ │
+// └────┘ │ ├──────────────┤ ├───┤ │ │
+// │ │ │ │ │ │ │
+// │ │ │ │ │ │ │
+// │ ├──────────────┤ ├───┤ │ │
+// └─►│ ├──►│ │ │ │
+// │ │ │ ├──┘ │
+// ├──────────────┤ ├───┤ │
+// │ │ │ │ │
+// │ │ │ │ │
+// ├──────────────┤ ├───┤ │
+// │ │ │ │ │
+// │ ├──►│ ├────┘
+// └──────────────┘ └───┘
+template <LogicalType LT, bool NeedBuildChained = true>
+class LinearChainedJoinHashMap {
+public:
+    using CppType = typename RunTimeTypeTraits<LT>::CppType;
+    using ColumnType = typename RunTimeTypeTraits<LT>::ColumnType;
+
+    static constexpr bool AreKeysInChainIdentical = true;
+
+    static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
+    static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
+                                     const Buffer<uint8_t>* is_nulls);
+
+    static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
+                            const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
+                            const Buffer<uint8_t>* is_nulls);
+
+    static bool equal(const CppType& x, const CppType& y) { return true; }
+
+    static uint32_t max_supported_bucket_size() { return DATA_MASK; }
+
+private:
+    static constexpr uint32_t FP_BITS = 8;
+    static constexpr uint32_t FP_MASK = 0xFF00'0000ul;
+    static constexpr uint32_t DATA_MASK = 0x00FF'FFFFul;
+
+    static uint32_t _combine_data_fp(const uint32_t data, const uint32_t fp) { return fp | data; }
+    static uint32_t _extract_data(const uint32_t v) { return v & DATA_MASK; }
+    static uint32_t _extract_fp(const uint32_t v) { return v & FP_MASK; }
+
+    static uint32_t _get_bucket_num_from_hash(const uint32_t hash) { return hash >> FP_BITS; }
+    static uint32_t _get_fp_from_hash(const uint32_t hash) { return hash << (32 - FP_BITS); }
+};
+
+template <LogicalType LT>
+using LinearChainedJoinHashSet = LinearChainedJoinHashMap<LT, false>;

 // The bucket-chained linked list formed by `first` and `next` is the same as that of `BucketChainedJoinHashMap`.
 //
 // `DirectMappingJoinHashMap` maps to a position in `first` using `key - MIN_VALUE`.
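Editor's note: a compact sketch of the packing and probing scheme the comment block above describes. FP_BITS/FP_MASK/DATA_MASK mirror the class constants; the standalone functions and their names are illustrative, not the engine's API.

#include <cstdint>

constexpr uint32_t FP_BITS = 8;
constexpr uint32_t FP_MASK = 0xFF00'0000u;
constexpr uint32_t DATA_MASK = 0x00FF'FFFFu;

// The hash ranges over bucket_size << FP_BITS values; split it into (bucket, fp).
uint32_t bucket_of(uint32_t hash) { return hash >> FP_BITS; }
uint32_t fp_of(uint32_t hash) { return hash << (32 - FP_BITS); }  // fp lands in the top byte

// One 32-bit `first` slot packs the fingerprint (top byte) and build index (low 3 bytes).
uint32_t pack(uint32_t build_index, uint32_t fp) { return fp | build_index; }
uint32_t unpack_index(uint32_t v) { return v & DATA_MASK; }
uint32_t unpack_fp(uint32_t v) { return v & FP_MASK; }

// Triangular probing: steps of 1, 2, 3, ... visit every slot of a power-of-two table.
uint32_t next_slot(uint32_t slot, uint32_t probe_times, uint32_t size_mask) {
    return (slot + probe_times) & size_mask;
}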
@@ -101,12 +186,15 @@ public:
     using CppType = typename RunTimeTypeTraits<LT>::CppType;
     using ColumnType = typename RunTimeTypeTraits<LT>::ColumnType;

+    static constexpr bool AreKeysInChainIdentical = true;
+
     static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
     static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
                                      const Buffer<uint8_t>* is_nulls);

     static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
-                            const Buffer<CppType>& keys, const Buffer<uint8_t>* is_nulls);
+                            const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
+                            const Buffer<uint8_t>* is_nulls);

     static bool equal(const CppType& x, const CppType& y) { return true; }
 };
@@ -149,12 +237,15 @@ public:
     using CppType = typename RunTimeTypeTraits<LT>::CppType;
     using ColumnType = typename RunTimeTypeTraits<LT>::ColumnType;

+    static constexpr bool AreKeysInChainIdentical = true;
+
     static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
     static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
                                      const Buffer<uint8_t>* is_nulls);

     static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
-                            const Buffer<CppType>& keys, const Buffer<uint8_t>* is_nulls);
+                            const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
+                            const Buffer<uint8_t>* is_nulls);

     static bool equal(const CppType& x, const CppType& y) { return true; }
 };
@@ -168,12 +259,15 @@ public:
     using CppType = typename RunTimeTypeTraits<LT>::CppType;
     using ColumnType = typename RunTimeTypeTraits<LT>::ColumnType;

+    static constexpr bool AreKeysInChainIdentical = true;
+
     static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
     static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
                                      const Buffer<uint8_t>* is_nulls);

     static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
-                            const Buffer<CppType>& keys, const Buffer<uint8_t>* is_nulls);
+                            const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
+                            const Buffer<uint8_t>* is_nulls);

     static bool equal(const CppType& x, const CppType& y) { return true; }
 };
@@ -221,12 +315,15 @@ public:
     using CppType = typename RunTimeTypeTraits<LT>::CppType;
     using ColumnType = typename RunTimeTypeTraits<LT>::ColumnType;

+    static constexpr bool AreKeysInChainIdentical = true;
+
     static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
     static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
                                      const Buffer<uint8_t>* is_nulls);

     static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
-                            const Buffer<CppType>& keys, const Buffer<uint8_t>* is_nulls);
+                            const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
+                            const Buffer<uint8_t>* is_nulls);

     static bool equal(const CppType& x, const CppType& y) { return true; }
 };
@@ -84,7 +84,8 @@ void BucketChainedJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* tabl

 template <LogicalType LT>
 void BucketChainedJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
-                                               const Buffer<CppType>& keys, const Buffer<uint8_t>* is_nulls) {
+                                               const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
+                                               const Buffer<uint8_t>* is_nulls) {
     const uint32_t row_count = probe_state->probe_row_count;
     const auto* firsts = table_items.first.data();
     const auto* buckets = probe_state->buckets.data();
|
|
@@ -92,8 +93,8 @@ void BucketChainedJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_i

     if (is_nulls == nullptr) {
         for (uint32_t i = 0; i < row_count; i++) {
-            probe_state->buckets[i] = JoinHashMapHelper::calc_bucket_num<CppType>(keys[i], table_items.bucket_size,
-                                                                                  table_items.log_bucket_size);
+            probe_state->buckets[i] = JoinHashMapHelper::calc_bucket_num<CppType>(
+                    probe_keys[i], table_items.bucket_size, table_items.log_bucket_size);
         }
         SIMDGather::gather(nexts, firsts, buckets, row_count);
     } else {
@@ -107,14 +108,197 @@ void BucketChainedJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_i
         };
         for (uint32_t i = 0; i < row_count; i++) {
             if (need_calc_bucket_num(i)) {
-                probe_state->buckets[i] = JoinHashMapHelper::calc_bucket_num<CppType>(keys[i], table_items.bucket_size,
-                                                                                      table_items.log_bucket_size);
+                probe_state->buckets[i] = JoinHashMapHelper::calc_bucket_num<CppType>(
+                        probe_keys[i], table_items.bucket_size, table_items.log_bucket_size);
             }
         }
         SIMDGather::gather(nexts, firsts, buckets, is_nulls_data, row_count);
     }
 }

+// ------------------------------------------------------------------------------------
+// LinearChainedJoinHashMap
+// ------------------------------------------------------------------------------------
+
+template <LogicalType LT, bool NeedBuildChained>
+void LinearChainedJoinHashMap<LT, NeedBuildChained>::build_prepare(RuntimeState* state,
+                                                                   JoinHashTableItems* table_items) {
+    table_items->bucket_size = JoinHashMapHelper::calc_bucket_size(table_items->row_count + 1);
+    table_items->log_bucket_size = __builtin_ctz(table_items->bucket_size);
+    table_items->first.resize(table_items->bucket_size, 0);
+    table_items->next.resize(table_items->row_count + 1, 0);
+}
+
+template <LogicalType LT, bool NeedBuildChained>
+void LinearChainedJoinHashMap<LT, NeedBuildChained>::construct_hash_table(JoinHashTableItems* table_items,
+                                                                          const Buffer<CppType>& keys,
+                                                                          const Buffer<uint8_t>* is_nulls) {
+    auto process = [&]<bool IsNullable>() {
+        const auto num_rows = 1 + table_items->row_count;
+        const uint32_t bucket_size_mask = table_items->bucket_size - 1;
+
+        auto* __restrict next = table_items->next.data();
+        auto* __restrict first = table_items->first.data();
+        const uint8_t* __restrict is_nulls_data = IsNullable ? is_nulls->data() : nullptr;
+
+        auto need_calc_bucket_num = [&](const uint32_t index) {
+            // Only check `is_nulls_data[i]` for the nullable slice type. The hash calculation overhead for
+            // fixed-size types is small, and thus we do not check it to allow vectorization of the hash calculation.
+            if constexpr (!IsNullable || !std::is_same_v<CppType, Slice>) {
+                return true;
+            } else {
+                return is_nulls_data[index] == 0;
+            }
+        };
+        auto is_null = [&](const uint32_t index) {
+            if constexpr (!IsNullable) {
+                return false;
+            } else {
+                return is_nulls_data[index] != 0;
+            }
+        };
+
+        for (uint32_t i = 1; i < num_rows; i++) {
+            // Use `next` to store `bucket_num` temporarily.
+            if (need_calc_bucket_num(i)) {
+                next[i] = JoinHashMapHelper::calc_bucket_num<CppType>(keys[i], table_items->bucket_size << FP_BITS,
+                                                                      table_items->log_bucket_size + FP_BITS);
+            }
+        }
+
+        for (uint32_t i = 1; i < num_rows; i++) {
+            if (i + 16 < num_rows && !is_null(i + 16)) {
+                __builtin_prefetch(first + _get_bucket_num_from_hash(next[i + 16]));
+            }
+
+            if (is_null(i)) {
+                next[i] = 0;
+                continue;
+            }
+
+            const uint32_t hash = next[i];
+            const uint32_t fp = _get_fp_from_hash(hash);
+            uint32_t bucket_num = _get_bucket_num_from_hash(hash);
+
+            uint32_t probe_times = 1;
+            while (true) {
+                if (first[bucket_num] == 0) {
+                    if constexpr (NeedBuildChained) {
+                        next[i] = 0;
+                    }
+                    first[bucket_num] = _combine_data_fp(i, fp);
+                    break;
+                }
+
+                if (fp == _extract_fp(first[bucket_num]) && keys[i] == keys[_extract_data(first[bucket_num])]) {
+                    if constexpr (NeedBuildChained) {
+                        next[i] = _extract_data(first[bucket_num]);
+                        first[bucket_num] = _combine_data_fp(i, fp);
+                    }
+                    break;
+                }
+
+                bucket_num = (bucket_num + probe_times) & bucket_size_mask;
+                probe_times++;
+            }
+        }
+
+        if constexpr (!NeedBuildChained) {
+            table_items->next.clear();
+        }
+    };
+
+    if (is_nulls == nullptr) {
+        process.template operator()<false>();
+    } else {
+        process.template operator()<true>();
+    }
+}
|
||||
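Editor's note: the two-pass shape above (a vectorizable hash pass, then a dependent walk with a 16-slot prefetch lookahead) is the core latency-hiding trick, and the same shape reappears in lookup_init below. A hedged stand-alone sketch of the pipelining idea (the constant 16, the `>> 8` bucket split, and all names are illustrative):

#include <cstdint>
#include <vector>

// Pass 1 computes all hashes (no dependent loads, so the compiler can vectorize);
// pass 2 walks the table, prefetching the bucket needed 16 iterations from now
// so the random access is already in cache when the walk reaches it.
void gather_heads(const std::vector<uint32_t>& hashes, const std::vector<uint32_t>& first,
                  std::vector<uint32_t>* out) {
    const size_t n = hashes.size();
    out->resize(n);
    for (size_t i = 0; i < n; i++) {
        if (i + 16 < n) {
            __builtin_prefetch(first.data() + (hashes[i + 16] >> 8));  // bucket of a future row
        }
        (*out)[i] = first[hashes[i] >> 8];  // assumes first covers every bucket index
    }
}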
+template <LogicalType LT, bool NeedBuildChained>
+void LinearChainedJoinHashMap<LT, NeedBuildChained>::lookup_init(const JoinHashTableItems& table_items,
+                                                                 HashTableProbeState* probe_state,
+                                                                 const Buffer<CppType>& build_keys,
+                                                                 const Buffer<CppType>& probe_keys,
+                                                                 const Buffer<uint8_t>* is_nulls) {
+    auto process = [&]<bool IsNullable>() {
+        const uint32_t bucket_size_mask = table_items.bucket_size - 1;
+        const uint32_t row_count = probe_state->probe_row_count;
+
+        const auto* firsts = table_items.first.data();
+        auto* hashes = probe_state->buckets.data();
+        auto* nexts = probe_state->next.data();
+        const uint8_t* is_nulls_data = IsNullable ? is_nulls->data() : nullptr;
+
+        auto need_calc_bucket_num = [&](const uint32_t index) {
+            if constexpr (!IsNullable || !std::is_same_v<CppType, Slice>) {
+                // Only check `is_nulls_data[i]` for the nullable slice type. The hash calculation overhead for
+                // fixed-size types is small, and thus we do not check it to allow vectorization of the hash calculation.
+                return true;
+            } else {
+                return is_nulls_data[index] == 0;
+            }
+        };
+        auto is_null = [&](const uint32_t index) {
+            if constexpr (!IsNullable) {
+                return false;
+            } else {
+                return is_nulls_data[index] != 0;
+            }
+        };
+
+        for (uint32_t i = 0; i < row_count; i++) {
+            if (need_calc_bucket_num(i)) {
+                hashes[i] = JoinHashMapHelper::calc_bucket_num<CppType>(
+                        probe_keys[i], table_items.bucket_size << FP_BITS, table_items.log_bucket_size + FP_BITS);
+            }
+        }
+
+        for (uint32_t i = 0; i < row_count; i++) {
+            if (i + 16 < row_count && !is_null(i + 16)) {
+                __builtin_prefetch(firsts + _get_bucket_num_from_hash(hashes[i + 16]));
+            }
+
+            if (is_null(i)) {
+                nexts[i] = 0;
+                continue;
+            }
+
+            const uint32_t hash = hashes[i];
+            const uint32_t fp = _get_fp_from_hash(hash);
+            uint32_t bucket_num = _get_bucket_num_from_hash(hash);
+
+            uint32_t probe_times = 1;
+            while (true) {
+                if (firsts[bucket_num] == 0) {
+                    nexts[i] = 0;
+                    break;
+                }
+
+                const uint32_t cur_fp = _extract_fp(firsts[bucket_num]);
+                const uint32_t cur_index = _extract_data(firsts[bucket_num]);
+                if (fp == cur_fp && probe_keys[i] == build_keys[cur_index]) {
+                    if constexpr (NeedBuildChained) {
+                        nexts[i] = cur_index;
+                    } else {
+                        nexts[i] = 1;
+                    }
+                    break;
+                }
+
+                bucket_num = (bucket_num + probe_times) & bucket_size_mask;
+                probe_times++;
+            }
+        }
+    };
+
+    if (is_nulls == nullptr) {
+        process.template operator()<false>();
+    } else {
+        process.template operator()<true>();
+    }
+}
+
 // ------------------------------------------------------------------------------------
 // DirectMappingJoinHashMap
 // ------------------------------------------------------------------------------------
@@ -155,7 +339,8 @@ void DirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* tabl

 template <LogicalType LT>
 void DirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
-                                               const Buffer<CppType>& keys, const Buffer<uint8_t>* is_nulls) {
+                                               const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
+                                               const Buffer<uint8_t>* is_nulls) {
     probe_state->active_coroutines = 0; // the ht data is not large, so disable it always.

     static constexpr CppType MIN_VALUE = RunTimeTypeLimits<LT>::min_value();
@@ -163,13 +348,13 @@ void DirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_i

     if (is_nulls == nullptr) {
         for (size_t i = 0; i < probe_row_count; i++) {
-            probe_state->next[i] = table_items.first[keys[i] - MIN_VALUE];
+            probe_state->next[i] = table_items.first[probe_keys[i] - MIN_VALUE];
         }
     } else {
         const auto* is_nulls_data = is_nulls->data();
         for (size_t i = 0; i < probe_row_count; i++) {
             if (is_nulls_data[i] == 0) {
-                probe_state->next[i] = table_items.first[keys[i] - MIN_VALUE];
+                probe_state->next[i] = table_items.first[probe_keys[i] - MIN_VALUE];
             } else {
                 probe_state->next[i] = 0;
             }
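Editor's note: in contrast to the linear-probing map, direct mapping needs no fingerprints or probing at all; the key itself selects the bucket. A minimal sketch for an int8 key domain (types, sizes, and the function are illustrative; it assumes `first` has one slot per possible key value):

#include <cstdint>
#include <limits>
#include <vector>

// For a small integer domain, a lookup is a single array read at key - MIN_VALUE;
// as in the real structure, a stored 0 still means "no match".
uint32_t direct_lookup(const std::vector<uint32_t>& first, int8_t key) {
    constexpr int32_t MIN_VALUE = std::numeric_limits<int8_t>::min();
    return first[static_cast<uint32_t>(key - MIN_VALUE)];  // index in [0, 255]
}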
@@ -215,7 +400,8 @@ void RangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems*

 template <LogicalType LT>
 void RangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items,
-                                                    HashTableProbeState* probe_state, const Buffer<CppType>& keys,
+                                                    HashTableProbeState* probe_state, const Buffer<CppType>& build_keys,
+                                                    const Buffer<CppType>& probe_keys,
                                                     const Buffer<uint8_t>* is_nulls) {
     probe_state->active_coroutines = 0; // the ht data is not large, so disable it always.

@@ -224,8 +410,8 @@ void RangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& ta
     const size_t num_rows = probe_state->probe_row_count;
     if (is_nulls == nullptr) {
         for (size_t i = 0; i < num_rows; i++) {
-            if ((keys[i] >= min_value) & (keys[i] <= max_value)) {
-                const uint64_t index = keys[i] - min_value;
+            if ((probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
+                const uint64_t index = probe_keys[i] - min_value;
                 probe_state->next[i] = table_items.first[index];
             } else {
                 probe_state->next[i] = 0;
@@ -234,8 +420,8 @@ void RangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& ta
     } else {
         const auto* is_nulls_data = is_nulls->data();
         for (size_t i = 0; i < num_rows; i++) {
-            if ((is_nulls_data[i] == 0) & (keys[i] >= min_value) & (keys[i] <= max_value)) {
-                const uint64_t index = keys[i] - min_value;
+            if ((is_nulls_data[i] == 0) & (probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
+                const uint64_t index = probe_keys[i] - min_value;
                 probe_state->next[i] = table_items.first[index];
             } else {
                 probe_state->next[i] = 0;
@@ -281,7 +467,8 @@ void RangeDirectMappingJoinHashSet<LT>::construct_hash_table(JoinHashTableItems*

 template <LogicalType LT>
 void RangeDirectMappingJoinHashSet<LT>::lookup_init(const JoinHashTableItems& table_items,
-                                                    HashTableProbeState* probe_state, const Buffer<CppType>& keys,
+                                                    HashTableProbeState* probe_state, const Buffer<CppType>& build_keys,
+                                                    const Buffer<CppType>& probe_keys,
                                                     const Buffer<uint8_t>* is_nulls) {
     probe_state->active_coroutines = 0; // the ht data is not large, so disable it always.

@@ -290,8 +477,8 @@ void RangeDirectMappingJoinHashSet<LT>::lookup_init(const JoinHashTableItems& ta
     const size_t num_rows = probe_state->probe_row_count;
     if (is_nulls == nullptr) {
         for (size_t i = 0; i < num_rows; i++) {
-            if ((keys[i] >= min_value) & (keys[i] <= max_value)) {
-                const uint64_t index = keys[i] - min_value;
+            if ((probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
+                const uint64_t index = probe_keys[i] - min_value;
                 const uint32_t group = index / 8;
                 const uint32_t offset = index % 8;
                 probe_state->next[i] = (table_items.key_bitset[group] & (1 << offset)) != 0;
@@ -302,8 +489,8 @@ void RangeDirectMappingJoinHashSet<LT>::lookup_init(const JoinHashTableItems& ta
     } else {
         const auto* is_nulls_data = is_nulls->data();
         for (size_t i = 0; i < num_rows; i++) {
-            if ((is_nulls_data[i] == 0) & (keys[i] >= min_value) & (keys[i] <= max_value)) {
-                const uint64_t index = keys[i] - min_value;
+            if ((is_nulls_data[i] == 0) & (probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
+                const uint64_t index = probe_keys[i] - min_value;
                 const uint32_t group = index / 8;
                 const uint32_t offset = index % 8;
                 probe_state->next[i] = (table_items.key_bitset[group] & (1 << offset)) != 0;
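Editor's note: the set variant above replaces the `first` array with a key bitset, one bit per value in the [min_value, max_value] range, so a probe is a shift, a mask, and a single byte load. A sketch of the membership test (the free function is hypothetical):

#include <cstdint>
#include <vector>

bool bitset_contains(const std::vector<uint8_t>& key_bitset, uint64_t index) {
    const uint32_t group = index / 8;   // which byte
    const uint32_t offset = index % 8;  // which bit within the byte
    return (key_bitset[group] & (1u << offset)) != 0;
}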
@@ -387,7 +574,9 @@ void DenseRangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableI

 template <LogicalType LT>
 void DenseRangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items,
-                                                         HashTableProbeState* probe_state, const Buffer<CppType>& keys,
+                                                         HashTableProbeState* probe_state,
+                                                         const Buffer<CppType>& build_keys,
+                                                         const Buffer<CppType>& probe_keys,
                                                          const Buffer<uint8_t>* is_nulls) {
     probe_state->active_coroutines = 0; // the ht data is not large, so disable it always.

@@ -415,8 +604,8 @@ void DenseRangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItem
     const size_t num_rows = probe_state->probe_row_count;
     if (is_nulls == nullptr) {
        for (size_t i = 0; i < num_rows; i++) {
-            if ((keys[i] >= min_value) & (keys[i] <= max_value)) {
-                const uint64_t bucket_num = keys[i] - min_value;
+            if ((probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
+                const uint64_t bucket_num = probe_keys[i] - min_value;
                 probe_state->next[i] = get_dense_first(bucket_num);
             } else {
                 probe_state->next[i] = 0;
@@ -425,8 +614,8 @@ void DenseRangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItem
     } else {
         const auto* is_nulls_data = is_nulls->data();
         for (size_t i = 0; i < num_rows; i++) {
-            if ((is_nulls_data[i] == 0) & (keys[i] >= min_value) & (keys[i] <= max_value)) {
-                const uint64_t bucket_num = keys[i] - min_value;
+            if ((is_nulls_data[i] == 0) & (probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
+                const uint64_t bucket_num = probe_keys[i] - min_value;
                 probe_state->next[i] = get_dense_first(bucket_num);
             } else {
                 probe_state->next[i] = 0;
@@ -114,7 +114,8 @@ struct JoinHashTableItems {
         // 1) the ht's size is large enough, for example, larger than (1UL << 27) bytes.
         // 2) smaller ht, but most buckets have more than one key
         cache_miss_serious = row_count > (1UL << 18) &&
-                             ((probe_bytes > (1UL << 25) && keys_per_bucket > 2) ||
+                             ((probe_bytes > (1UL << 24) && keys_per_bucket >= 10) ||
+                              (probe_bytes > (1UL << 25) && keys_per_bucket > 2) ||
                               (probe_bytes > (1UL << 26) && keys_per_bucket > 1.5) || probe_bytes > (1UL << 27));
         VLOG_QUERY << "ht cache miss serious = " << cache_miss_serious << " row# = " << row_count
                    << " , bytes = " << probe_bytes << " , depth = " << keys_per_bucket;
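Editor's note: the revised predicate reads as "only treat the probe as cache-miss bound when the table is non-trivial in rows and either deep or big". A direct transcription of the new condition (thresholds copied from the hunk; the wrapper function itself is illustrative):

#include <cstdint>

// row_count guards out small tables; the disjunction then trades table size
// against chain depth (keys_per_bucket): the bigger the table, the shallower
// the chains need to be before cache misses dominate.
bool is_cache_miss_serious(uint64_t row_count, uint64_t probe_bytes, double keys_per_bucket) {
    return row_count > (1UL << 18) &&
           ((probe_bytes > (1UL << 24) && keys_per_bucket >= 10) ||
            (probe_bytes > (1UL << 25) && keys_per_bucket > 2) ||
            (probe_bytes > (1UL << 26) && keys_per_bucket > 1.5) ||
            probe_bytes > (1UL << 27));
}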
@@ -43,7 +43,9 @@ namespace starrocks {
     M(DIRECT_MAPPING)             \
     M(RANGE_DIRECT_MAPPING)       \
     M(RANGE_DIRECT_MAPPING_SET)   \
-    M(DENSE_RANGE_DIRECT_MAPPING)
+    M(DENSE_RANGE_DIRECT_MAPPING) \
+    M(LINEAR_CHAINED)             \
+    M(LINEAR_CHAINED_SET)

 #define APPLY_JOIN_KEY_CONSTRUCTOR_UNARY_TYPE(M) \
     M(ONE_KEY_BOOLEAN)                           \
@@ -89,7 +91,33 @@ namespace starrocks {
     M(RANGE_DIRECT_MAPPING_SET_INT)      \
     M(RANGE_DIRECT_MAPPING_SET_BIGINT)   \
     M(DENSE_RANGE_DIRECT_MAPPING_INT)    \
-    M(DENSE_RANGE_DIRECT_MAPPING_BIGINT)
+    M(DENSE_RANGE_DIRECT_MAPPING_BIGINT) \
+                                         \
+    M(LINEAR_CHAINED_INT)                \
+    M(LINEAR_CHAINED_BIGINT)             \
+    M(LINEAR_CHAINED_LARGEINT)           \
+    M(LINEAR_CHAINED_FLOAT)              \
+    M(LINEAR_CHAINED_DOUBLE)             \
+    M(LINEAR_CHAINED_DATE)               \
+    M(LINEAR_CHAINED_DATETIME)           \
+    M(LINEAR_CHAINED_DECIMALV2)          \
+    M(LINEAR_CHAINED_DECIMAL32)          \
+    M(LINEAR_CHAINED_DECIMAL64)          \
+    M(LINEAR_CHAINED_DECIMAL128)         \
+    M(LINEAR_CHAINED_VARCHAR)            \
+                                         \
+    M(LINEAR_CHAINED_SET_INT)            \
+    M(LINEAR_CHAINED_SET_BIGINT)         \
+    M(LINEAR_CHAINED_SET_LARGEINT)       \
+    M(LINEAR_CHAINED_SET_FLOAT)          \
+    M(LINEAR_CHAINED_SET_DOUBLE)         \
+    M(LINEAR_CHAINED_SET_DATE)           \
+    M(LINEAR_CHAINED_SET_DATETIME)       \
+    M(LINEAR_CHAINED_SET_DECIMALV2)      \
+    M(LINEAR_CHAINED_SET_DECIMAL32)      \
+    M(LINEAR_CHAINED_SET_DECIMAL64)      \
+    M(LINEAR_CHAINED_SET_DECIMAL128)     \
+    M(LINEAR_CHAINED_SET_VARCHAR)

 enum class JoinKeyConstructorType {
 #define NAME_TO_ENUM(NAME) NAME,
@@ -237,6 +265,36 @@ REGISTER_JOIN_MAP_METHOD_TYPE(DENSE_RANGE_DIRECT_MAPPING, TYPE_INT, DenseRangeDi
 REGISTER_JOIN_MAP_METHOD_TYPE(DENSE_RANGE_DIRECT_MAPPING, TYPE_BIGINT, DenseRangeDirectMappingJoinHashMap,
                               DENSE_RANGE_DIRECT_MAPPING_BIGINT);

+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_INT, LinearChainedJoinHashMap, LINEAR_CHAINED_INT);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_BIGINT, LinearChainedJoinHashMap, LINEAR_CHAINED_BIGINT);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_LARGEINT, LinearChainedJoinHashMap, LINEAR_CHAINED_LARGEINT);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_FLOAT, LinearChainedJoinHashMap, LINEAR_CHAINED_FLOAT);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_DOUBLE, LinearChainedJoinHashMap, LINEAR_CHAINED_DOUBLE);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_DATE, LinearChainedJoinHashMap, LINEAR_CHAINED_DATE);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_DATETIME, LinearChainedJoinHashMap, LINEAR_CHAINED_DATETIME);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_DECIMALV2, LinearChainedJoinHashMap, LINEAR_CHAINED_DECIMALV2);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_DECIMAL32, LinearChainedJoinHashMap, LINEAR_CHAINED_DECIMAL32);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_DECIMAL64, LinearChainedJoinHashMap, LINEAR_CHAINED_DECIMAL64);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_DECIMAL128, LinearChainedJoinHashMap, LINEAR_CHAINED_DECIMAL128);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED, TYPE_VARCHAR, LinearChainedJoinHashMap, LINEAR_CHAINED_VARCHAR);
+
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_INT, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_INT);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_BIGINT, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_BIGINT);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_LARGEINT, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_LARGEINT);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_FLOAT, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_FLOAT);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DOUBLE, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_DOUBLE);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DATE, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_DATE);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DATETIME, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_DATETIME);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DECIMALV2, LinearChainedJoinHashSet,
+                              LINEAR_CHAINED_SET_DECIMALV2);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DECIMAL32, LinearChainedJoinHashSet,
+                              LINEAR_CHAINED_SET_DECIMAL32);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DECIMAL64, LinearChainedJoinHashSet,
+                              LINEAR_CHAINED_SET_DECIMAL64);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DECIMAL128, LinearChainedJoinHashSet,
+                              LINEAR_CHAINED_SET_DECIMAL128);
+REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_VARCHAR, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_VARCHAR);
+
 #undef REGISTER_JOIN_MAP_TYPE

 // ------------------------------------------------------------------------------------
@@ -72,7 +72,8 @@ Status OlapMetaScanner::_init_meta_reader_params() {
         column.set_type(path->value_type().type);
         column.set_length(path->value_type().len);
         column.set_is_nullable(true);
-        column.set_extended_info(std::make_unique<ExtendedColumnInfo>(path.get(), root_column_index));
+        int32_t root_uid = tmp_schema->column(static_cast<size_t>(root_column_index)).unique_id();
+        column.set_extended_info(std::make_unique<ExtendedColumnInfo>(path.get(), root_uid));

         tmp_schema->append_column(column);
         VLOG(2) << "extend the tablet-schema: " << column.debug_string();
@@ -69,8 +69,7 @@ Status AggregateBlockingSinkOperator::set_finishing(RuntimeState* state) {
         if (_aggregator->hash_map_variant().size() == 0) {
             _aggregator->set_ht_eos();
         }
-        _aggregator->hash_map_variant().visit(
-                [&](auto& hash_map_with_key) { _aggregator->it_hash() = _aggregator->_state_allocator.begin(); });
+        _aggregator->it_hash() = _aggregator->state_allocator().begin();

     } else if (_aggregator->is_none_group_by_exprs()) {
         // for aggregate no group by, if _num_input_rows is 0,
@@ -89,8 +89,7 @@ DEFINE_FAIL_POINT(force_reset_aggregator_after_agg_streaming_sink_finish);

 Status AggregateStreamingSourceOperator::_output_chunk_from_hash_map(ChunkPtr* chunk, RuntimeState* state) {
     if (!_aggregator->it_hash().has_value()) {
-        _aggregator->hash_map_variant().visit(
-                [&](auto& hash_map_with_key) { _aggregator->it_hash() = _aggregator->_state_allocator.begin(); });
+        _aggregator->it_hash() = _aggregator->state_allocator().begin();
         COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_map_variant().size());
     }

@@ -14,7 +14,7 @@

 #pragma once

-#include "exec/aggregator.h"
+#include "exec/aggregator_fwd.h"
 #include "exec/pipeline/source_operator.h"

 namespace starrocks::pipeline {
@@ -66,8 +66,7 @@ Status SpillableAggregateBlockingSinkOperator::set_finishing(RuntimeState* state
     }
     if (!_aggregator->spill_channel()->has_task()) {
         if (_aggregator->hash_map_variant().size() > 0 || !_streaming_chunks.empty()) {
-            _aggregator->hash_map_variant().visit(
-                    [&](auto& hash_map_with_key) { _aggregator->it_hash() = _aggregator->_state_allocator.begin(); });
+            _aggregator->it_hash() = _aggregator->state_allocator().begin();
             _aggregator->spill_channel()->add_spill_task(_build_spill_task(state));
         }
     }
@@ -270,8 +269,7 @@ Status SpillableAggregateBlockingSinkOperator::_try_to_spill_by_auto(RuntimeStat
 Status SpillableAggregateBlockingSinkOperator::_spill_all_data(RuntimeState* state, bool should_spill_hash_table) {
     RETURN_IF(_aggregator->hash_map_variant().size() == 0, Status::OK());
     if (should_spill_hash_table) {
-        _aggregator->hash_map_variant().visit(
-                [&](auto& hash_map_with_key) { _aggregator->it_hash() = _aggregator->_state_allocator.begin(); });
+        _aggregator->it_hash() = _aggregator->state_allocator().begin();
     }
     CHECK(!_aggregator->spill_channel()->has_task());
     RETURN_IF_ERROR(_aggregator->spill_aggregate_data(state, _build_spill_task(state, should_spill_hash_table)));
@@ -16,9 +16,8 @@

 #include <utility>

-#include "exec/aggregator.h"
+#include "exec/aggregator_fwd.h"
 #include "exec/pipeline/aggregate/aggregate_blocking_source_operator.h"
-#include "exec/sorted_streaming_aggregator.h"
 #include "runtime/runtime_state.h"
 #include "storage/chunk_helper.h"
@@ -16,7 +16,7 @@

 #include <utility>

-#include "exec/aggregator.h"
+#include "exec/aggregator_fwd.h"
 #include "exec/pipeline/aggregate/aggregate_distinct_blocking_sink_operator.h"
 #include "exec/pipeline/aggregate/aggregate_distinct_blocking_source_operator.h"
 #include "exec/pipeline/operator.h"
@@ -49,9 +49,7 @@ Status SpillablePartitionWiseAggregateSinkOperator::set_finishing(RuntimeState*
     }
     if (!_agg_op->aggregator()->spill_channel()->has_task()) {
         if (_agg_op->aggregator()->hash_map_variant().size() > 0 || !_streaming_chunks.empty()) {
-            _agg_op->aggregator()->hash_map_variant().visit([&](auto& hash_map_with_key) {
-                _agg_op->aggregator()->it_hash() = _agg_op->aggregator()->_state_allocator.begin();
-            });
+            _agg_op->aggregator()->it_hash() = _agg_op->aggregator()->state_allocator().begin();
             _agg_op->aggregator()->spill_channel()->add_spill_task(_build_spill_task(state));
         }
     }
@@ -279,9 +277,7 @@ ChunkPtr& SpillablePartitionWiseAggregateSinkOperator::_append_hash_column(Chunk
 Status SpillablePartitionWiseAggregateSinkOperator::_spill_all_data(RuntimeState* state, bool should_spill_hash_table) {
     RETURN_IF(_agg_op->aggregator()->hash_map_variant().size() == 0, Status::OK());
     if (should_spill_hash_table) {
-        _agg_op->aggregator()->hash_map_variant().visit([&](auto& hash_map_with_key) {
-            _agg_op->aggregator()->it_hash() = _agg_op->aggregator()->_state_allocator.begin();
-        });
+        _agg_op->aggregator()->it_hash() = _agg_op->aggregator()->state_allocator().begin();
     }
     CHECK(!_agg_op->aggregator()->spill_channel()->has_task());
     RETURN_IF_ERROR(
@@ -38,7 +38,7 @@ HashJoinBuildOperator::HashJoinBuildOperator(OperatorFactory* factory, int32_t i
           _distribution_mode(distribution_mode) {}

 Status HashJoinBuildOperator::push_chunk(RuntimeState* state, const ChunkPtr& chunk) {
-    return _join_builder->append_chunk_to_ht(chunk);
+    return _join_builder->append_chunk_to_ht(state, chunk);
 }

 Status HashJoinBuildOperator::prepare(RuntimeState* state) {
@@ -85,6 +85,7 @@ Status SpillableHashJoinBuildOperator::set_finishing(RuntimeState* state) {
     if (!_join_builder->spiller()->spilled()) {
         DCHECK(_is_first_time_spill);
         _is_first_time_spill = false;
+        RETURN_IF_ERROR(_join_builder->hash_join_builder()->prepare_for_spill_start(runtime_state()));
         RETURN_IF_ERROR(init_spiller_partitions(state, _join_builder->hash_join_builder()));
         ASSIGN_OR_RETURN(_hash_table_slice_iterator, _convert_hash_map_to_chunk());
         RETURN_IF_ERROR(_join_builder->append_spill_task(state, _hash_table_slice_iterator));

@@ -201,6 +202,7 @@ Status SpillableHashJoinBuildOperator::push_chunk(RuntimeState* state, const Chu

     // Estimate the appropriate number of partitions
     if (_is_first_time_spill) {
+        RETURN_IF_ERROR(_join_builder->hash_join_builder()->prepare_for_spill_start(runtime_state()));
         RETURN_IF_ERROR(init_spiller_partitions(state, _join_builder->hash_join_builder()));
     }

@@ -280,7 +280,7 @@ Status SpillableHashJoinProbeOperator::_load_partition_build_side(workgroup::Yie

         if (chunk_st.ok() && chunk_st.value() != nullptr && !chunk_st.value()->is_empty()) {
             int64_t old_mem_usage = hash_table_mem_usage;
-            RETURN_IF_ERROR(builder->append_chunk(std::move(chunk_st.value())));
+            RETURN_IF_ERROR(builder->append_chunk(state, std::move(chunk_st.value())));
             hash_table_mem_usage = builder->ht_mem_usage();
             COUNTER_ADD(metrics.build_partition_peak_memory_usage, hash_table_mem_usage - old_mem_usage);
         } else if (chunk_st.status().is_end_of_file()) {
@@ -27,6 +27,7 @@
 #include "runtime/mem_tracker.h"
 #include "runtime/runtime_filter_cache.h"
+#include "runtime/runtime_state.h"
 #include "service/backend_options.h"
 #include "util/failpoint/fail_point.h"
 #include "util/runtime_profile.h"
@@ -18,6 +18,7 @@
 #include <vector>

 #include "agent/master_info.h"
+#include "common/status.h"
 #include "exec/pipeline/fragment_context.h"
 #include "exec/pipeline/pipeline_fwd.h"
 #include "exec/pipeline/scan/connector_scan_operator.h"

@@ -29,6 +30,7 @@
 #include "runtime/exec_env.h"
 #include "runtime/query_statistics.h"
 #include "runtime/runtime_filter_cache.h"
+#include "util/defer_op.h"
 #include "util/thread.h"
 #include "util/thrift_rpc_helper.h"
@@ -83,7 +85,7 @@ QueryContext::~QueryContext() noexcept {
     }
 }

-void QueryContext::count_down_fragments() {
+void QueryContext::count_down_fragments(QueryContextManager* query_context_mgr) {
     size_t old = _num_active_fragments.fetch_sub(1);
     DCHECK_GE(old, 1);
     bool all_fragments_finished = old == 1;

@@ -93,7 +95,7 @@ void QueryContext::count_down_fragments() {

     // Acquire the pointer to avoid it being released when removing the query
     auto query_trace = shared_query_trace();
-    ExecEnv::GetInstance()->query_context_mgr()->remove(_query_id);
+    query_context_mgr->remove(_query_id);
     // @TODO(silverbullet233): if necessary, remove the dump from the execution thread
     // considering that this feature is generally used for debugging,
     // I think it should not have a big impact now

@@ -102,6 +104,10 @@ void QueryContext::count_down_fragments() {
     }
 }

+void QueryContext::count_down_fragments() {
+    return this->count_down_fragments(ExecEnv::GetInstance()->query_context_mgr());
+}
+
 FragmentContextManager* QueryContext::fragment_mgr() {
     return _fragment_mgr.get();
 }
@@ -417,10 +423,20 @@ StatusOr<QueryContext*> QueryContextManager::get_or_register(const TUniqueId& qu
         // lookup query context for the second chance in sc_map
         if (sc_it != sc_map.end()) {
             auto ctx = std::move(sc_it->second);
-            sc_map.erase(sc_it);
-            RETURN_CANCELLED_STATUS_IF_CTX_CANCELLED(ctx);
             auto* raw_ctx_ptr = ctx.get();
-            context_map.emplace(query_id, std::move(ctx));
+            sc_map.erase(sc_it);
+            auto cancel_status = [ctx]() -> Status {
+                RETURN_CANCELLED_STATUS_IF_CTX_CANCELLED(ctx);
+                return Status::OK();
+            }();
+            // If there are still active fragments, we cannot directly remove the query context
+            // because the operator is still executing.
+            // We need to wait until the fragment execution is complete,
+            // then call QueryContextManager::remove to safely remove this query context.
+            if (cancel_status.ok() || !ctx->has_no_active_instances()) {
+                context_map.emplace(query_id, std::move(ctx));
+            }
+            RETURN_IF_ERROR(cancel_status);
             return raw_ctx_ptr;
         }
     }
@@ -71,6 +71,7 @@ public:
     }

     void count_down_fragments();
+    void count_down_fragments(QueryContextManager* query_context_mgr);
     int num_active_fragments() const { return _num_active_fragments.load(); }
     bool has_no_active_instances() { return _num_active_fragments.load() == 0; }

@@ -251,6 +251,9 @@ StatusOr<MorselPtr> BucketSequenceMorselQueue::try_get() {
     }
     ASSIGN_OR_RETURN(auto morsel, _morsel_queue->try_get());
     auto* m = down_cast<ScanMorsel*>(morsel.get());
+    if (m == nullptr) {
+        return nullptr;
+    }
     DCHECK(m->has_owner_id());
     auto owner_id = m->owner_id();
     ASSIGN_OR_RETURN(int64_t next_owner_id, _peek_sequence_id());
@@ -309,10 +312,10 @@ void PhysicalSplitMorselQueue::set_key_ranges(const std::vector<std::unique_ptr<
     }
 }

-void PhysicalSplitMorselQueue::set_key_ranges(TabletReaderParams::RangeStartOperation range_start_op,
-                                              TabletReaderParams::RangeEndOperation range_end_op,
-                                              std::vector<OlapTuple> range_start_key,
-                                              std::vector<OlapTuple> range_end_key) {
+void PhysicalSplitMorselQueue::set_key_ranges(const TabletReaderParams::RangeStartOperation& range_start_op,
+                                              const TabletReaderParams::RangeEndOperation& range_end_op,
+                                              const std::vector<OlapTuple>& range_start_key,
+                                              const std::vector<OlapTuple>& range_end_key) {
     _range_start_op = range_start_op;
     _range_end_op = range_end_op;
     _range_start_key = range_start_key;
@@ -575,10 +578,10 @@ void LogicalSplitMorselQueue::set_key_ranges(const std::vector<std::unique_ptr<O
     }
 }

-void LogicalSplitMorselQueue::set_key_ranges(TabletReaderParams::RangeStartOperation range_start_op,
-                                             TabletReaderParams::RangeEndOperation range_end_op,
-                                             std::vector<OlapTuple> range_start_key,
-                                             std::vector<OlapTuple> range_end_key) {
+void LogicalSplitMorselQueue::set_key_ranges(const TabletReaderParams::RangeStartOperation& range_start_op,
+                                             const TabletReaderParams::RangeEndOperation& range_end_op,
+                                             const std::vector<OlapTuple>& range_start_key,
+                                             const std::vector<OlapTuple>& range_end_key) {
     _range_start_op = range_start_op;
     _range_end_op = range_end_op;
     _range_start_key = range_start_key;
@@ -340,11 +340,15 @@ public:
     MorselQueue(Morsels&& morsels) : _morsels(std::move(morsels)), _num_morsels(_morsels.size()) {}
     virtual ~MorselQueue() = default;

+    // NOTE: some subclasses of MorselQueue nest another MorselQueue, such as BucketSequenceMorselQueue.
+    // When adding a new virtual method, DO NOT forget to invoke it on the nested MorselQueue as well.
+
     virtual std::vector<TInternalScanRange*> prepare_olap_scan_ranges() const;
     virtual void set_key_ranges(const std::vector<std::unique_ptr<OlapScanRange>>& key_ranges) {}
-    virtual void set_key_ranges(TabletReaderParams::RangeStartOperation _range_start_op,
-                                TabletReaderParams::RangeEndOperation _range_end_op,
-                                std::vector<OlapTuple> _range_start_key, std::vector<OlapTuple> _range_end_key) {}
+    virtual void set_key_ranges(const TabletReaderParams::RangeStartOperation& range_start_op,
+                                const TabletReaderParams::RangeEndOperation& range_end_op,
+                                const std::vector<OlapTuple>& range_start_key,
+                                const std::vector<OlapTuple>& range_end_key) {}
     virtual void set_tablets(const std::vector<BaseTabletSharedPtr>& tablets) { _tablets = tablets; }
     virtual void set_tablet_rowsets(const std::vector<std::vector<BaseRowsetSharedPtr>>& tablet_rowsets) {
         _tablet_rowsets = tablet_rowsets;
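Editor's note: the NOTE above is the contract the new `set_tablet_schema` override later in this file satisfies. A minimal sketch of the wrapper pattern and its failure mode (all names illustrative):

#include <memory>

struct Queue {
    virtual ~Queue() = default;
    virtual void set_schema(int schema) { _schema = schema; }
    int _schema = 0;
};

// A wrapping queue must both update its own state and forward to the nested
// queue; forgetting the forward leaves the inner queue with a stale schema.
struct WrappingQueue final : Queue {
    explicit WrappingQueue(std::unique_ptr<Queue> inner) : _inner(std::move(inner)) {}
    void set_schema(int schema) override {
        Queue::set_schema(schema);   // keep the wrapper's own copy
        _inner->set_schema(schema);  // and propagate to the nested queue
    }
    std::unique_ptr<Queue> _inner;
};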
@@ -361,7 +365,7 @@ public:
     virtual StatusOr<bool> ready_for_next() const { return true; }
     virtual Status append_morsels(Morsels&& morsels);
     virtual Type type() const = 0;
-    void set_tablet_schema(TabletSchemaCSPtr tablet_schema) {
+    virtual void set_tablet_schema(const TabletSchemaCSPtr& tablet_schema) {
         DCHECK(tablet_schema != nullptr);
         _tablet_schema = tablet_schema;
     }
@@ -402,6 +406,13 @@ public:
         _morsel_queue->set_key_ranges(key_ranges);
     }

+    void set_key_ranges(const TabletReaderParams::RangeStartOperation& range_start_op,
+                        const TabletReaderParams::RangeEndOperation& range_end_op,
+                        const std::vector<OlapTuple>& range_start_key,
+                        const std::vector<OlapTuple>& range_end_key) override {
+        _morsel_queue->set_key_ranges(range_start_op, range_end_op, range_start_key, range_end_key);
+    }
+
     void set_tablets(const std::vector<BaseTabletSharedPtr>& tablets) override { _morsel_queue->set_tablets(tablets); }

     void set_tablet_rowsets(const std::vector<std::vector<BaseRowsetSharedPtr>>& tablet_rowsets) override {
@@ -422,6 +433,11 @@ public:
     Status append_morsels(Morsels&& morsels) override { return _morsel_queue->append_morsels(std::move(morsels)); }
     Type type() const override { return BUCKET_SEQUENCE; }

+    void set_tablet_schema(const TabletSchemaCSPtr& tablet_schema) override {
+        MorselQueue::set_tablet_schema(tablet_schema);
+        _morsel_queue->set_tablet_schema(tablet_schema);
+    }
+
 private:
     StatusOr<int64_t> _peek_sequence_id() const;
     mutable std::mutex _mutex;
@@ -470,9 +486,10 @@ public:
     ~PhysicalSplitMorselQueue() override = default;

     void set_key_ranges(const std::vector<std::unique_ptr<OlapScanRange>>& key_ranges) override;
-    void set_key_ranges(TabletReaderParams::RangeStartOperation _range_start_op,
-                        TabletReaderParams::RangeEndOperation _range_end_op, std::vector<OlapTuple> _range_start_key,
-                        std::vector<OlapTuple> _range_end_key) override;
+    void set_key_ranges(const TabletReaderParams::RangeStartOperation& range_start_op,
+                        const TabletReaderParams::RangeEndOperation& range_end_op,
+                        const std::vector<OlapTuple>& range_start_key,
+                        const std::vector<OlapTuple>& range_end_key) override;
     bool empty() const override { return _unget_morsel == nullptr && _tablet_idx >= _tablets.size(); }
     StatusOr<MorselPtr> try_get() override;

@@ -527,9 +544,10 @@ public:
     ~LogicalSplitMorselQueue() override = default;

     void set_key_ranges(const std::vector<std::unique_ptr<OlapScanRange>>& key_ranges) override;
-    void set_key_ranges(TabletReaderParams::RangeStartOperation range_start_op,
-                        TabletReaderParams::RangeEndOperation range_end_op, std::vector<OlapTuple> range_start_key,
-                        std::vector<OlapTuple> range_end_key) override;
+    void set_key_ranges(const TabletReaderParams::RangeStartOperation& range_start_op,
+                        const TabletReaderParams::RangeEndOperation& range_end_op,
+                        const std::vector<OlapTuple>& range_start_key,
+                        const std::vector<OlapTuple>& range_end_key) override;
     bool empty() const override { return _unget_morsel == nullptr && _tablet_idx >= _tablets.size(); }
     StatusOr<MorselPtr> try_get() override;

@@ -509,7 +509,17 @@ Status OlapChunkSource::_extend_schema_by_access_paths() {
         column.set_type(value_type);
         column.set_length(path->value_type().len);
         column.set_is_nullable(true);
-        column.set_extended_info(std::make_unique<ExtendedColumnInfo>(path.get(), root_column_index));
+        // Record root column unique id to make it robust across schema changes
+        int32_t root_uid = _tablet_schema->column(static_cast<size_t>(root_column_index)).unique_id();
+        column.set_extended_info(std::make_unique<ExtendedColumnInfo>(path.get(), root_uid));
+
+        // For UNIQUE/AGG tables, extended flat JSON subcolumns act as value columns and
+        // must have a valid aggregation method for pre-aggregation. Use REPLACE, which is
+        // consistent with value-column semantics in these models.
+        auto keys_type = _tablet_schema->keys_type();
+        if (keys_type == KeysType::UNIQUE_KEYS || keys_type == KeysType::AGG_KEYS) {
+            column.set_aggregation(StorageAggregateType::STORAGE_AGGREGATE_REPLACE);
+        }

         tmp_schema->append_column(column);
         VLOG(2) << "extend the access path column: " << path->linear_path();
@@ -83,9 +83,7 @@ StatusOr<ChunkPtr> OlapScanPrepareOperator::pull_chunk(RuntimeState* state) {
     }
     _morsel_queue->set_tablet_rowsets(std::move(tablet_rowsets));

-    if ((_morsel_queue->type() == MorselQueue::Type::LOGICAL_SPLIT ||
-         _morsel_queue->type() == MorselQueue::Type::PHYSICAL_SPLIT) &&
-        !tablets.empty()) {
+    if (!tablets.empty()) {
         _morsel_queue->set_tablet_schema(tablets[0]->tablet_schema());
     }

|
|||
}
|
||||
|
||||
auto [status, _] = _io_poller->poll();
|
||||
if (status.ok()) {
|
||||
status = _connector_chunk_sink->status();
|
||||
}
|
||||
if (!status.ok()) {
|
||||
LOG(WARNING) << "cancel fragment: " << status;
|
||||
_fragment_context->cancel(status);
|
||||
|
|
@@ -74,12 +77,16 @@ bool ConnectorSinkOperator::is_finished() const {
     }

     auto [status, finished] = _io_poller->poll();
+    if (status.ok()) {
+        status = _connector_chunk_sink->status();
+    }
     if (!status.ok()) {
         LOG(WARNING) << "cancel fragment: " << status;
         _fragment_context->cancel(status);
     }

-    return finished;
+    bool ret = finished && _connector_chunk_sink->is_finished();
+    return ret;
 }

 Status ConnectorSinkOperator::set_finishing(RuntimeState* state) {
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
|
||||
#include "column/vectorized_fwd.h"
|
||||
#include "common/statusor.h"
|
||||
#include "exec/spill/executor.h"
|
||||
#include "exec/spill/spiller.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "util/blocking_queue.hpp"
|
||||
#include "util/defer_op.h"
|
||||
#include "util/runtime_profile.h"
|
||||
|
||||
namespace starrocks {
|
||||
class SpillProcessChannel;
|
||||
|
|
@@ -73,7 +70,7 @@ using SpillProcessChannelFactoryPtr = std::shared_ptr<SpillProcessChannelFactory
 // SpillProcessOperator
 class SpillProcessChannel {
 public:
-    SpillProcessChannel() {}
+    SpillProcessChannel() = default;

     bool add_spill_task(SpillProcessTask&& task) {
         DCHECK(!_is_finishing);
@@ -68,8 +68,7 @@ Status SchemaBeDataCacheMetricsScanner::get_next(ChunkPtr* chunk, bool* eos) {
     row.emplace_back(_be_id);

     // TODO: Support LRUCacheEngine
-    auto* cache = DataCache::GetInstance()->local_cache();
+    auto* cache = DataCache::GetInstance()->local_disk_cache();
     if (cache != nullptr && cache->is_initialized() && cache->engine_type() == LocalCacheEngineType::STARCACHE) {
         auto* starcache = reinterpret_cast<StarCacheEngine*>(cache);
         // retrieve different priorities' used bytes from level = 2 metrics
@@ -22,20 +22,31 @@
 namespace starrocks {

 SchemaScanner::ColumnDesc SchemaFeTabletSchedulesScanner::_s_columns[] = {
+        {"TABLET_ID", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
         {"TABLE_ID", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
         {"PARTITION_ID", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
-        {"TABLET_ID", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
         {"TYPE", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
-        {"PRIORITY", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
         {"STATE", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
-        {"TABLET_STATUS", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
+        {"SCHEDULE_REASON", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
+        {"MEDIUM", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
+        {"PRIORITY", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
+        {"ORIG_PRIORITY", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
+        {"LAST_PRIORITY_ADJUST_TIME", TypeDescriptor::from_logical_type(TYPE_DATETIME), sizeof(DateTimeValue), true},
+        {"VISIBLE_VERSION", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
+        {"COMMITTED_VERSION", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
+        {"SRC_BE_ID", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
+        {"SRC_PATH", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
+        {"DEST_BE_ID", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
+        {"DEST_PATH", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
+        {"TIMEOUT", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
         {"CREATE_TIME", TypeDescriptor::from_logical_type(TYPE_DATETIME), sizeof(DateTimeValue), true},
         {"SCHEDULE_TIME", TypeDescriptor::from_logical_type(TYPE_DATETIME), sizeof(DateTimeValue), true},
         {"FINISH_TIME", TypeDescriptor::from_logical_type(TYPE_DATETIME), sizeof(DateTimeValue), true},
-        {"CLONE_SRC", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
-        {"CLONE_DEST", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
         {"CLONE_BYTES", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
         {"CLONE_DURATION", TypeDescriptor::from_logical_type(TYPE_DOUBLE), sizeof(double), false},
         {"CLONE_RATE", TypeDescriptor::from_logical_type(TYPE_DOUBLE), sizeof(double), false},
+        {"FAILED_SCHEDULE_COUNT", TypeDescriptor::from_logical_type(TYPE_INT), sizeof(int32_t), false},
+        {"FAILED_RUNNING_COUNT", TypeDescriptor::from_logical_type(TYPE_INT), sizeof(int32_t), false},
+        {"MSG", TypeDescriptor::create_varchar_type(sizeof(Slice)), sizeof(Slice), false},
 };

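The scanner's output schema is driven entirely by this descriptor array: each entry names the column and records its logical type, slot width, and nullability, and fill_chunk must provide one case per slot. A simplified, self-contained sketch of such a descriptor table (this ColumnDesc is a stand-in, not the real SchemaScanner type):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string_view>

enum class LogicalType { BIGINT, VARCHAR, DATETIME };

struct ColumnDesc {
    std::string_view name;
    LogicalType type;
    std::size_t size; // width of the slot in bytes
    bool nullable;
};

constexpr ColumnDesc k_columns[] = {
    {"TABLET_ID", LogicalType::BIGINT, sizeof(int64_t), false},
    {"PRIORITY", LogicalType::VARCHAR, 16, false},
    {"LAST_PRIORITY_ADJUST_TIME", LogicalType::DATETIME, 8, true},
};

int main() {
    // Adding a column is one array entry here plus a matching fill case.
    for (const auto& c : k_columns) {
        std::cout << c.name << " nullable=" << c.nullable << "\n";
    }
    return 0;
}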
@@ -89,21 +100,21 @@ Status SchemaFeTabletSchedulesScanner::fill_chunk(ChunkPtr* chunk) {
     for (; _cur_idx < end; _cur_idx++) {
         auto& info = _infos[_cur_idx];
         for (const auto& [slot_id, index] : slot_id_to_index_map) {
-            if (slot_id < 1 || slot_id > 15) {
+            if (slot_id < 1 || slot_id > 26) {
                 return Status::InternalError(strings::Substitute("invalid slot id:$0", slot_id));
             }
             ColumnPtr column = (*chunk)->get_column_by_slot_id(slot_id);
             switch (slot_id) {
             case 1: {
-                fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.table_id);
+                fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.tablet_id);
                 break;
             }
             case 2: {
-                fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.partition_id);
+                fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.table_id);
                 break;
             }
             case 3: {
-                fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.tablet_id);
+                fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.partition_id);
                 break;
             }
             case 4: {
@@ -112,21 +123,71 @@ Status SchemaFeTabletSchedulesScanner::fill_chunk(ChunkPtr* chunk) {
                 break;
             }
             case 5: {
                 Slice v = Slice(info.priority);
                 fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
                 break;
             }
             case 6: {
                 Slice v = Slice(info.state);
                 fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
                 break;
             }
             case 6: {
                 Slice v = Slice(info.schedule_reason);
                 fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
                 break;
             }
             case 7: {
-                Slice v = Slice(info.tablet_status);
+                Slice v = Slice(info.medium);
                 fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
                 break;
             }
             case 8: {
                 Slice v = Slice(info.priority);
                 fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
                 break;
             }
             case 9: {
                 Slice v = Slice(info.orig_priority);
                 fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
                 break;
             }
             case 10: {
                 if (info.last_priority_adjust_time > 0) {
                     DateTimeValue v;
                     v.from_unixtime(info.last_priority_adjust_time, _runtime_state->timezone_obj());
                     fill_column_with_slot<TYPE_DATETIME>(column.get(), (void*)&v);
                 } else {
                     down_cast<NullableColumn*>(column.get())->append_nulls(1);
                 }
                 break;
             }
             case 11: {
                 fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.visible_version);
                 break;
             }
             case 12: {
                 fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.committed_version);
                 break;
             }
             case 13: {
                 fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.src_be_id);
                 break;
             }
             case 14: {
                 Slice v = Slice(info.src_path);
                 fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
                 break;
             }
             case 15: {
                 fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.dest_be_id);
                 break;
             }
             case 16: {
                 Slice v = Slice(info.dest_path);
                 fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
                 break;
             }
             case 17: {
                 fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.timeout);
                 break;
             }
             case 18: {
                 if (info.create_time > 0) {
                     DateTimeValue v;
                     v.from_unixtime(static_cast<int64_t>(info.create_time), _runtime_state->timezone_obj());
@@ -136,7 +197,7 @@ Status SchemaFeTabletSchedulesScanner::fill_chunk(ChunkPtr* chunk) {
                 }
                 break;
             }
-            case 9: {
+            case 19: {
                 if (info.schedule_time > 0) {
                     DateTimeValue v;
                     v.from_unixtime(static_cast<int64_t>(info.schedule_time), _runtime_state->timezone_obj());
@@ -146,7 +207,7 @@ Status SchemaFeTabletSchedulesScanner::fill_chunk(ChunkPtr* chunk) {
                 }
                 break;
             }
-            case 10: {
+            case 20: {
                 if (info.finish_time > 0) {
                     DateTimeValue v;
                     v.from_unixtime(static_cast<int64_t>(info.finish_time), _runtime_state->timezone_obj());
@@ -156,23 +217,27 @@ Status SchemaFeTabletSchedulesScanner::fill_chunk(ChunkPtr* chunk) {
                 }
                 break;
             }
-            case 11: {
-                fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.clone_src);
-                break;
-            }
-            case 12: {
-                fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.clone_dest);
-                break;
-            }
-            case 13: {
+            case 21: {
                 fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.clone_bytes);
                 break;
             }
-            case 14: {
+            case 22: {
                 fill_column_with_slot<TYPE_DOUBLE>(column.get(), (void*)&info.clone_duration);
                 break;
             }
-            case 15: {
+            case 23: {
                 fill_column_with_slot<TYPE_DOUBLE>(column.get(), (void*)&info.clone_rate);
                 break;
             }
+            case 24: {
+                fill_column_with_slot<TYPE_INT>(column.get(), (void*)&info.failed_schedule_count);
+                break;
+            }
+            case 25: {
+                fill_column_with_slot<TYPE_INT>(column.get(), (void*)&info.failed_running_count);
+                break;
+            }
+            case 26: {
+                Slice v = Slice(info.error_msg);
+                fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&v);
+                break;
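fill_chunk dispatches on the slot id with one case per column, and the nullable DATETIME slots append NULL when the source timestamp is unset instead of emitting epoch zero. A compilable miniature of that dispatch, with simplified stand-in column types:

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

struct Info {
    int64_t tablet_id = 7;
    int64_t last_priority_adjust_time = 0; // 0 means "never adjusted"
};

using BigintColumn = std::vector<int64_t>;
using DatetimeColumn = std::vector<std::optional<int64_t>>; // nullopt models SQL NULL

void fill_slot(int slot_id, const Info& info, BigintColumn& ids, DatetimeColumn& times) {
    switch (slot_id) {
    case 1:
        ids.push_back(info.tablet_id);
        break;
    case 10:
        // Emit NULL rather than 1970-01-01 when the timestamp was never set.
        if (info.last_priority_adjust_time > 0) {
            times.push_back(info.last_priority_adjust_time);
        } else {
            times.push_back(std::nullopt);
        }
        break;
    default:
        break;
    }
}

int main() {
    Info info;
    BigintColumn ids;
    DatetimeColumn times;
    fill_slot(1, info, ids, times);
    fill_slot(10, info, ids, times);
    std::cout << ids[0] << " adjust_time_is_null=" << !times[0].has_value() << "\n";
    return 0;
}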
@@ -57,6 +57,7 @@ SchemaScanner::ColumnDesc SchemaPartitionsMetaScanner::_s_columns[] = {
         {"STORAGE_SIZE", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
         {"TABLET_BALANCED", TypeDescriptor::from_logical_type(TYPE_BOOLEAN), sizeof(bool), false},
         {"METADATA_SWITCH_VERSION", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
+        {"PATH_ID", TypeDescriptor::from_logical_type(TYPE_BIGINT), sizeof(int64_t), false},
 };

 SchemaPartitionsMetaScanner::SchemaPartitionsMetaScanner()
@@ -313,6 +314,11 @@ Status SchemaPartitionsMetaScanner::fill_chunk(ChunkPtr* chunk) {
             fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.metadata_switch_version);
             break;
         }
+        case 32: {
+            // PATH_ID
+            fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.path_id);
+            break;
+        }
         default:
             break;
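The new PATH_ID column needed two coordinated edits: the array entry above and this case 32. A hypothetical guard (not part of the patch) can make that coupling explicit, so a column added to one side without the other fails to compile:

#include <cstddef>
#include <iterator>

struct ColumnDesc { const char* name; };

constexpr ColumnDesc k_columns[] = {
    {"STORAGE_SIZE"}, {"TABLET_BALANCED"}, {"METADATA_SWITCH_VERSION"}, {"PATH_ID"},
};

// Highest case value handled by the fill switch; bump it together with the array.
constexpr std::size_t k_max_slot_id = 4;

static_assert(std::size(k_columns) == k_max_slot_id,
              "descriptor array and fill switch are out of sync");

int main() { return 0; }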
@@ -14,7 +14,6 @@

 #pragma once

 #include <limits>
 #include <memory>

 #include "column/vectorized_fwd.h"
@@ -24,7 +23,7 @@
 #include "exec/workgroup/work_group_fwd.h"

 namespace starrocks {
-class AggregatorParams;
+struct AggregatorParams;
 using AggregatorParamsPtr = std::shared_ptr<AggregatorParams>;
 }; // namespace starrocks

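This aligns the forward declaration's class-key with the definition: AggregatorParams is evidently defined as a struct, and declaring it with class draws -Wmismatched-tags on Clang and can cause link problems on MSVC, whose name decoration distinguishes the two keys. In miniature:

#include <memory>

struct AggregatorParams; // was: class AggregatorParams; must match the definition's key
using AggregatorParamsPtr = std::shared_ptr<AggregatorParams>;

struct AggregatorParams {
    int aggr_mode = 0;
};

int main() {
    AggregatorParamsPtr params = std::make_shared<AggregatorParams>();
    return params->aggr_mode;
}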
@@ -757,8 +757,7 @@ Status PartitionedSpillerWriter::_compact_skew_chunks(size_t num_rows, std::vect
         auto hash_set_sz = merger->hash_set_variant().size();
         merger->convert_hash_set_to_chunk(hash_set_sz, &chunk_merged);
     } else {
-        merger->hash_map_variant().visit(
-                [&](auto& hash_map_with_key) { merger->it_hash() = merger->_state_allocator.begin(); });
+        merger->it_hash() = merger->state_allocator().begin();
         auto hash_map_sz = merger->hash_map_variant().size();
         RETURN_IF_ERROR(merger->convert_hash_map_to_chunk(hash_map_sz, &chunk_merged, true));
     }
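The rewrite drops a visit() wrapper whose lambda never used its argument and reaches the iterator through a state_allocator() accessor instead of the private _state_allocator member. A sketch of that accessor pattern, with illustrative names:

#include <iostream>
#include <vector>

class Merger {
public:
    using Allocator = std::vector<int>;
    // Accessor added so callers no longer touch the private member directly.
    Allocator& state_allocator() { return _state_allocator; }

private:
    Allocator _state_allocator{1, 2, 3};
};

int main() {
    Merger merger;
    auto it = merger.state_allocator().begin(); // was: merger._state_allocator.begin()
    std::cout << *it << "\n";
    return 0;
}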
@@ -20,6 +20,7 @@
 #include "column/vectorized_fwd.h"
+#include "common/status.h"
 #include "exec/olap_scan_node.h"
 #include "service/backend_options.h"
 #include "storage/chunk_helper.h"
 #include "storage/column_predicate_rewriter.h"
 #include "storage/predicate_parser.h"
@@ -14,7 +14,6 @@

 #pragma once

 #include <new>
 #include <type_traits>

 #include "column/column.h"
Some files were not shown because too many files have changed in this diff.