Compare commits

..

3 Commits

Cursor Agent  1c5347773f  2025-09-18 12:01:49 +00:00
    Refactor AnalyzeJoinTest to align USING semantics with SQL standard
    Co-authored-by: 936797922 <936797922@qq.com>

Cursor Agent  e9be84c804  2025-09-18 12:00:09 +00:00
    Checkpoint before follow-up message
    Co-authored-by: 936797922 <936797922@qq.com>

Cursor Agent  e226d543d4  2025-09-18 11:49:04 +00:00
    Fix: Deduplicate USING clause columns in join scope
    Co-authored-by: 936797922 <936797922@qq.com>
784 changed files with 7583 additions and 40540 deletions

.gitignore vendored (17 changes)

@@ -33,7 +33,6 @@ fe_plugins/output
fe/mocked
fe/ut_ports
fe/*/target
fe/*/bin
fe/plugin/*/target/
fe/fe-core/gen
fe/fe-grammar/gen
@@ -61,8 +60,6 @@ compile_commands.json
.classpath
.vimspector.json
.gdb_history
fe/plugin/hive-udf/target
fe/plugin/spark-dpp/target
# ignore generated files
StarRocksLex.tokens
@@ -95,18 +92,6 @@ be/tags
build/
cmake-build-debug/
CMakeLists.txt
.claude
CLAUDE.md
.clangd
!build-mac/CMakeLists.txt
# build-mac generated files
build-mac/.ninja_deps
build-mac/.ninja_lock
build-mac/.ninja_log
build-mac/CMakeCache.txt
build-mac/CMakeFiles/
build-mac/build.ninja
build-mac/build_version.cc
build-mac/cmake_install.cmake


@@ -130,8 +130,6 @@ This project is used by the following companies. Learn more about their use case
- [Airbnb](https://www.youtube.com/watch?v=AzDxEZuMBwM&ab_channel=StarRocks_labs)
- [Airtable](https://medium.com/airtable-eng/live-shard-data-archive-export-and-ingestion-to-starrocks-for-validation-6af555e8b3fe)
- [Alibaba](https://www.youtube.com/watch?v=7N34q65mthk)
- [Celonis](https://www.youtube.com/watch?v=Jm-D0xvOtek)
- [Coinbase](https://www.youtube.com/watch?v=3Z9jSCaHnYg&list=PL0eWwaesODdhBhKSnvpfIEAB9sgk8rKmy)
- [Demandbase](https://starrocks.medium.com/demandbase-ditches-denormalization-by-switching-off-clickhouse-44195d795a83)
- [Didi](https://www.starrocks.io/blog/reduced-80-cost-didis-journey-from-multiple-olap-engines-to-starrocks)
@@ -139,7 +137,6 @@ This project is used by the following companies. Learn more about their use case
- [Fanatics](https://www.youtube.com/watch?v=hbXovqR6tOc)
- [Fresha](https://medium.com/fresha-data-engineering/how-we-accidentally-became-one-of-uks-first-starrocks-production-pioneers-7db249f10010)
- [Grab](https://engineering.grab.com/building-a-spark-observability)
- [Haezoom](https://www.starrocks.io/blog/haezoom-and-cloudshift-overcome-apache-druids-limits-with-starrocks)
- [HerdWatch](https://medium.com/p/a7916a7e87bf)
- [Intuit](https://www.youtube.com/watch?v=tUC3FS3ki10)
- [iQiyi](https://medium.com/starrocks-engineering/real-time-analytics-at-scale-why-we-use-starrocks-0aa3c859cbeb)


@@ -650,7 +650,6 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror")
endif()
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter -Wno-documentation -Wno-weak-vtables")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option")
# Turn on the following warnings as errors explicitly
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror=string-plus-int")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror=pessimizing-move")
@@ -671,10 +670,6 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# ignore warning from apache-orc
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-switch-default")
endif ()
# Add -rtlib=compiler-rt for ARM architecture to fix LLVM bug: https://bugs.llvm.org/show_bug.cgi?id=16404
if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "aarch64")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -rtlib=compiler-rt")
endif()
else ()
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "14.0.0")
# ignore error from apache-orc


@@ -848,22 +848,15 @@ void* ReportDataCacheMetricsTaskWorkerPool::_worker_thread_callback(void* arg_th
request.__set_report_version(g_report_version.load(std::memory_order_relaxed));
TDataCacheMetrics t_metrics{};
const LocalDiskCacheEngine* disk_cache = DataCache::GetInstance()->local_disk_cache();
const LocalMemCacheEngine* mem_cache = DataCache::GetInstance()->local_mem_cache();
bool disk_cache_inited = disk_cache != nullptr && disk_cache->is_initialized();
bool mem_cache_inited = mem_cache != nullptr && mem_cache->is_initialized();
if (!disk_cache_inited && !mem_cache_inited) {
t_metrics.__set_status(TDataCacheStatus::DISABLED);
// TODO: mem_metrics + disk_metrics
const LocalCacheEngine* cache = DataCache::GetInstance()->local_disk_cache();
if (cache != nullptr && cache->is_initialized()) {
const auto metrics = cache->cache_metrics();
DataCacheUtils::set_metrics_from_thrift(t_metrics, metrics);
} else {
if (mem_cache_inited) {
t_metrics.__set_status(TDataCacheStatus::NORMAL);
DataCacheUtils::set_metrics_to_thrift(t_metrics, mem_cache->cache_metrics());
}
if (disk_cache_inited) {
DataCacheUtils::set_metrics_to_thrift(t_metrics, disk_cache->cache_metrics());
}
t_metrics.__set_status(TDataCacheStatus::DISABLED);
}
request.__set_datacache_metrics(t_metrics);
TMasterResult result;
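Taken together, the new-side lines in this hunk collapse the separate mem/disk reporting paths into one: metrics come from the local disk cache engine when it is initialized, and the report is marked DISABLED otherwise. A minimal sketch of that flow, using only the types visible in the hunk (not the verbatim method body):

```cpp
void fill_datacache_metrics(TDataCacheMetrics& t_metrics) {
    // Post-change path: a single LocalCacheEngine supplies all metrics.
    const LocalCacheEngine* cache = DataCache::GetInstance()->local_disk_cache();
    if (cache != nullptr && cache->is_initialized()) {
        const auto metrics = cache->cache_metrics();
        DataCacheUtils::set_metrics_from_thrift(t_metrics, metrics);
    } else {
        t_metrics.__set_status(TDataCacheStatus::DISABLED);
    }
}
```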


@@ -24,7 +24,7 @@
#include <filesystem>
#include <memory>
#include "cache/disk_cache/starcache_engine.h"
#include "cache/starcache_engine.h"
#include "starcache/common/types.h"
#include "util/logging.h"
#include "util/random.h"


@@ -18,9 +18,9 @@
#include <random>
#include "cache/cache_options.h"
#include "cache/disk_cache/starcache_engine.h"
#include "cache/mem_cache/lrucache_engine.h"
#include "cache/mem_cache/page_cache.h"
#include "cache/lrucache_engine.h"
#include "cache/object_cache/page_cache.h"
#include "cache/starcache_engine.h"
#include "common/config.h"
#include "runtime/current_thread.h"
#include "runtime/exec_env.h"
@@ -105,13 +105,14 @@ std::string ObjectCacheBench::get_cache_type_str(CacheType type) {
}
void ObjectCacheBench::init_cache(CacheType cache_type) {
DiskCacheOptions opt;
CacheOptions opt;
opt.mem_space_size = _capacity;
opt.block_size = config::datacache_block_size;
opt.max_flying_memory_mb = config::datacache_max_flying_memory_mb;
opt.max_concurrent_inserts = config::datacache_max_concurrent_inserts;
opt.enable_checksum = config::datacache_checksum_enable;
opt.enable_direct_io = config::datacache_direct_io_enable;
opt.enable_tiered_cache = config::datacache_tiered_cache_enable;
opt.skip_read_factor = config::datacache_skip_read_factor;
opt.scheduler_threads_per_cpu = config::datacache_scheduler_threads_per_cpu;
opt.enable_datacache_persistence = false;
@@ -145,7 +146,7 @@ void ObjectCacheBench::prepare_sequence_data(StoragePageCache* cache, int64_t co
auto* ptr = new std::vector<uint8_t>(_page_size);
(*ptr)[0] = 1;
PageCacheHandle handle;
MemCacheWriteOptions options;
ObjectCacheWriteOptions options;
Status st = cache->insert(key, ptr, options, &handle);
if (!st.ok()) {
if (!st.is_already_exist()) {
@@ -161,7 +162,7 @@ void ObjectCacheBench::prepare_data(StoragePageCache* cache, int64_t count) {
auto* ptr = new std::vector<uint8_t>(_page_size);
(*ptr)[0] = 1;
PageCacheHandle handle;
MemCacheWriteOptions options;
ObjectCacheWriteOptions options;
Status st = cache->insert(key, ptr, options, &handle);
if (!st.ok()) {
if (!st.is_already_exist()) {
@@ -210,7 +211,7 @@ void ObjectCacheBench::random_insert_multi_threads(benchmark::State* state, Stor
auto* ptr = new std::vector<uint8_t>(page_size);
(*ptr)[0] = 1;
PageCacheHandle handle;
MemCacheWriteOptions options;
ObjectCacheWriteOptions options;
Status st = cache->insert(key, ptr, options, &handle);
if (!st.ok()) {
if (!st.is_already_exist()) {


@@ -23,15 +23,15 @@ set(CACHE_FILES
mem_space_monitor.cpp
datacache.cpp
datacache_utils.cpp
mem_cache/lrucache_engine.cpp
mem_cache/page_cache.cpp
disk_cache/block_cache.cpp
disk_cache/io_buffer.cpp
disk_cache/block_cache_hit_rate_counter.hpp
lrucache_engine.cpp
block_cache/block_cache.cpp
block_cache/io_buffer.cpp
block_cache/block_cache_hit_rate_counter.hpp
object_cache/page_cache.cpp
)
if (${WITH_STARCACHE} STREQUAL "ON")
list(APPEND CACHE_FILES disk_cache/starcache_engine.cpp)
list(APPEND CACHE_FILES starcache_engine.cpp)
list(APPEND CACHE_FILES peer_cache_engine.cpp)
endif()


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cache/disk_cache/block_cache.h"
#include "cache/block_cache/block_cache.h"
#include <fmt/format.h>
@@ -36,7 +36,7 @@ BlockCache::~BlockCache() {
(void)shutdown();
}
Status BlockCache::init(const BlockCacheOptions& options, std::shared_ptr<LocalDiskCacheEngine> local_cache,
Status BlockCache::init(const BlockCacheOptions& options, std::shared_ptr<LocalCacheEngine> local_cache,
std::shared_ptr<RemoteCacheEngine> remote_cache) {
_block_size = std::min(options.block_size, MAX_BLOCK_SIZE);
_local_cache = std::move(local_cache);
@@ -45,8 +45,7 @@ Status BlockCache::init(const BlockCacheOptions& options, std::shared_ptr<LocalD
return Status::OK();
}
Status BlockCache::write(const CacheKey& cache_key, off_t offset, const IOBuffer& buffer,
DiskCacheWriteOptions* options) {
Status BlockCache::write(const CacheKey& cache_key, off_t offset, const IOBuffer& buffer, WriteCacheOptions* options) {
if (offset % _block_size != 0) {
LOG(WARNING) << "write block key: " << cache_key << " with invalid args, offset: " << offset;
return Status::InvalidArgument(strings::Substitute("offset must be aligned by block size $0", _block_size));
@@ -63,7 +62,7 @@ Status BlockCache::write(const CacheKey& cache_key, off_t offset, const IOBuffer
static void empty_deleter(void*) {}
Status BlockCache::write(const CacheKey& cache_key, off_t offset, size_t size, const char* data,
DiskCacheWriteOptions* options) {
WriteCacheOptions* options) {
if (!data) {
return Status::InvalidArgument("invalid data buffer");
}
@@ -74,7 +73,7 @@ Status BlockCache::write(const CacheKey& cache_key, off_t offset, size_t size, c
}
Status BlockCache::read(const CacheKey& cache_key, off_t offset, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) {
ReadCacheOptions* options) {
if (size == 0) {
return Status::OK();
}
@@ -85,7 +84,7 @@ Status BlockCache::read(const CacheKey& cache_key, off_t offset, size_t size, IO
}
StatusOr<size_t> BlockCache::read(const CacheKey& cache_key, off_t offset, size_t size, char* data,
DiskCacheReadOptions* options) {
ReadCacheOptions* options) {
IOBuffer buffer;
RETURN_IF_ERROR(read(cache_key, offset, size, &buffer, options));
buffer.copy_to(data);
@@ -118,7 +117,7 @@ Status BlockCache::remove(const CacheKey& cache_key, off_t offset, size_t size)
}
Status BlockCache::read_buffer_from_remote_cache(const std::string& cache_key, size_t offset, size_t size,
IOBuffer* buffer, DiskCacheReadOptions* options) {
IOBuffer* buffer, ReadCacheOptions* options) {
if (size == 0) {
return Status::OK();
}


@@ -16,16 +16,12 @@
#include <atomic>
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/local_cache_engine.h"
#include "cache/remote_cache_engine.h"
#include "common/status.h"
namespace starrocks {
struct BlockCacheOptions {
size_t block_size = 0;
};
class BlockCache {
public:
using CacheKey = std::string;
@@ -37,23 +33,22 @@ public:
~BlockCache();
// Init the block cache instance
Status init(const BlockCacheOptions& options, std::shared_ptr<LocalDiskCacheEngine> local_cache,
Status init(const BlockCacheOptions& options, std::shared_ptr<LocalCacheEngine> local_cache,
std::shared_ptr<RemoteCacheEngine> remote_cache);
// Write a data buffer to the cache; the `offset` must be aligned by the block size
Status write(const CacheKey& cache_key, off_t offset, const IOBuffer& buffer,
DiskCacheWriteOptions* options = nullptr);
Status write(const CacheKey& cache_key, off_t offset, const IOBuffer& buffer, WriteCacheOptions* options = nullptr);
Status write(const CacheKey& cache_key, off_t offset, size_t size, const char* data,
DiskCacheWriteOptions* options = nullptr);
WriteCacheOptions* options = nullptr);
// Read data from the cache. It returns the data size if successful; otherwise an error status
// is returned. The offset and size must be aligned by the block size.
Status read(const CacheKey& cache_key, off_t offset, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options = nullptr);
ReadCacheOptions* options = nullptr);
StatusOr<size_t> read(const CacheKey& cache_key, off_t offset, size_t size, char* data,
DiskCacheReadOptions* options = nullptr);
ReadCacheOptions* options = nullptr);
bool exist(const CacheKey& cache_key, off_t offset, size_t size) const;
@@ -62,7 +57,7 @@ public:
// Read data from remote cache
Status read_buffer_from_remote_cache(const std::string& cache_key, size_t offset, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options);
ReadCacheOptions* options);
void record_read_local_cache(size_t size, int64_t latency_us);
@@ -78,14 +73,15 @@ public:
bool is_initialized() const { return _initialized.load(std::memory_order_relaxed); }
bool available() const { return is_initialized() && _local_cache->available(); }
bool mem_cache_available() const { return is_initialized() && _local_cache->mem_cache_available(); }
std::shared_ptr<LocalDiskCacheEngine> local_cache() { return _local_cache; }
std::shared_ptr<LocalCacheEngine> local_cache() { return _local_cache; }
static const size_t MAX_BLOCK_SIZE;
private:
size_t _block_size = 0;
std::shared_ptr<LocalDiskCacheEngine> _local_cache;
std::shared_ptr<LocalCacheEngine> _local_cache;
std::shared_ptr<RemoteCacheEngine> _remote_cache;
std::atomic<bool> _initialized = false;
};
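Since both options parameters default to nullptr on the new side, the API can be exercised with just the key/offset/size contract. A minimal usage sketch, assuming only the signatures declared above (the helper name is hypothetical):

```cpp
#include "cache/block_cache/block_cache.h"

// Write a buffer at a block-aligned offset, then read it back.
// Offset 0 is always aligned; `size` must match the number of bytes written.
Status block_cache_roundtrip(BlockCache* cache, const std::string& key, const IOBuffer& in,
                             size_t size) {
    RETURN_IF_ERROR(cache->write(key, /*offset=*/0, in));  // rejected if offset is unaligned
    IOBuffer out;
    return cache->read(key, /*offset=*/0, size, &out);     // fills `out` on success
}
```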


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cache/disk_cache/io_buffer.h"
#include "cache/block_cache/io_buffer.h"
#include "gutil/strings/fastmem.h"


@@ -55,4 +55,19 @@ struct DataCacheStatusUtils {
}
};
struct DataCacheMetrics {
DataCacheStatus status;
size_t mem_quota_bytes;
size_t mem_used_bytes;
size_t disk_quota_bytes;
size_t disk_used_bytes;
size_t meta_used_bytes = 0;
};
#ifdef WITH_STARCACHE
using StarCacheMetrics = starcache::CacheMetrics;
using StarCacheStatus = starcache::CacheStatus;
#endif
} // namespace starrocks


@@ -14,8 +14,17 @@
#pragma once
#include <cstdint>
#include <functional>
#include <string>
#include <vector>
#include "cache/cache_metrics.h"
#include "common/status.h"
namespace starrocks {
// Options to control how to create DataCache instance
struct DataCacheOptions {
bool enable_datacache = false;
bool enable_cache_select = false;
@@ -28,4 +37,82 @@ struct DataCacheOptions {
int64_t datacache_ttl_seconds = 0;
};
struct DirSpace {
std::string path;
size_t size;
};
struct RemoteCacheOptions {
double skip_read_factor = 0;
};
struct MemCacheOptions {
size_t mem_space_size = 0;
};
struct DiskCacheOptions {
// basic
size_t mem_space_size = 0;
std::vector<DirSpace> dir_spaces;
std::string meta_path;
// advanced
size_t block_size = 0;
bool enable_checksum = false;
bool enable_direct_io = false;
bool enable_tiered_cache = true;
bool enable_datacache_persistence = false;
size_t max_concurrent_inserts = 0;
size_t max_flying_memory_mb = 0;
double scheduler_threads_per_cpu = 0;
double skip_read_factor = 0;
uint32_t inline_item_count_limit = 0;
std::string eviction_policy;
};
struct BlockCacheOptions {
size_t block_size = 0;
};
struct WriteCacheOptions {
int8_t priority = 0;
// If ttl_seconds=0 (default), no ttl restriction will be set. If an old one exists, remove it.
uint64_t ttl_seconds = 0;
// If overwrite=true, the cache value will be replaced if it already exists.
bool overwrite = false;
bool async = false;
// When allow_zero_copy=true, the caller guarantees that the target buffer will not be released before
// the write finishes, so the cache library can use the buffer directly without copying it to another buffer.
bool allow_zero_copy = false;
std::function<void(int, const std::string&)> callback = nullptr;
// The probability to evict other items if the cache space is full, which can help avoid frequent cache replacement
// and improve cache hit rate sometimes.
// It is expressed as a percentage. If evict_probability is 10, it means the probability to evict other data is 10%.
int32_t evict_probability = 100;
// The base frequency for the target cache entry.
// When using a multi-segment lru, a higher frequency may cause the entry to be written to the warm segment directly.
// For the default cache options, where `lru_segment_freq_bits` is 0:
// * The default `frequency=0` indicates the entry will be written to the cold segment.
// * A frequency value greater than 0 indicates the entry is written directly to the warm segment.
int8_t frequency = 0;
struct Stats {
int64_t write_mem_bytes = 0;
int64_t write_disk_bytes = 0;
} stats;
};
struct ReadCacheOptions {
bool use_adaptor = false;
std::string remote_host;
int32_t remote_port;
struct Stats {
int64_t read_mem_bytes = 0;
int64_t read_disk_bytes = 0;
} stats;
};
} // namespace starrocks
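The field comments above describe the knobs one at a time; a short sketch of how a caller might combine them (values are illustrative, not taken from this change):

```cpp
#include "cache/cache_options.h"

// An asynchronous write with a one-hour TTL that replaces any existing entry
// and only rarely displaces other items when the cache is full.
WriteCacheOptions opts;
opts.ttl_seconds = 3600;       // 0 would mean no TTL restriction
opts.overwrite = true;         // replace the value if the key already exists
opts.async = true;             // do not block the caller on the write
opts.allow_zero_copy = false;  // the cache copies the buffer, so the caller may free it
opts.evict_probability = 10;   // 10% chance to evict other items when space is full
opts.frequency = 1;            // >0: write directly to the warm lru segment
```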


@@ -16,9 +16,9 @@
#include "cache/datacache_utils.h"
#include "cache/disk_space_monitor.h"
#include "cache/mem_cache/lrucache_engine.h"
#include "cache/mem_cache/page_cache.h"
#include "cache/lrucache_engine.h"
#include "cache/mem_space_monitor.h"
#include "cache/object_cache/page_cache.h"
#include "common/status.h"
#include "gutil/strings/split.h"
#include "gutil/strings/strip.h"
@@ -27,8 +27,8 @@
#include "util/parse_util.h"
#ifdef WITH_STARCACHE
#include "cache/disk_cache/starcache_engine.h"
#include "cache/peer_cache_engine.h"
#include "cache/starcache_engine.h"
#endif
namespace starrocks {
@@ -44,6 +44,11 @@ Status DataCache::init(const std::vector<StorePath>& store_paths) {
_block_cache = std::make_shared<BlockCache>();
_page_cache = std::make_shared<StoragePageCache>();
#if defined(WITH_STARCACHE)
_local_disk_cache_engine = "starcache";
#endif
_local_mem_cache_engine = "lrucache";
if (!config::datacache_enable) {
config::disable_storage_page_cache = true;
config::block_cache_enable = false;
@@ -168,88 +173,89 @@ BlockCacheOptions DataCache::_init_block_cache_options() {
return cache_options;
}
#if defined(WITH_STARCACHE)
StatusOr<DiskCacheOptions> DataCache::_init_disk_cache_options() {
DiskCacheOptions cache_options;
if (_local_disk_cache_engine == "starcache") {
#ifdef USE_STAROS
std::vector<string> corresponding_starlet_dirs;
if (config::datacache_unified_instance_enable && !config::starlet_cache_dir.empty()) {
// in older versions, users might set `starlet_cache_dir` instead of `storage_root_path` for the starlet cache,
// so we need to move the starlet cache into storage_root_path/datacache
auto s = DataCacheUtils::get_corresponding_starlet_cache_dir(_store_paths, config::starlet_cache_dir);
if (!s.ok()) {
LOG(WARNING) << s.status().message() << ", change config::datacache_unified_instance_enable to false";
config::datacache_unified_instance_enable = false;
} else {
corresponding_starlet_dirs = *s;
}
}
int idx = 0;
#endif
for (auto& root_path : _store_paths) {
// Because we have unified the datacache between datalake and starlet, we also need to unify the
// cache path and quota.
// To reuse the old cache data in `starlet_cache` directory, we try to rename it to the new `datacache`
// directory if it exists. To avoid the risk of cross disk renaming of a large amount of cached data,
// we do not automatically rename it when the source and destination directories are on different disks.
// In this case, users should manually remount the directories and restart them.
std::string datacache_path = root_path.path + "/datacache";
#ifdef USE_STAROS
if (config::datacache_unified_instance_enable) {
std::string starlet_cache_path;
if (idx < corresponding_starlet_dirs.size()) {
starlet_cache_path = corresponding_starlet_dirs[idx++];
std::vector<string> corresponding_starlet_dirs;
if (config::datacache_unified_instance_enable && !config::starlet_cache_dir.empty()) {
// in older versions, users might set `starlet_cache_dir` instead of `storage_root_path` for the starlet cache,
// so we need to move the starlet cache into storage_root_path/datacache
auto s = DataCacheUtils::get_corresponding_starlet_cache_dir(_store_paths, config::starlet_cache_dir);
if (!s.ok()) {
LOG(WARNING) << s.status().message() << ", change config::datacache_unified_instance_enable to false";
config::datacache_unified_instance_enable = false;
} else {
starlet_cache_path = root_path.path + "/starlet_cache/star_cache";
corresponding_starlet_dirs = *s;
}
RETURN_IF_ERROR(DataCacheUtils::change_disk_path(starlet_cache_path, datacache_path));
}
int idx = 0;
#endif
// Create it if it does not exist
Status st = FileSystem::Default()->create_dir_if_missing(datacache_path);
if (!st.ok()) {
LOG(ERROR) << "Fail to create datacache directory: " << datacache_path << ", reason: " << st.message();
return Status::InternalError("Fail to create datacache directory");
}
ASSIGN_OR_RETURN(int64_t disk_size, DataCacheUtils::parse_conf_datacache_disk_size(
datacache_path, config::datacache_disk_size, -1));
for (auto& root_path : _store_paths) {
// Because we have unified the datacache between datalake and starlet, we also need to unify the
// cache path and quota.
// To reuse the old cache data in `starlet_cache` directory, we try to rename it to the new `datacache`
// directory if it exists. To avoid the risk of cross disk renaming of a large amount of cached data,
// we do not automatically rename it when the source and destination directories are on different disks.
// In this case, users should manually remount the directories and restart them.
std::string datacache_path = root_path.path + "/datacache";
#ifdef USE_STAROS
// If `datacache_disk_size` is manually set to a positive value, we will use the maximum cache quota between
// the datalake and starlet caches as the quota of the unified cache. Otherwise, the cache quota will remain zero
// and then be automatically adjusted based on the currently available disk space.
if (config::datacache_unified_instance_enable &&
(!config::enable_datacache_disk_auto_adjust || disk_size > 0)) {
ASSIGN_OR_RETURN(
int64_t starlet_cache_size,
DataCacheUtils::parse_conf_datacache_disk_size(
datacache_path, fmt::format("{}%", config::starlet_star_cache_disk_size_percent), -1));
disk_size = std::max(disk_size, starlet_cache_size);
}
if (config::datacache_unified_instance_enable) {
std::string starlet_cache_path;
if (idx < corresponding_starlet_dirs.size()) {
starlet_cache_path = corresponding_starlet_dirs[idx++];
} else {
starlet_cache_path = root_path.path + "/starlet_cache/star_cache";
}
RETURN_IF_ERROR(DataCacheUtils::change_disk_path(starlet_cache_path, datacache_path));
}
#endif
cache_options.dir_spaces.push_back({.path = datacache_path, .size = static_cast<size_t>(disk_size)});
}
// Create it if it does not exist
Status st = FileSystem::Default()->create_dir_if_missing(datacache_path);
if (!st.ok()) {
LOG(ERROR) << "Fail to create datacache directory: " << datacache_path << ", reason: " << st.message();
return Status::InternalError("Fail to create datacache directory");
}
if (cache_options.dir_spaces.empty()) {
config::enable_datacache_disk_auto_adjust = false;
}
ASSIGN_OR_RETURN(int64_t disk_size, DataCacheUtils::parse_conf_datacache_disk_size(
datacache_path, config::datacache_disk_size, -1));
#ifdef USE_STAROS
// If `datacache_disk_size` is manually set to a positive value, we will use the maximum cache quota between
// the datalake and starlet caches as the quota of the unified cache. Otherwise, the cache quota will remain zero
// and then be automatically adjusted based on the currently available disk space.
if (config::datacache_unified_instance_enable &&
(!config::enable_datacache_disk_auto_adjust || disk_size > 0)) {
ASSIGN_OR_RETURN(
int64_t starlet_cache_size,
DataCacheUtils::parse_conf_datacache_disk_size(
datacache_path, fmt::format("{}%", config::starlet_star_cache_disk_size_percent), -1));
disk_size = std::max(disk_size, starlet_cache_size);
}
#endif
cache_options.dir_spaces.push_back({.path = datacache_path, .size = static_cast<size_t>(disk_size)});
}
cache_options.block_size = config::datacache_block_size;
cache_options.max_flying_memory_mb = config::datacache_max_flying_memory_mb;
cache_options.max_concurrent_inserts = config::datacache_max_concurrent_inserts;
cache_options.enable_checksum = config::datacache_checksum_enable;
cache_options.enable_direct_io = config::datacache_direct_io_enable;
cache_options.skip_read_factor = config::datacache_skip_read_factor;
cache_options.scheduler_threads_per_cpu = config::datacache_scheduler_threads_per_cpu;
cache_options.enable_datacache_persistence = config::datacache_persistence_enable;
cache_options.inline_item_count_limit = config::datacache_inline_item_count_limit;
cache_options.eviction_policy = config::datacache_eviction_policy;
if (cache_options.dir_spaces.empty()) {
config::enable_datacache_disk_auto_adjust = false;
}
cache_options.block_size = config::datacache_block_size;
cache_options.max_flying_memory_mb = config::datacache_max_flying_memory_mb;
cache_options.max_concurrent_inserts = config::datacache_max_concurrent_inserts;
cache_options.enable_checksum = config::datacache_checksum_enable;
cache_options.enable_direct_io = config::datacache_direct_io_enable;
cache_options.enable_tiered_cache = config::datacache_tiered_cache_enable;
cache_options.skip_read_factor = config::datacache_skip_read_factor;
cache_options.scheduler_threads_per_cpu = config::datacache_scheduler_threads_per_cpu;
cache_options.enable_datacache_persistence = config::datacache_persistence_enable;
cache_options.inline_item_count_limit = config::datacache_inline_item_count_limit;
cache_options.eviction_policy = config::datacache_eviction_policy;
}
return cache_options;
}
#endif
static bool parse_resource_str(const string& str, string* value) {
if (!str.empty()) {
@@ -283,7 +289,7 @@ void DataCache::try_release_resource_before_core_dump() {
};
if (_local_mem_cache != nullptr && need_release("data_cache")) {
(void)_local_mem_cache->update_mem_quota(0);
(void)_local_mem_cache->update_mem_quota(0, false);
}
}


@@ -14,9 +14,8 @@
#pragma once
#include "cache/disk_cache/block_cache.h"
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/mem_cache/local_mem_cache_engine.h"
#include "cache/block_cache/block_cache.h"
#include "cache/local_cache_engine.h"
#include "common/status.h"
namespace starrocks {
@@ -40,13 +39,16 @@ public:
void try_release_resource_before_core_dump();
void set_local_disk_cache(std::shared_ptr<LocalDiskCacheEngine> local_disk_cache) {
void set_local_mem_cache(std::shared_ptr<LocalCacheEngine> local_mem_cache) {
_local_mem_cache = std::move(local_mem_cache);
}
void set_local_disk_cache(std::shared_ptr<LocalCacheEngine> local_disk_cache) {
_local_disk_cache = std::move(local_disk_cache);
}
void set_page_cache(std::shared_ptr<StoragePageCache> page_cache) { _page_cache = std::move(page_cache); }
LocalMemCacheEngine* local_mem_cache() { return _local_mem_cache.get(); }
LocalDiskCacheEngine* local_disk_cache() { return _local_disk_cache.get(); }
LocalCacheEngine* local_mem_cache() { return _local_mem_cache.get(); }
LocalCacheEngine* local_disk_cache() { return _local_disk_cache.get(); }
BlockCache* block_cache() const { return _block_cache.get(); }
void set_block_cache(std::shared_ptr<BlockCache> block_cache) { _block_cache = std::move(block_cache); }
StoragePageCache* page_cache() const { return _page_cache.get(); }
@@ -61,11 +63,11 @@ public:
private:
StatusOr<MemCacheOptions> _init_mem_cache_options();
StatusOr<DiskCacheOptions> _init_disk_cache_options();
RemoteCacheOptions _init_remote_cache_options();
BlockCacheOptions _init_block_cache_options();
#if defined(WITH_STARCACHE)
StatusOr<DiskCacheOptions> _init_disk_cache_options();
Status _init_starcache_engine(DiskCacheOptions* cache_options);
Status _init_peer_cache(const RemoteCacheOptions& cache_options);
#endif
@@ -76,8 +78,10 @@ private:
std::vector<StorePath> _store_paths;
// cache engine
std::shared_ptr<LocalMemCacheEngine> _local_mem_cache;
std::shared_ptr<LocalDiskCacheEngine> _local_disk_cache;
std::string _local_mem_cache_engine;
std::string _local_disk_cache_engine;
std::shared_ptr<LocalCacheEngine> _local_mem_cache;
std::shared_ptr<LocalCacheEngine> _local_disk_cache;
std::shared_ptr<RemoteCacheEngine> _remote_cache;
std::shared_ptr<BlockCache> _block_cache;


@@ -21,29 +21,26 @@
#include "absl/status/statusor.h"
#include "absl/strings/str_split.h"
#include "cache/mem_cache/local_mem_cache_engine.h"
#include "fs/fs.h"
#include "gutil/strings/split.h"
#include "util/parse_util.h"
namespace starrocks {
void DataCacheUtils::set_metrics_to_thrift(TDataCacheMetrics& t_metrics, const DataCacheDiskMetrics& metrics) {
void DataCacheUtils::set_metrics_from_thrift(TDataCacheMetrics& t_metrics, const DataCacheMetrics& metrics) {
t_metrics.__set_status(DataCacheStatusUtils::to_thrift(metrics.status));
t_metrics.__set_disk_quota_bytes(metrics.disk_quota_bytes);
t_metrics.__set_disk_used_bytes(metrics.disk_used_bytes);
}
void DataCacheUtils::set_metrics_to_thrift(TDataCacheMetrics& t_metrics, const DataCacheMemMetrics& metrics) {
t_metrics.__set_mem_quota_bytes(metrics.mem_quota_bytes);
t_metrics.__set_mem_used_bytes(metrics.mem_used_bytes);
}
#ifdef WITH_STARCACHE
void DataCacheUtils::set_disk_metrics_to_thrift(TDataCacheMetrics& t_metrics, const StarCacheMetrics& metrics) {
void DataCacheUtils::set_metrics_from_thrift(TDataCacheMetrics& t_metrics, const StarCacheMetrics& metrics) {
t_metrics.__set_status(DataCacheStatusUtils::to_thrift(static_cast<DataCacheStatus>(metrics.status)));
t_metrics.__set_disk_quota_bytes(metrics.disk_quota_bytes);
t_metrics.__set_disk_used_bytes(metrics.disk_used_bytes);
t_metrics.__set_mem_quota_bytes(metrics.mem_quota_bytes);
t_metrics.__set_mem_used_bytes(metrics.mem_used_bytes);
}
#endif


@@ -15,21 +15,18 @@
#pragma once
#include "cache/cache_metrics.h"
#include "cache/disk_cache/starcache_engine.h"
#include "cache/local_cache_engine.h"
#include "gen_cpp/DataCache_types.h"
#include "storage/options.h"
namespace starrocks {
class DataCacheMemMetrics;
class DataCacheUtils {
public:
static void set_metrics_to_thrift(TDataCacheMetrics& t_metrics, const DataCacheDiskMetrics& metrics);
static void set_metrics_to_thrift(TDataCacheMetrics& t_metrics, const DataCacheMemMetrics& metrics);
static void set_metrics_from_thrift(TDataCacheMetrics& t_metrics, const DataCacheMetrics& metrics);
#ifdef WITH_STARCACHE
static void set_disk_metrics_to_thrift(TDataCacheMetrics& t_metrics, const StarCacheMetrics& metrics);
static void set_metrics_from_thrift(TDataCacheMetrics& t_metrics, const StarCacheMetrics& metrics);
#endif
static Status parse_conf_datacache_mem_size(const std::string& conf_mem_size_str, int64_t mem_limit,


@@ -1,137 +0,0 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "cache/cache_metrics.h"
#include "cache/disk_cache/io_buffer.h"
#include "common/status.h"
namespace starrocks {
struct DirSpace {
std::string path;
size_t size;
};
struct DiskCacheOptions {
// basic
size_t mem_space_size = 0;
std::vector<DirSpace> dir_spaces;
std::string meta_path;
// advanced
size_t block_size = 0;
bool enable_checksum = false;
bool enable_direct_io = false;
bool enable_datacache_persistence = false;
size_t max_concurrent_inserts = 0;
size_t max_flying_memory_mb = 0;
double scheduler_threads_per_cpu = 0;
double skip_read_factor = 0;
uint32_t inline_item_count_limit = 0;
std::string eviction_policy;
};
struct DiskCacheWriteOptions {
int8_t priority = 0;
// If ttl_seconds=0 (default), no ttl restriction will be set. If an old one exists, remove it.
uint64_t ttl_seconds = 0;
// If overwrite=true, the cache value will be replaced if it already exists.
bool overwrite = false;
bool async = false;
// When allow_zero_copy=true, it means the caller can ensure the target buffer not be released before
// to write finish. So the cache library can use the buffer directly without copying it to another buffer.
bool allow_zero_copy = false;
std::function<void(int, const std::string&)> callback = nullptr;
// The base frequency for target cache.
// When using multiple segment lru, a higher frequency may cause the cache is written to warm segment directly.
// For the default cache options, that `lru_segment_freq_bits` is 0:
// * The default `frequency=0` indicates the cache will be written to cold segment.
// * A frequency value greater than 0 indicates writing this cache directly to the warm segment.
int8_t frequency = 0;
struct Stats {
int64_t write_mem_bytes = 0;
int64_t write_disk_bytes = 0;
} stats;
};
struct DiskCacheReadOptions {
bool use_adaptor = false;
std::string remote_host;
int32_t remote_port;
struct Stats {
int64_t read_mem_bytes = 0;
int64_t read_disk_bytes = 0;
} stats;
};
struct DataCacheDiskMetrics {
DataCacheStatus status;
size_t disk_quota_bytes;
size_t disk_used_bytes;
};
class LocalDiskCacheEngine {
public:
virtual ~LocalDiskCacheEngine() = default;
virtual bool is_initialized() const = 0;
// Write data to cache
virtual Status write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) = 0;
// Read data from cache, it returns the data size if successful; otherwise the error status
// will be returned.
virtual Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) = 0;
virtual bool exist(const std::string& key) const = 0;
// Remove data from cache.
virtual Status remove(const std::string& key) = 0;
// Update the datacache disk space information, such as disk quota or disk path.
virtual Status update_disk_spaces(const std::vector<DirSpace>& spaces) = 0;
// Update the datacache inline cache count limit
virtual Status update_inline_cache_count_limit(int32_t limit) = 0;
virtual const DataCacheDiskMetrics cache_metrics() const = 0;
virtual void record_read_remote(size_t size, int64_t latency_us) = 0;
virtual void record_read_cache(size_t size, int64_t latency_us) = 0;
virtual Status shutdown() = 0;
virtual bool has_disk_cache() const = 0;
virtual bool available() const = 0;
virtual void disk_spaces(std::vector<DirSpace>* spaces) const = 0;
// Get the lookup count, including cache hit count and cache miss count.
virtual size_t lookup_count() const = 0;
// Get the cache hit count.
virtual size_t hit_count() const = 0;
// Remove all cache entries that are not actively in use.
virtual Status prune() = 0;
};
} // namespace starrocks


@@ -224,10 +224,10 @@ dev_t DiskSpace::FileSystemWrapper::device_id(const std::string& path) {
return DataCacheUtils::disk_device_id(path);
}
DiskSpaceMonitor::DiskSpaceMonitor(LocalDiskCacheEngine* cache)
DiskSpaceMonitor::DiskSpaceMonitor(LocalCacheEngine* cache)
: _cache(cache), _fs(std::make_shared<DiskSpace::FileSystemWrapper>()) {}
DiskSpaceMonitor::DiskSpaceMonitor(LocalDiskCacheEngine* cache, std::shared_ptr<DiskSpace::FileSystemWrapper> fs)
DiskSpaceMonitor::DiskSpaceMonitor(LocalCacheEngine* cache, std::shared_ptr<DiskSpace::FileSystemWrapper> fs)
: _cache(cache), _fs(std::move(fs)) {}
DiskSpaceMonitor::~DiskSpaceMonitor() {


@@ -20,7 +20,7 @@
#include <unordered_map>
#include "cache/cache_options.h"
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/local_cache_engine.h"
#include "common/status.h"
#include "fs/fs.h"
#include "util/disk_info.h"
@@ -118,8 +118,8 @@ private:
class DiskSpaceMonitor {
public:
DiskSpaceMonitor(LocalDiskCacheEngine* cache);
DiskSpaceMonitor(LocalDiskCacheEngine* cache, std::shared_ptr<DiskSpace::FileSystemWrapper> fs);
DiskSpaceMonitor(LocalCacheEngine* cache);
DiskSpaceMonitor(LocalCacheEngine* cache, std::shared_ptr<DiskSpace::FileSystemWrapper> fs);
~DiskSpaceMonitor();
Status init(std::vector<DirSpace>* dir_spaces);
@@ -152,7 +152,7 @@ private:
size_t _total_cache_usage = 0;
size_t _total_cache_quota = 0;
LocalDiskCacheEngine* _cache = nullptr;
LocalCacheEngine* _cache = nullptr;
std::shared_ptr<DiskSpace::FileSystemWrapper> _fs = nullptr;
};


@@ -14,69 +14,46 @@
#pragma once
#include "cache/cache_metrics.h"
#include "cache/disk_cache/io_buffer.h"
#include "cache/block_cache/io_buffer.h"
#include "cache/cache_options.h"
#include "cache/object_cache/cache_types.h"
#include "common/status.h"
namespace starrocks {
class CacheKey;
struct MemCacheOptions {
size_t mem_space_size = 0;
};
enum class LocalCacheEngineType { STARCACHE, LRUCACHE };
struct MemCacheWriteOptions {
// The priority of the cache object, only support 0 and 1 now.
int8_t priority = 0;
// The probability to evict other items if the cache space is full, which can help avoid frequent cache replacement
// and improve cache hit rate sometimes.
// It is expressed as a percentage. If evict_probability is 10, it means the probability to evict other data is 10%.
int32_t evict_probability = 100;
};
struct MemCacheReadOptions {};
struct MemCacheHandle {};
using MemCacheHandlePtr = MemCacheHandle*;
// using CacheDeleter = std::function<void(const std::string&, void*)>;
//
// We only use the deleter function of the lru cache temporarily.
// Maybe a std::function object or a function pointer like `void (*)(std::string&, void*)` which
// independent on lru cache is more appropriate, but it is not easy to convert them to the lru
// cache deleter when using a lru cache module.
using MemCacheDeleter = void (*)(const CacheKey&, void*);
struct DataCacheMemMetrics {
size_t mem_quota_bytes = 0;
size_t mem_used_bytes = 0;
};
class LocalMemCacheEngine {
class LocalCacheEngine {
public:
virtual ~LocalMemCacheEngine() = default;
virtual ~LocalCacheEngine() = default;
virtual bool is_initialized() const = 0;
// Write data to cache
virtual Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) = 0;
// Read data from the cache. It returns the data size if successful; otherwise an error status
// is returned.
virtual Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
ReadCacheOptions* options) = 0;
// Insert object to cache
virtual Status insert(const std::string& key, void* value, size_t size, MemCacheDeleter deleter,
MemCacheHandlePtr* handle, const MemCacheWriteOptions& options) = 0;
virtual Status insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) = 0;
// Look up an object from the cache; the `handle` wraps the object pointer.
// As long as the handle object is not destroyed and the user does not manually call the `handle->release()`
// function, the corresponding pointer will never be freed by the cache system.
virtual Status lookup(const std::string& key, MemCacheHandlePtr* handle,
MemCacheReadOptions* options = nullptr) = 0;
virtual Status lookup(const std::string& key, ObjectCacheHandlePtr* handle,
ObjectCacheReadOptions* options = nullptr) = 0;
// Release a handle returned by a previous insert() or lookup().
// The handle must have not been released yet.
virtual void release(MemCacheHandlePtr handle) = 0;
virtual void release(ObjectCacheHandlePtr handle) = 0;
// Return the value in the given handle returned by a previous insert() or lookup().
// The handle must have not been released yet.
virtual const void* value(MemCacheHandlePtr handle) = 0;
virtual const void* value(ObjectCacheHandlePtr handle) = 0;
virtual bool exist(const std::string& key) const = 0;
@@ -88,15 +65,29 @@ public:
virtual Status adjust_mem_quota(int64_t delta, size_t min_capacity) = 0;
// Update the datacache memory quota.
virtual Status update_mem_quota(size_t quota_bytes) = 0;
virtual Status update_mem_quota(size_t quota_bytes, bool flush_to_disk) = 0;
virtual const DataCacheMemMetrics cache_metrics() const = 0;
// Update the datacache disk space information, such as disk quota or disk path.
virtual Status update_disk_spaces(const std::vector<DirSpace>& spaces) = 0;
// Update the datacache inline cache count limit
virtual Status update_inline_cache_count_limit(int32_t limit) = 0;
virtual const DataCacheMetrics cache_metrics() const = 0;
virtual void record_read_remote(size_t size, int64_t latency_us) = 0;
virtual void record_read_cache(size_t size, int64_t latency_us) = 0;
virtual Status shutdown() = 0;
virtual LocalCacheEngineType engine_type() = 0;
virtual bool has_mem_cache() const = 0;
virtual bool has_disk_cache() const = 0;
virtual bool available() const = 0;
virtual bool mem_cache_available() const = 0;
virtual void disk_spaces(std::vector<DirSpace>* spaces) const = 0;
virtual size_t mem_quota() const = 0;
virtual size_t mem_usage() const = 0;
@@ -107,17 +98,11 @@ public:
// Get the cache hit count.
virtual size_t hit_count() const = 0;
// Get the insert count.
virtual size_t insert_count() const = 0;
// Get the insert evict count.
virtual size_t insert_evict_count() const = 0;
// Get the release evict count.
virtual size_t release_evict_count() const = 0;
// Get all cache metrics together.
virtual const ObjectCacheMetrics metrics() const = 0;
// Remove all cache entries that are not actively in use.
virtual Status prune() = 0;
};
} // namespace starrocks
} // namespace starrocks
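The lookup/release contract documented above is easy to get wrong; a minimal sketch of the intended handle discipline, assuming only the interface declared in this header (the function itself is hypothetical):

```cpp
#include "cache/local_cache_engine.h"

// Returns true if `key` is cached with a non-null value. The value pointer is
// only valid while the handle is held, so it is not touched after release().
bool probe(LocalCacheEngine* engine, const std::string& key) {
    ObjectCacheHandlePtr handle = nullptr;
    if (!engine->lookup(key, &handle).ok()) {
        return false;                           // not cached
    }
    const void* value = engine->value(handle);  // pinned while the handle is held
    bool found = (value != nullptr);
    engine->release(handle);                    // unpin; `value` is now invalid
    return found;
}
```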


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cache/mem_cache/lrucache_engine.h"
#include "cache/lrucache_engine.h"
#include <butil/fast_rand.h>
@@ -23,24 +23,33 @@ Status LRUCacheEngine::init(const MemCacheOptions& options) {
return Status::OK();
}
Status LRUCacheEngine::insert(const std::string& key, void* value, size_t size, MemCacheDeleter deleter,
MemCacheHandlePtr* handle, const MemCacheWriteOptions& options) {
Status LRUCacheEngine::write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) {
return Status::NotSupported("LRUCache engine don't support write block");
}
Status LRUCacheEngine::read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
ReadCacheOptions* options) {
return Status::NotSupported("LRUCache engine don't support read block");
}
Status LRUCacheEngine::insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) {
if (!_check_write(size, options)) {
return Status::InternalError("cache insertion is rejected");
}
auto* lru_handle = _cache->insert(key, value, size, deleter, static_cast<CachePriority>(options.priority));
if (handle) {
*handle = reinterpret_cast<MemCacheHandlePtr>(lru_handle);
*handle = reinterpret_cast<ObjectCacheHandlePtr>(lru_handle);
}
return Status::OK();
}
Status LRUCacheEngine::lookup(const std::string& key, MemCacheHandlePtr* handle, MemCacheReadOptions* options) {
Status LRUCacheEngine::lookup(const std::string& key, ObjectCacheHandlePtr* handle, ObjectCacheReadOptions* options) {
auto* lru_handle = _cache->lookup(CacheKey(key));
if (!lru_handle) {
return Status::NotFound("no such entry");
}
*handle = reinterpret_cast<MemCacheHandlePtr>(lru_handle);
*handle = reinterpret_cast<ObjectCacheHandlePtr>(lru_handle);
return Status::OK();
}
@@ -59,13 +68,26 @@ Status LRUCacheEngine::remove(const std::string& key) {
return Status::OK();
}
Status LRUCacheEngine::update_mem_quota(size_t quota_bytes) {
Status LRUCacheEngine::update_mem_quota(size_t quota_bytes, bool flush_to_disk) {
_cache->set_capacity(quota_bytes);
return Status::OK();
}
const DataCacheMemMetrics LRUCacheEngine::cache_metrics() const {
return DataCacheMemMetrics{.mem_quota_bytes = _cache->get_capacity(), .mem_used_bytes = _cache->get_memory_usage()};
Status LRUCacheEngine::update_disk_spaces(const std::vector<DirSpace>& spaces) {
return Status::NotSupported("LRUCache engine don't support update disk spaces");
}
Status LRUCacheEngine::update_inline_cache_count_limit(int32_t limit) {
return Status::NotSupported("LRUCache engine don't support update inline cache count limit");
}
const DataCacheMetrics LRUCacheEngine::cache_metrics() const {
return DataCacheMetrics{.status = DataCacheStatus::NORMAL,
.mem_quota_bytes = _cache->get_capacity(),
.mem_used_bytes = _cache->get_memory_usage(),
.disk_quota_bytes = 0,
.disk_used_bytes = 0,
.meta_used_bytes = 0};
}
Status LRUCacheEngine::shutdown() {
@@ -78,12 +100,12 @@ Status LRUCacheEngine::prune() {
return Status::OK();
}
void LRUCacheEngine::release(MemCacheHandlePtr handle) {
void LRUCacheEngine::release(ObjectCacheHandlePtr handle) {
auto lru_handle = reinterpret_cast<Cache::Handle*>(handle);
_cache->release(lru_handle);
}
const void* LRUCacheEngine::value(MemCacheHandlePtr handle) {
const void* LRUCacheEngine::value(ObjectCacheHandlePtr handle) {
auto lru_handle = reinterpret_cast<Cache::Handle*>(handle);
return _cache->value(lru_handle);
}
@@ -111,19 +133,18 @@ size_t LRUCacheEngine::hit_count() const {
return _cache->get_hit_count();
}
size_t LRUCacheEngine::insert_count() const {
return _cache->get_insert_count();
const ObjectCacheMetrics LRUCacheEngine::metrics() const {
ObjectCacheMetrics m;
m.capacity = _cache->get_capacity();
m.usage = _cache->get_memory_usage();
m.lookup_count = _cache->get_lookup_count();
m.hit_count = _cache->get_hit_count();
// Unsupported
m.object_item_count = 0;
return m;
}
size_t LRUCacheEngine::insert_evict_count() const {
return _cache->get_insert_evict_count();
}
size_t LRUCacheEngine::release_evict_count() const {
return _cache->get_release_evict_count();
}
bool LRUCacheEngine::_check_write(size_t charge, const MemCacheWriteOptions& options) const {
bool LRUCacheEngine::_check_write(size_t charge, const ObjectCacheWriteOptions& options) const {
if (options.evict_probability >= 100) {
return true;
}
@@ -143,4 +164,5 @@ bool LRUCacheEngine::_check_write(size_t charge, const MemCacheWriteOptions& opt
}
return false;
}
} // namespace starrocks
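The `_check_write` hunk above only shows the boundary cases; the elided middle rolls a random number against `evict_probability`. A standalone sketch of that admission idea (a hypothetical helper; the real method also weighs the entry's charge against the remaining quota):

```cpp
#include <butil/fast_rand.h>
#include "cache/object_cache/cache_types.h"

// Percentage-based admission: when the cache is full, only this fraction of
// inserts is allowed to displace existing entries.
bool admit_when_full(const ObjectCacheWriteOptions& options) {
    if (options.evict_probability >= 100) {
        return true;   // always admit
    }
    if (options.evict_probability <= 0) {
        return false;  // never displace other entries
    }
    // butil::fast_rand_less_than(100) is uniform over [0, 100)
    return butil::fast_rand_less_than(100) < static_cast<uint64_t>(options.evict_probability);
}
```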


@@ -16,11 +16,11 @@
#include <atomic>
#include "cache/mem_cache/local_mem_cache_engine.h"
#include "cache/local_cache_engine.h"
#include "util/lru_cache.h"
namespace starrocks {
class LRUCacheEngine final : public LocalMemCacheEngine {
class LRUCacheEngine final : public LocalCacheEngine {
public:
LRUCacheEngine() = default;
~LRUCacheEngine() override = default;
@@ -28,25 +28,36 @@ public:
Status init(const MemCacheOptions& options);
bool is_initialized() const override { return _initialized.load(std::memory_order_relaxed); }
Status insert(const std::string& key, void* value, size_t size, MemCacheDeleter deleter, MemCacheHandlePtr* handle,
const MemCacheWriteOptions& options) override;
Status lookup(const std::string& key, MemCacheHandlePtr* handle, MemCacheReadOptions* options) override;
Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer, ReadCacheOptions* options) override;
Status insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) override;
Status lookup(const std::string& key, ObjectCacheHandlePtr* handle, ObjectCacheReadOptions* options) override;
bool exist(const std::string& key) const override;
Status remove(const std::string& key) override;
Status update_mem_quota(size_t quota_bytes) override;
Status update_mem_quota(size_t quota_bytes, bool flush_to_disk) override;
Status update_disk_spaces(const std::vector<DirSpace>& spaces) override;
Status update_inline_cache_count_limit(int32_t limit) override;
const DataCacheMemMetrics cache_metrics() const override;
const DataCacheMetrics cache_metrics() const override;
void record_read_remote(size_t size, int64_t latency_us) override {}
void record_read_cache(size_t size, int64_t latency_us) override {}
Status shutdown() override;
LocalCacheEngineType engine_type() override { return LocalCacheEngineType::LRUCACHE; }
bool has_mem_cache() const override { return _cache->get_capacity() > 0; }
bool has_disk_cache() const override { return false; }
bool available() const override { return is_initialized() && has_mem_cache(); }
bool mem_cache_available() const override { return is_initialized() && has_mem_cache(); }
void release(MemCacheHandlePtr handle) override;
const void* value(MemCacheHandlePtr handle) override;
void disk_spaces(std::vector<DirSpace>* spaces) const override {}
void release(ObjectCacheHandlePtr handle) override;
const void* value(ObjectCacheHandlePtr handle) override;
Status adjust_mem_quota(int64_t delta, size_t min_capacity) override;
@@ -57,18 +68,14 @@ public:
size_t hit_count() const override;
size_t insert_count() const override;
size_t insert_evict_count() const override;
size_t release_evict_count() const override;
const ObjectCacheMetrics metrics() const override;
Status prune() override;
private:
bool _check_write(size_t charge, const MemCacheWriteOptions& options) const;
bool _check_write(size_t charge, const ObjectCacheWriteOptions& options) const;
std::atomic<bool> _initialized = false;
std::unique_ptr<ShardedLRUCache> _cache;
};
} // namespace starrocks
} // namespace starrocks


@@ -14,7 +14,7 @@
#include "cache/mem_space_monitor.h"
#include "cache/mem_cache/page_cache.h"
#include "cache/object_cache/page_cache.h"
#include "common/config.h"
#include "runtime/exec_env.h"
#include "runtime/mem_tracker.h"

be/src/cache/object_cache/cache_types.h vendored (new file, 75 lines)

@@ -0,0 +1,75 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <ostream>
#include <string>
// Importing the lru cache header here is not ideal; it is only for temporary compatibility with old deleters.
#include "util/lru_cache.h"
namespace starrocks {
enum class ObjectCacheModuleType { LRUCACHE, STARCACHE };
struct ObjectCacheWriteOptions {
// The priority of the cache object; only 0 and 1 are supported now.
int8_t priority = 0;
// If ttl_seconds=0 (default), no ttl restriction will be set. If an old one exists, remove it.
uint64_t ttl_seconds = 0;
// If overwrite=true, the cache value will be replaced if it already exists.
bool overwrite = false;
// The probability to evict other items if the cache space is full, which can help avoid frequent cache replacement
// and improve cache hit rate sometimes.
// It is expressed as a percentage. If evict_probability is 10, it means the probability to evict other data is 10%.
int32_t evict_probability = 100;
};
struct ObjectCacheReadOptions {};
struct ObjectCacheHandle {};
struct ObjectCacheMetrics {
size_t capacity = 0;
size_t usage = 0;
size_t lookup_count = 0;
size_t hit_count = 0;
size_t object_item_count = 0;
};
using ObjectCacheHandlePtr = ObjectCacheHandle*;
// using CacheDeleter = std::function<void(const std::string&, void*)>;
//
// We only use the deleter function of the lru cache temporarily.
// A std::function object or a function pointer like `void (*)(std::string&, void*)` that is
// independent of the lru cache might be more appropriate, but it is not easy to convert either
// to the lru cache deleter when using a lru cache module.
using ObjectCacheDeleter = void (*)(const CacheKey&, void*);
inline std::ostream& operator<<(std::ostream& os, const ObjectCacheModuleType& module) {
switch (module) {
case ObjectCacheModuleType::LRUCACHE:
os << "lrucache";
break;
case ObjectCacheModuleType::STARCACHE:
os << "starcache";
break;
}
return os;
}
} // namespace starrocks
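A quick usage note for the stream operator defined above (a sketch, not part of the change):

```cpp
#include <sstream>
#include "cache/object_cache/cache_types.h"

// The operator renders the enum as its lower-case engine name.
std::string module_name(ObjectCacheModuleType type) {
    std::ostringstream oss;
    oss << type;  // "lrucache" or "starcache"
    return oss.str();
}
```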


@@ -32,7 +32,7 @@
// specific language governing permissions and limitations
// under the License.
#include "cache/mem_cache/page_cache.h"
#include "cache/object_cache/page_cache.h"
#include <malloc.h>
@@ -49,9 +49,6 @@ std::atomic<size_t> StoragePageCacheMetrics::released_page_handle_count{};
METRIC_DEFINE_UINT_GAUGE(page_cache_lookup_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_hit_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_insert_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_insert_evict_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_release_evict_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_capacity, MetricUnit::BYTES);
METRIC_DEFINE_UINT_GAUGE(page_cache_pinned_count, MetricUnit::BYTES);
@@ -64,22 +61,6 @@ void StoragePageCache::init_metrics() {
StarRocksMetrics::instance()->metrics()->register_hook(
"page_cache_hit_count", [this]() { page_cache_hit_count.set_value(get_hit_count()); });
StarRocksMetrics::instance()->metrics()->register_metric("page_cache_insert_count", &page_cache_insert_count);
StarRocksMetrics::instance()->metrics()->register_hook(
"page_cache_insert_count", [this]() { page_cache_insert_count.set_value(get_insert_count()); });
StarRocksMetrics::instance()->metrics()->register_metric("page_cache_insert_evict_count",
&page_cache_insert_evict_count);
StarRocksMetrics::instance()->metrics()->register_hook("page_cache_insert_evict_count", [this]() {
page_cache_insert_evict_count.set_value(get_insert_evict_count());
});
StarRocksMetrics::instance()->metrics()->register_metric("page_cache_release_evict_count",
&page_cache_release_evict_count);
StarRocksMetrics::instance()->metrics()->register_hook("page_cache_release_evict_count", [this]() {
page_cache_release_evict_count.set_value(get_release_evict_count());
});
StarRocksMetrics::instance()->metrics()->register_metric("page_cache_capacity", &page_cache_capacity);
StarRocksMetrics::instance()->metrics()->register_hook("page_cache_capacity",
[this]() { page_cache_capacity.set_value(get_capacity()); });
@@ -94,7 +75,7 @@ void StoragePageCache::prune() {
}
void StoragePageCache::set_capacity(size_t capacity) {
Status st = _cache->update_mem_quota(capacity);
Status st = _cache->update_mem_quota(capacity, false);
LOG_IF(INFO, !st.ok()) << "Fail to set cache capacity to " << capacity << ", reason: " << st.message();
}
@@ -110,18 +91,6 @@ uint64_t StoragePageCache::get_hit_count() const {
return _cache->hit_count();
}
uint64_t StoragePageCache::get_insert_count() const {
return _cache->insert_count();
}
uint64_t StoragePageCache::get_insert_evict_count() const {
return _cache->insert_evict_count();
}
uint64_t StoragePageCache::get_release_evict_count() const {
return _cache->release_evict_count();
}
bool StoragePageCache::adjust_capacity(int64_t delta, size_t min_capacity) {
Status st = _cache->adjust_mem_quota(delta, min_capacity);
if (!st.ok()) {
@@ -136,7 +105,7 @@ size_t StoragePageCache::get_pinned_count() const {
}
bool StoragePageCache::lookup(const std::string& key, PageCacheHandle* handle) {
MemCacheHandle* obj_handle = nullptr;
ObjectCacheHandle* obj_handle = nullptr;
Status st = _cache->lookup(key, &obj_handle);
if (!st.ok()) {
return false;
@@ -146,7 +115,7 @@ bool StoragePageCache::lookup(const std::string& key, PageCacheHandle* handle) {
return true;
}
Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* data, const MemCacheWriteOptions& opts,
Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* data, const ObjectCacheWriteOptions& opts,
PageCacheHandle* handle) {
#ifndef BE_TEST
int64_t mem_size = malloc_usable_size(data->data()) + sizeof(*data);
@@ -162,7 +131,7 @@ Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* da
delete cache_item;
};
MemCacheHandle* obj_handle = nullptr;
ObjectCacheHandle* obj_handle = nullptr;
// Use the mem size managed by the memory allocator as this record's charge size.
// At the same time, we record this record's size so the data can be fetched at lookup time.
Status st = _cache->insert(key, (void*)data, mem_size, deleter, &obj_handle, opts);
@@ -173,9 +142,9 @@ Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* da
return st;
}
Status StoragePageCache::insert(const std::string& key, void* data, int64_t size, MemCacheDeleter deleter,
const MemCacheWriteOptions& opts, PageCacheHandle* handle) {
MemCacheHandle* obj_handle = nullptr;
Status StoragePageCache::insert(const std::string& key, void* data, int64_t size, ObjectCacheDeleter deleter,
const ObjectCacheWriteOptions& opts, PageCacheHandle* handle) {
ObjectCacheHandle* obj_handle = nullptr;
Status st = _cache->insert(key, data, size, deleter, &obj_handle, opts);
if (st.ok()) {
*handle = PageCacheHandle(_cache, obj_handle);

View File

@ -43,7 +43,7 @@ namespace starrocks {
class PageCacheHandle;
class MemTracker;
struct MemCacheWriteOptions;
struct ObjectCacheWriteOptions;
// Page cache min size is 256MB
static constexpr int64_t kcacheMinSize = 268435456;
@ -66,9 +66,9 @@ public:
// Clients should call create_global_cache beforehand.
static StoragePageCache* instance() { return DataCache::GetInstance()->page_cache(); }
StoragePageCache(LocalMemCacheEngine* cache_engine) : _cache(cache_engine), _initialized(true) {}
StoragePageCache(LocalCacheEngine* cache_engine) : _cache(cache_engine), _initialized(true) {}
void init(LocalMemCacheEngine* cache_engine) {
void init(LocalCacheEngine* cache_engine) {
_cache = cache_engine;
_initialized.store(true, std::memory_order_relaxed);
}
@ -87,11 +87,11 @@ public:
// This function is thread-safe: when two clients concurrently insert the same key,
// it ensures that only one page is cached.
// The in_memory page has higher priority.
Status insert(const std::string& key, std::vector<uint8_t>* data, const MemCacheWriteOptions& opts,
Status insert(const std::string& key, std::vector<uint8_t>* data, const ObjectCacheWriteOptions& opts,
PageCacheHandle* handle);
Status insert(const std::string& key, void* data, int64_t size, MemCacheDeleter deleter,
const MemCacheWriteOptions& opts, PageCacheHandle* handle);
Status insert(const std::string& key, void* data, int64_t size, ObjectCacheDeleter deleter,
const ObjectCacheWriteOptions& opts, PageCacheHandle* handle);
size_t memory_usage() const { return _cache->mem_usage(); }
@ -103,12 +103,6 @@ public:
uint64_t get_hit_count() const;
uint64_t get_insert_count() const;
uint64_t get_insert_evict_count() const;
uint64_t get_release_evict_count() const;
bool adjust_capacity(int64_t delta, size_t min_capacity = 0);
void prune();
@ -121,7 +115,7 @@ public:
size_t get_pinned_count() const;
private:
LocalMemCacheEngine* _cache = nullptr;
LocalCacheEngine* _cache = nullptr;
std::atomic<bool> _initialized = false;
};
@ -131,12 +125,7 @@ private:
class PageCacheHandle {
public:
PageCacheHandle() = default;
PageCacheHandle(LocalMemCacheEngine* cache, MemCacheHandle* handle) : _cache(cache), _handle(handle) {}
// Don't allow copy and assign
PageCacheHandle(const PageCacheHandle&) = delete;
const PageCacheHandle& operator=(const PageCacheHandle&) = delete;
PageCacheHandle(LocalCacheEngine* cache, ObjectCacheHandle* handle) : _cache(cache), _handle(handle) {}
~PageCacheHandle() {
if (_handle != nullptr) {
StoragePageCacheMetrics::released_page_handle_count++;
@ -156,12 +145,16 @@ public:
return *this;
}
LocalMemCacheEngine* cache() const { return _cache; }
LocalCacheEngine* cache() const { return _cache; }
const void* data() const { return _cache->value(_handle); }
private:
LocalMemCacheEngine* _cache = nullptr;
MemCacheHandle* _handle = nullptr;
LocalCacheEngine* _cache = nullptr;
ObjectCacheHandle* _handle = nullptr;
// Don't allow copy and assign
PageCacheHandle(const PageCacheHandle&) = delete;
const PageCacheHandle& operator=(const PageCacheHandle&) = delete;
};
} // namespace starrocks
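
For orientation, a minimal usage sketch of the API declared above, assuming the global cache was already initialized through DataCache; the key format and the default-constructed write options are illustrative, and error handling is elided.

    #include "cache/object_cache/page_cache.h"

    // Sketch only: assumes an initialized StoragePageCache.
    void page_cache_usage_sketch(std::vector<uint8_t>* page) {
        using namespace starrocks;
        StoragePageCache* cache = StoragePageCache::instance();
        ObjectCacheWriteOptions opts; // defaults; real callers tune priority/ttl
        PageCacheHandle handle;
        // On success the cache takes ownership of `page` and frees it on
        // eviction through the deleter built inside insert().
        if (cache->insert("segment:1:page:0", page, opts, &handle).ok()) {
            const void* cached = handle.data(); // valid while `handle` pins the entry
            (void)cached;
        }
        PageCacheHandle hit;
        if (cache->lookup("segment:1:page:0", &hit)) {
            // PageCacheHandle is move-only (copy is deleted above), so the pin
            // is released exactly once, in the destructor.
        }
    }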

View File

@ -29,7 +29,7 @@ Status PeerCacheEngine::init(const RemoteCacheOptions& options) {
}
Status PeerCacheEngine::read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) {
ReadCacheOptions* options) {
if (options->use_adaptor && !_cache_adaptor->check_read_cache()) {
return Status::ResourceBusy("resource is busy");
}

View File

@ -26,10 +26,9 @@ public:
Status init(const RemoteCacheOptions& options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer, ReadCacheOptions* options) override;
Status write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) override {
Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) override {
return Status::NotSupported("write data to peer cache is unsupported");
}

View File

@ -14,16 +14,12 @@
#pragma once
#include "cache/disk_cache/io_buffer.h"
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/block_cache/io_buffer.h"
#include "cache/cache_options.h"
#include "common/status.h"
namespace starrocks {
struct RemoteCacheOptions {
double skip_read_factor = 0;
};
class RemoteCacheEngine {
public:
virtual ~RemoteCacheEngine() = default;
@ -32,12 +28,12 @@ public:
virtual Status init(const RemoteCacheOptions& options) = 0;
// Write data to remote cache
virtual Status write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) = 0;
virtual Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) = 0;
// Read data from the remote cache. It returns the data size if successful; otherwise the error status
// will be returned.
virtual Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) = 0;
ReadCacheOptions* options) = 0;
// Remove data from cache.
virtual Status remove(const std::string& key) = 0;

View File

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cache/disk_cache/starcache_engine.h"
#include "cache/starcache_engine.h"
#include <filesystem>
@ -55,6 +55,8 @@ Status StarCacheEngine::init(const DiskCacheOptions& options) {
}
opt.lru_segment_freq_bits = 0;
_enable_tiered_cache = options.enable_tiered_cache;
_enable_datacache_persistence = options.enable_datacache_persistence;
_cache = std::make_shared<starcache::StarCache>();
RETURN_IF_ERROR(to_status(_cache->init(opt)));
@ -66,7 +68,7 @@ Status StarCacheEngine::init(const DiskCacheOptions& options) {
return Status::OK();
}
Status StarCacheEngine::write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) {
Status StarCacheEngine::write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) {
if (!options) {
return to_status(_cache->set(key, buffer.const_raw_buf(), nullptr));
}
@ -78,8 +80,12 @@ Status StarCacheEngine::write(const std::string& key, const IOBuffer& buffer, Di
opts.async = options->async;
opts.keep_alive = options->allow_zero_copy;
opts.callback = options->callback;
opts.mode = starcache::WriteOptions::WriteMode::WRITE_THROUGH;
opts.evict_probability = 100;
if (!_enable_datacache_persistence && _enable_tiered_cache) {
opts.mode = starcache::WriteOptions::WriteMode::WRITE_BACK;
} else {
opts.mode = starcache::WriteOptions::WriteMode::WRITE_THROUGH;
}
opts.evict_probability = options->evict_probability;
opts.ignore_inline = true;
Status st;
{
@ -98,13 +104,14 @@ Status StarCacheEngine::write(const std::string& key, const IOBuffer& buffer, Di
}
Status StarCacheEngine::read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) {
ReadCacheOptions* options) {
if (!options) {
return to_status(_cache->read(key, off, size, &buffer->raw_buf(), nullptr));
}
starcache::ReadOptions opts;
opts.use_adaptor = options->use_adaptor;
opts.mode = starcache::ReadOptions::ReadMode::READ_THROUGH;
opts.mode = _enable_tiered_cache ? starcache::ReadOptions::ReadMode::READ_BACK
: starcache::ReadOptions::ReadMode::READ_THROUGH;
auto st = to_status(_cache->read(key, off, size, &buffer->raw_buf(), &opts));
if (st.ok()) {
options->stats.read_mem_bytes = opts.stats.read_mem_bytes;
@ -113,6 +120,52 @@ Status StarCacheEngine::read(const std::string& key, size_t off, size_t size, IO
return st;
}
Status StarCacheEngine::insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) {
starcache::ObjectHandle* obj_hdl = new starcache::ObjectHandle;
auto obj_deleter = [deleter, key, value] {
// For temporary compatibility with old deleters.
CacheKey cache_key(key);
deleter(cache_key, value);
};
starcache::WriteOptions opts;
opts.priority = options.priority;
opts.ttl_seconds = options.ttl_seconds;
opts.overwrite = options.overwrite;
opts.evict_probability = options.evict_probability;
Status st = to_status(_cache->set_object(key, value, size, obj_deleter, obj_hdl, &opts));
if (!st.ok()) {
delete obj_hdl;
} else if (handle) {
// Try to release the old handle before filling it with a new one.
_try_release_obj_handle(*handle);
*handle = reinterpret_cast<ObjectCacheHandlePtr>(obj_hdl);
}
return st;
}
Status StarCacheEngine::lookup(const std::string& key, ObjectCacheHandlePtr* handle, ObjectCacheReadOptions* options) {
starcache::ObjectHandle* obj_hdl = new starcache::ObjectHandle;
// Skip checking options for now because there are no valid members in `ObjectCacheReadOptions` yet.
Status st = to_status(_cache->get_object(key, obj_hdl, nullptr));
if (!st.ok()) {
delete obj_hdl;
} else if (handle) {
_try_release_obj_handle(*handle);
*handle = reinterpret_cast<ObjectCacheHandlePtr>(obj_hdl);
}
return st;
}
void StarCacheEngine::release(ObjectCacheHandlePtr handle) {
_try_release_obj_handle(handle);
}
const void* StarCacheEngine::value(ObjectCacheHandlePtr handle) {
auto obj_hdl = reinterpret_cast<starcache::ObjectHandle*>(handle);
return obj_hdl->ptr();
}
bool StarCacheEngine::exist(const std::string& key) const {
return _cache->exist(key);
}
@ -121,6 +174,22 @@ Status StarCacheEngine::remove(const std::string& key) {
return to_status(_cache->remove(key));
}
Status StarCacheEngine::update_mem_quota(size_t quota_bytes, bool flush_to_disk) {
Status st = to_status(_cache->update_mem_quota(quota_bytes, flush_to_disk));
_refresh_quota();
return st;
}
Status StarCacheEngine::adjust_mem_quota(int64_t delta, size_t min_capacity) {
auto starcache_metrics = _cache->metrics();
size_t capacity = starcache_metrics.mem_quota_bytes;
int64_t new_capacity = capacity + delta;
if (new_capacity < (int64_t)min_capacity) {
return Status::InvalidArgument("target capacity is less than the minimum capacity");
}
return to_status(_cache->update_mem_quota(new_capacity, false));
}
Status StarCacheEngine::update_disk_spaces(const std::vector<DirSpace>& spaces) {
std::vector<starcache::DirSpace> disk_spaces;
disk_spaces.reserve(spaces.size());
@ -140,11 +209,14 @@ const StarCacheMetrics StarCacheEngine::starcache_metrics(int level) const {
return _cache->metrics(level);
}
const DataCacheDiskMetrics StarCacheEngine::cache_metrics() const {
const DataCacheMetrics StarCacheEngine::cache_metrics() const {
auto starcache_metrics = _cache->metrics(0);
DataCacheDiskMetrics metrics = {.status = static_cast<DataCacheStatus>(starcache_metrics.status),
.disk_quota_bytes = starcache_metrics.disk_quota_bytes,
.disk_used_bytes = starcache_metrics.disk_used_bytes};
DataCacheMetrics metrics = {.status = static_cast<DataCacheStatus>(starcache_metrics.status),
.mem_quota_bytes = starcache_metrics.mem_quota_bytes,
.mem_used_bytes = starcache_metrics.mem_used_bytes,
.disk_quota_bytes = starcache_metrics.disk_quota_bytes,
.disk_used_bytes = starcache_metrics.disk_used_bytes,
.meta_used_bytes = starcache_metrics.meta_used_bytes};
return metrics;
}
@ -167,9 +239,18 @@ Status StarCacheEngine::shutdown() {
void StarCacheEngine::_refresh_quota() {
auto metrics = starcache_metrics(0);
_mem_quota.store(metrics.mem_quota_bytes, std::memory_order_relaxed);
_disk_quota.store(metrics.disk_quota_bytes, std::memory_order_relaxed);
}
void StarCacheEngine::_try_release_obj_handle(ObjectCacheHandlePtr handle) {
if (handle) {
auto obj_hdl = reinterpret_cast<starcache::ObjectHandle*>(handle);
obj_hdl->release();
delete obj_hdl;
}
}
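
A minimal sketch of the object-handle lifecycle that insert/lookup/value/release implement above, assuming ObjectCacheDeleter is callable with (const CacheKey&, void*) as in the deleter wrapper; the key and value are placeholders, and the failure path is elided.

    // Sketch only: `engine` is an initialized StarCacheEngine.
    void object_handle_sketch(starrocks::StarCacheEngine& engine) {
        auto* value = new std::string("parquet footer bytes");
        starrocks::ObjectCacheDeleter deleter = [](const starrocks::CacheKey&, void* v) {
            delete static_cast<std::string*>(v);
        };
        starrocks::ObjectCacheHandlePtr handle = nullptr;
        starrocks::ObjectCacheWriteOptions opts;
        if (engine.insert("footer:seg1", value, value->size(), deleter, &handle, opts).ok()) {
            // value() casts the opaque handle back to starcache::ObjectHandle
            // and returns the cached object's address.
            auto* cached = static_cast<const std::string*>(engine.value(handle));
            (void)cached;
            // release() unpins the starcache handle and deletes the wrapper,
            // exactly what _try_release_obj_handle() does above.
            engine.release(handle);
        }
    }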
void StarCacheEngine::disk_spaces(std::vector<DirSpace>* spaces) const {
spaces->clear();
auto metrics = starcache_metrics(0);
@ -178,6 +259,18 @@ void StarCacheEngine::disk_spaces(std::vector<DirSpace>* spaces) const {
}
}
size_t StarCacheEngine::mem_quota() const {
starcache::CacheMetrics metrics = _cache->metrics(0);
// TODO: optimize later
return metrics.mem_quota_bytes;
}
size_t StarCacheEngine::mem_usage() const {
// TODO: add meta size?
starcache::CacheMetrics metrics = _cache->metrics(0);
return metrics.mem_used_bytes;
}
size_t StarCacheEngine::lookup_count() const {
starcache::CacheMetrics metrics = _cache->metrics(1);
return metrics.detail_l1->hit_count + metrics.detail_l1->miss_count;
@ -188,6 +281,17 @@ size_t StarCacheEngine::hit_count() const {
return metrics.detail_l1->hit_count;
}
const ObjectCacheMetrics StarCacheEngine::metrics() const {
auto starcache_metrics = _cache->metrics(2);
ObjectCacheMetrics m;
m.capacity = starcache_metrics.mem_quota_bytes;
m.usage = starcache_metrics.mem_used_bytes;
m.lookup_count = starcache_metrics.detail_l1->hit_count + starcache_metrics.detail_l1->miss_count;
m.hit_count = starcache_metrics.detail_l1->hit_count;
m.object_item_count = starcache_metrics.detail_l2->object_item_count;
return m;
}
Status StarCacheEngine::prune() {
return to_status(_cache->update_mem_quota(0, false));
}

View File

@ -14,51 +14,48 @@
#pragma once
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/local_cache_engine.h"
#include "common/status.h"
#ifdef WITH_STARCACHE
#include "starcache/star_cache.h"
#include "starcache/time_based_cache_adaptor.h"
#else
namespace starcache {
class StarCache;
class TimeBasedCacheAdaptor;
struct CacheMetrics;
} // namespace starcache
#endif
namespace starrocks {
#ifdef WITH_STARCACHE
using StarCacheMetrics = starcache::CacheMetrics;
#endif
class StarCacheEngine : public LocalDiskCacheEngine {
class StarCacheEngine : public LocalCacheEngine {
public:
StarCacheEngine() = default;
~StarCacheEngine() override = default;
virtual ~StarCacheEngine() override = default;
Status init(const DiskCacheOptions& options);
bool is_initialized() const override { return _initialized.load(std::memory_order_relaxed); }
Status write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) override;
Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer, ReadCacheOptions* options) override;
Status insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) override;
Status lookup(const std::string& key, ObjectCacheHandlePtr* handle, ObjectCacheReadOptions* options) override;
void release(ObjectCacheHandlePtr handle) override;
const void* value(ObjectCacheHandlePtr handle) override;
bool exist(const std::string& key) const override;
Status remove(const std::string& key) override;
Status adjust_mem_quota(int64_t delta, size_t min_capacity) override;
Status update_mem_quota(size_t quota_bytes, bool flush_to_disk) override;
Status update_disk_spaces(const std::vector<DirSpace>& spaces) override;
Status update_inline_cache_count_limit(int32_t limit) override;
#ifdef WITH_STARCACHE
const StarCacheMetrics starcache_metrics(int level) const;
#endif
const DataCacheDiskMetrics cache_metrics() const override;
const DataCacheMetrics cache_metrics() const override;
void record_read_remote(size_t size, int64_t latency_us) override;
@ -66,25 +63,38 @@ public:
Status shutdown() override;
LocalCacheEngineType engine_type() override { return LocalCacheEngineType::STARCACHE; }
std::shared_ptr<starcache::StarCache> starcache_instance() { return _cache; }
bool has_mem_cache() const override { return _mem_quota.load(std::memory_order_relaxed) > 0; }
bool has_disk_cache() const override { return _disk_quota.load(std::memory_order_relaxed) > 0; }
bool available() const override { return is_initialized() && has_disk_cache(); }
bool available() const override { return is_initialized() && (has_mem_cache() || has_disk_cache()); }
bool mem_cache_available() const override { return is_initialized() && has_mem_cache(); }
void disk_spaces(std::vector<DirSpace>* spaces) const override;
size_t mem_quota() const override;
size_t mem_usage() const override;
size_t lookup_count() const override;
size_t hit_count() const override;
const ObjectCacheMetrics metrics() const override;
Status prune() override;
private:
void _refresh_quota();
void _try_release_obj_handle(ObjectCacheHandlePtr handle);
std::shared_ptr<starcache::StarCache> _cache;
std::unique_ptr<starcache::TimeBasedCacheAdaptor> _cache_adaptor;
bool _enable_tiered_cache = false;
bool _enable_datacache_persistence = false;
std::atomic<bool> _initialized = false;
std::atomic<size_t> _mem_quota = 0;
std::atomic<size_t> _disk_quota = 0;
};
} // namespace starrocks

View File

@ -74,8 +74,6 @@ public:
bool is_index() const { return _type == TAccessPathType::type::INDEX; }
bool is_root() const { return _type == TAccessPathType::type::ROOT; }
bool is_from_predicate() const { return _from_predicate; }
bool is_extended() const { return _extended; }

View File

@ -529,11 +529,11 @@ void NullableColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool
}
void NullableColumn::check_or_die() const {
DCHECK_EQ(_null_column->size(), _data_column->size());
CHECK_EQ(_null_column->size(), _data_column->size());
// when _has_null=true, the column may have no null value, so don't check.
if (!_has_null) {
auto null_data = _null_column->immutable_data();
DCHECK(!SIMD::contain_nonzero(null_data, 0));
CHECK(!SIMD::contain_nonzero(null_data, 0));
}
_data_column->check_or_die();
_null_column->check_or_die();

View File

@ -333,6 +333,12 @@ CONF_mBool(enable_ordinal_index_memory_page_cache, "true");
CONF_mBool(enable_string_prefix_zonemap, "true");
// Prefix length used for string ZoneMap min/max when enabled
CONF_mInt32(string_prefix_zonemap_prefix_len, "16");
// Adaptive creation of string zonemap index based on page overlap quality.
// If the estimated overlap ratio across consecutive pages is greater than this threshold,
// skip writing the page-level string zonemap index. Range: [0.0, 1.0].
CONF_mDouble(string_zonemap_overlap_threshold, "0.8");
// Minimum number of non-empty pages before applying the adaptive check.
CONF_mInt32(string_zonemap_min_pages_for_adaptive_check, "16");
// ========================== ZONEMAP END ===================================
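
To make the adaptive check concrete, here is a hypothetical sketch of estimating the overlap ratio over consecutive page [min, max] prefix ranges; all names are illustrative, not the actual writer code.

    #include <cstddef>
    #include <string>
    #include <vector>

    struct PageRange { std::string min_prefix, max_prefix; };

    // Illustrative only: fraction of consecutive page pairs whose prefix
    // ranges overlap. If this exceeds string_zonemap_overlap_threshold (and
    // at least string_zonemap_min_pages_for_adaptive_check pages were seen),
    // the page-level string zonemap is skipped because it would filter poorly.
    double estimate_overlap_ratio(const std::vector<PageRange>& pages) {
        if (pages.size() < 2) return 0.0;
        size_t overlapping = 0;
        for (size_t i = 1; i < pages.size(); ++i) {
            // Two ranges overlap when neither ends before the other begins.
            if (pages[i].min_prefix <= pages[i - 1].max_prefix &&
                pages[i - 1].min_prefix <= pages[i].max_prefix) {
                ++overlapping;
            }
        }
        return static_cast<double>(overlapping) / (pages.size() - 1);
    }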
@ -412,10 +418,7 @@ CONF_Bool(enable_event_based_compaction_framework, "true");
CONF_Bool(enable_size_tiered_compaction_strategy, "true");
CONF_mBool(enable_pk_size_tiered_compaction_strategy, "true");
// Enable parallel execution within tablet for primary key tables.
CONF_mBool(enable_pk_parallel_execution, "true");
// The minimum threshold of data size for enabling pk parallel execution.
// Default is 300MB.
CONF_mInt64(pk_parallel_execution_threshold_bytes, "314572800");
CONF_mBool(enable_pk_parallel_execution, "false");
// We support real-time compaction strategy for primary key tables in shared-data mode.
// This real-time compaction strategy enables compacting rowsets across multiple levels simultaneously.
// The parameter `size_tiered_max_compaction_level` defines the maximum compaction level allowed in a single compaction task.
@ -586,8 +589,6 @@ CONF_mBool(enable_token_check, "true");
// to open/close system metrics
CONF_Bool(enable_system_metrics, "true");
CONF_Bool(enable_jvm_metrics, "false");
CONF_mBool(enable_prefetch, "true");
// Number of cores StarRocks will used, this will effect only when it's greater than 0.
@ -935,9 +936,6 @@ CONF_mInt64(tablet_internal_parallel_min_scan_dop, "4");
// A lake tablet can be split only if its number of rows is less than lake_tablet_rows_splitted_ratio * splitted_scan_rows.
CONF_mDouble(lake_tablet_rows_splitted_ratio, "1.5");
// Allow skipping invalid delete_predicate in order to get the segment data back, and do manual correction.
CONF_mBool(lake_tablet_ignore_invalid_delete_predicate, "false");
// The bitmap serialize version.
CONF_Int16(bitmap_serialize_version, "1");
// The max hdfs file handle.
@ -1096,8 +1094,6 @@ CONF_Int64(rpc_connect_timeout_ms, "30000");
CONF_Int32(max_batch_publish_latency_ms, "100");
// Config for opentelemetry tracing.
// Valid example: jaeger_endpoint = localhost:14268
// Invalid example: jaeger_endpoint = http://localhost:14268
CONF_String(jaeger_endpoint, "");
// Config for query debug trace
@ -1299,8 +1295,19 @@ CONF_Bool(datacache_block_buffer_enable, "true");
// Controls how many threads are created for datacache synchronous tasks.
// With the default value, one thread is created for every 8 CPUs.
CONF_Double(datacache_scheduler_threads_per_cpu, "0.125");
// Controls whether raw data is cached in both memory and disk.
// If true, raw data is written to a tiered cache composed of the memory cache and the disk cache,
// with the memory cache holding hotter data than the disk cache.
// If false, raw data is written directly to disk and read from disk without promotion.
// Object data, such as parquet footer objects, can only be cached in memory and is not affected
// by this configuration.
CONF_Bool(datacache_tiered_cache_enable, "false");
// Whether to persist cached data
CONF_Bool(datacache_persistence_enable, "true");
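
How these two flags interact is implemented in the StarCacheEngine changes earlier in this diff; condensed here as a sketch (the enum values are the ones used there; the function names are not real).

    // Sketch condensing StarCacheEngine::write/read mode selection above.
    starcache::WriteOptions::WriteMode choose_write_mode(bool persistence_enabled, bool tiered_enabled) {
        // WRITE_BACK fills the memory cache first and demotes to disk later;
        // it is chosen only when persistence is off and tiering is on.
        if (!persistence_enabled && tiered_enabled) {
            return starcache::WriteOptions::WriteMode::WRITE_BACK;
        }
        return starcache::WriteOptions::WriteMode::WRITE_THROUGH;
    }

    starcache::ReadOptions::ReadMode choose_read_mode(bool tiered_enabled) {
        // READ_BACK promotes disk hits into memory; READ_THROUGH leaves data in place.
        return tiered_enabled ? starcache::ReadOptions::ReadMode::READ_BACK
                              : starcache::ReadOptions::ReadMode::READ_THROUGH;
    }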
// DataCache engines, alternatives: starcache, lrucache
// The default value is left empty so we can tell whether it was manually configured by users.
// If not, the default engine is chosen based on build switches like "WITH_STARCACHE".
CONF_String_enum(datacache_engine, "", ",starcache,lrucache");
// The interval time (millisecond) for agent report datacache metrics to FE.
CONF_mInt32(report_datacache_metrics_interval_ms, "60000");
@ -1354,6 +1361,7 @@ CONF_Alias(datacache_block_size, block_cache_block_size);
CONF_Alias(datacache_max_concurrent_inserts, block_cache_max_concurrent_inserts);
CONF_Alias(datacache_checksum_enable, block_cache_checksum_enable);
CONF_Alias(datacache_direct_io_enable, block_cache_direct_io_enable);
CONF_Alias(datacache_engine, block_cache_engine);
CONF_mInt64(l0_l1_merge_ratio, "10");
// max wal file size in l0
@ -1604,7 +1612,7 @@ CONF_mBool(apply_del_vec_after_all_index_filter, "true");
// connector sink memory watermark
CONF_mDouble(connector_sink_mem_high_watermark_ratio, "0.3");
CONF_mDouble(connector_sink_mem_low_watermark_ratio, "0.1");
CONF_mDouble(connector_sink_mem_urgent_space_ratio, "0.05");
CONF_mDouble(connector_sink_mem_urgent_space_ratio, "0.1");
// Whether enable spill intermediate data for connector sink.
CONF_mBool(enable_connector_sink_spill, "true");

View File

@ -340,7 +340,7 @@ void TEST_clear_configs();
template <>
struct fmt::formatter<starrocks::config::MutableString> : formatter<std::string> {
auto format(const starrocks::config::MutableString& s, format_context& ctx) const {
auto format(const starrocks::config::MutableString& s, format_context& ctx) {
return formatter<std::string>::format(s.value(), ctx);
}
};

View File

@ -44,13 +44,6 @@
#ifdef USE_STAROS
#include "fslib/star_cache_handler.h"
#endif
#include <fmt/ranges.h>
#include <csignal>
// Need POSIX signal APIs like sigaction/siginfo_t.
// NOLINTNEXTLINE(modernize-deprecated-headers)
#include <signal.h>
#include "fs/encrypt_file.h"
#include "gutil/cpu.h"
#include "jemalloc/jemalloc.h"
@ -171,12 +164,6 @@ struct JemallocStats {
};
static void retrieve_jemalloc_stats(JemallocStats* stats) {
// On macOS, jemalloc may define je_mallctl as mallctl via macro in jemalloc.h
#ifdef __APPLE__
#ifndef je_mallctl
#define je_mallctl mallctl
#endif
#endif
uint64_t epoch = 1;
size_t sz = sizeof(epoch);
je_mallctl("epoch", &epoch, &sz, &epoch, sz);
@ -223,7 +210,6 @@ void jemalloc_tracker_daemon(void* arg_this) {
static void init_starrocks_metrics(const std::vector<StorePath>& store_paths) {
bool init_system_metrics = config::enable_system_metrics;
bool init_jvm_metrics = config::enable_jvm_metrics;
std::set<std::string> disk_devices;
std::vector<std::string> network_interfaces;
std::vector<std::string> paths;
@ -243,8 +229,7 @@ static void init_starrocks_metrics(const std::vector<StorePath>& store_paths) {
return;
}
}
StarRocksMetrics::instance()->initialize(paths, init_system_metrics, init_jvm_metrics, disk_devices,
network_interfaces);
StarRocksMetrics::instance()->initialize(paths, init_system_metrics, disk_devices, network_interfaces);
}
void sigterm_handler(int signo, siginfo_t* info, void* context) {

View File

@ -18,12 +18,6 @@
#include <glog/logging.h>
#include <glog/vlog_is_on.h>
#include <jemalloc/jemalloc.h>
#ifdef __APPLE__
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#include <pthread.h>
#endif
#include <cerrno>
#include <cstdio>
@ -33,7 +27,7 @@
#include <mutex>
#include "cache/datacache.h"
#include "cache/mem_cache/page_cache.h"
#include "cache/object_cache/page_cache.h"
#include "common/config.h"
#include "gutil/endian.h"
#include "gutil/stringprintf.h"
@ -134,12 +128,7 @@ static void dontdump_unused_pages() {
static bool start_dump = false;
struct timeval tv;
gettimeofday(&tv, nullptr);
// On macOS, pthread_t is an opaque pointer; convert to a numeric id for fmt
#ifdef __APPLE__
uint64_t tid = static_cast<uint64_t>(pthread_mach_thread_np(pthread_self()));
#else
pthread_t tid = pthread_self();
#endif
const uint32_t MAX_BUFFER_SIZE = 1024;
char buffer[MAX_BUFFER_SIZE] = {};
// memory_buffer allocates 500 bytes on the stack
@ -147,13 +136,7 @@ static void dontdump_unused_pages() {
if (!start_dump) {
int res = snprintf(buffer, MAX_BUFFER_SIZE, "arena.%d.purge", MALLCTL_ARENAS_ALL);
buffer[res] = '\0';
int ret =
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
(buffer, nullptr, nullptr, nullptr, 0);
int ret = je_mallctl(buffer, nullptr, nullptr, nullptr, 0);
if (ret != 0) {
FMT_LOG("je_mallctl execute purge failed, errno:{}", ret);
@ -163,13 +146,7 @@ static void dontdump_unused_pages() {
res = snprintf(buffer, MAX_BUFFER_SIZE, "arena.%d.dontdump", MALLCTL_ARENAS_ALL);
buffer[res] = '\0';
ret =
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
(buffer, nullptr, nullptr, nullptr, 0);
ret = je_mallctl(buffer, nullptr, nullptr, nullptr, 0);
if (ret != 0) {
FMT_LOG("je_mallctl execute dontdump failed, errno:{}", ret);
@ -222,10 +199,8 @@ bool init_glog(const char* basename, bool install_signal_handler) {
FLAGS_logbuflevel = 0;
// Buffer log messages for at most this many seconds.
FLAGS_logbufsecs = 30;
// Set roll num. Not available with Homebrew glog on macOS.
#ifndef __APPLE__
// Set roll num.
FLAGS_log_filenum_quota = config::sys_log_roll_num;
#endif
// Set log level.
std::string loglevel = config::sys_log_level;
@ -255,19 +230,13 @@ bool init_glog(const char* basename, bool install_signal_handler) {
std::string sizeflag = "SIZE-MB-";
bool ok = false;
if (rollmode.compare("TIME-DAY") == 0) {
#ifndef __APPLE__
FLAGS_log_split_method = "day";
#endif
ok = true;
} else if (rollmode.compare("TIME-HOUR") == 0) {
#ifndef __APPLE__
FLAGS_log_split_method = "hour";
#endif
ok = true;
} else if (rollmode.substr(0, sizeflag.length()).compare(sizeflag) == 0) {
#ifndef __APPLE__
FLAGS_log_split_method = "size";
#endif
std::string sizestr = rollmode.substr(sizeflag.size(), rollmode.size() - sizeflag.size());
if (sizestr.size() != 0) {
char* end = nullptr;
@ -309,10 +278,7 @@ bool init_glog(const char* basename, bool install_signal_handler) {
if (config::dump_trace_info) {
google::InstallFailureWriter(failure_writer);
google::InstallFailureFunction((google::logging_fail_func_t)failure_function);
#ifndef MACOS_DISABLE_GLOG_STACKTRACE
// This symbol may be unavailable on macOS builds using system glog.
google::InstallFailureHandlerAfterOutputLog(failure_handler_after_output_log);
#endif
}
logging_initialized = true;

View File

@ -27,45 +27,21 @@
namespace starrocks {
// detail implements for allocator
static int set_jemalloc_profiling(bool enable) {
int ret =
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
("prof.active", nullptr, nullptr, &enable, 1);
ret |=
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
("prof.thread_active_init", nullptr, nullptr, &enable, 1);
int ret = je_mallctl("prof.active", nullptr, nullptr, &enable, 1);
ret |= je_mallctl("prof.thread_active_init", nullptr, nullptr, &enable, 1);
return ret;
}
static int has_enable_heap_profile() {
int value = 0;
size_t size = sizeof(value);
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
("prof.active", &value, &size, nullptr, 0);
je_mallctl("prof.active", &value, &size, nullptr, 0);
return value;
}
bool dump_snapshot(const std::string& filename) {
const char* fname = filename.c_str();
return (
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
("prof.dump", nullptr, nullptr, &fname, sizeof(const char*))) == 0;
return je_mallctl("prof.dump", nullptr, nullptr, &fname, sizeof(const char*)) == 0;
}
// declare exec from script
@ -109,4 +85,4 @@ std::string HeapProf::to_dot_format(const std::string& heapdump_filename) {
return exec(fmt::format("{} --dot {} {}", jeprof, binary, heapdump_filename));
}
} // namespace starrocks
} // namespace starrocks
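
Taken together, the helpers above form a small heap-profiling workflow. A sketch follows, assuming a jemalloc build with profiling compiled in (e.g. MALLOC_CONF="prof:true"); set_jemalloc_profiling and has_enable_heap_profile are static in this file and shown as callable purely for illustration.

    // Sketch only: enable profiling, run the workload, dump a snapshot.
    void heap_profiling_sketch() {
        if (set_jemalloc_profiling(true) != 0) {
            return; // profiling not compiled in, or mallctl failed
        }
        // ... run the workload to be profiled ...
        if (has_enable_heap_profile()) {
            // Writes a jeprof-compatible dump; HeapProf::to_dot_format() can
            // then render it as a dot graph via jeprof.
            (void)starrocks::dump_snapshot("/tmp/starrocks.heap");
        }
        set_jemalloc_profiling(false);
    }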

View File

@ -40,17 +40,12 @@ void Tracer::release_instance() {
Instance().shutdown();
}
static inline opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer> create_no_op_tracer() {
return opentelemetry::trace::Provider::GetTracerProvider()->GetTracer("no-op", OPENTELEMETRY_SDK_VERSION);
}
void Tracer::init(const std::string& service_name) {
if (!config::jaeger_endpoint.empty()) {
opentelemetry::exporter::jaeger::JaegerExporterOptions opts;
vector<string> host_port = strings::Split(config::jaeger_endpoint, ":");
if (host_port.size() != 2) {
LOG(WARNING) << "bad jaeger_endpoint " << config::jaeger_endpoint;
_tracer = create_no_op_tracer();
return;
}
opts.endpoint = host_port[0];
@ -68,7 +63,7 @@ void Tracer::init(const std::string& service_name) {
new opentelemetry::sdk::trace::TracerProvider(std::move(processor), jaeger_resource));
_tracer = provider->GetTracer(service_name, OPENTELEMETRY_SDK_VERSION);
} else {
_tracer = create_no_op_tracer();
_tracer = opentelemetry::trace::Provider::GetTracerProvider()->GetTracer("no-op", OPENTELEMETRY_SDK_VERSION);
}
}

View File

@ -41,8 +41,7 @@ Status ConnectorChunkSink::init() {
}
Status ConnectorChunkSink::write_partition_chunk(const std::string& partition,
const std::vector<int8_t>& partition_field_null_list,
const ChunkPtr& chunk) {
const std::vector<int8_t>& partition_field_null_list, Chunk* chunk) {
// partition_field_null_list is used to distinguish scenarios like NULL vs. the string "null".
// They share the same dir path, but should not be placed in the same data file.
// We record them in different files so that each data file can have its own meta info.
@ -65,13 +64,13 @@ Status ConnectorChunkSink::write_partition_chunk(const std::string& partition,
return Status::OK();
}
Status ConnectorChunkSink::add(const ChunkPtr& chunk) {
Status ConnectorChunkSink::add(Chunk* chunk) {
std::string partition = DEFAULT_PARTITION;
bool partitioned = !_partition_column_names.empty();
if (partitioned) {
ASSIGN_OR_RETURN(partition,
HiveUtils::make_partition_name(_partition_column_names, _partition_column_evaluators,
chunk.get(), _support_null_partition));
HiveUtils::make_partition_name(_partition_column_names, _partition_column_evaluators, chunk,
_support_null_partition));
}
RETURN_IF_ERROR(
@ -80,27 +79,13 @@ Status ConnectorChunkSink::add(const ChunkPtr& chunk) {
}
Status ConnectorChunkSink::finish() {
// Flush data to disk to free memory for subsequent merge operations.
for (auto& [partition_key, writer] : _partition_chunk_writers) {
RETURN_IF_ERROR(writer->flush());
}
for (auto& [partition_key, writer] : _partition_chunk_writers) {
RETURN_IF_ERROR(writer->wait_flush());
}
for (auto& [partition_key, writer] : _partition_chunk_writers) {
RETURN_IF_ERROR(writer->finish());
}
return Status::OK();
}
void ConnectorChunkSink::push_rollback_action(const std::function<void()>& action) {
// Not a very frequent operation, so use unique_lock here is ok.
std::unique_lock<std::shared_mutex> wlck(_mutex);
_rollback_actions.push_back(std::move(action));
}
void ConnectorChunkSink::rollback() {
std::shared_lock<std::shared_mutex> rlck(_mutex);
for (auto& action : _rollback_actions) {
action();
}
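
The NULL-vs-"null" case described in write_partition_chunk above can be pictured with hypothetical values; the flag list is produced alongside the partition name, one entry per partition field.

    // Hypothetical illustration: both rows map to the same partition directory
    // (e.g. .../dt=null/), but the flag keeps them in separate data files so
    // each file's meta info stays accurate.
    std::vector<int8_t> real_null_row   = {1}; // partition field is SQL NULL
    std::vector<int8_t> string_null_row = {0}; // partition field is the string "null"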

View File

@ -47,7 +47,7 @@ public:
Status init();
virtual Status add(const ChunkPtr& chunk);
virtual Status add(Chunk* chunk);
Status finish();
@ -58,15 +58,13 @@ public:
virtual void callback_on_commit(const CommitResult& result) = 0;
Status write_partition_chunk(const std::string& partition, const vector<int8_t>& partition_field_null_list,
const ChunkPtr& chunk);
Chunk* chunk);
Status status();
void set_status(const Status& status);
protected:
void push_rollback_action(const std::function<void()>& action);
AsyncFlushStreamPoller* _io_poller = nullptr;
SinkOperatorMemoryManager* _op_mem_mgr = nullptr;

View File

@ -50,16 +50,13 @@ int ConnectorSinkSpillExecutor::calc_max_thread_num() {
}
void ChunkSpillTask::run() {
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_mem_tracker);
auto res = _load_chunk_spiller->spill(*_chunk);
if (_cb) {
_cb(_chunk, res);
}
_chunk.reset();
}
void MergeBlockTask::run() {
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_mem_tracker);
auto st = _writer->merge_blocks();
if (_cb) {
_cb(st);

View File

@ -71,12 +71,9 @@ protected:
class ChunkSpillTask final : public Runnable {
public:
ChunkSpillTask(LoadChunkSpiller* load_chunk_spiller, ChunkPtr chunk, MemTracker* mem_tracker,
ChunkSpillTask(LoadChunkSpiller* load_chunk_spiller, ChunkPtr chunk,
std::function<void(ChunkPtr chunk, const StatusOr<size_t>&)> cb)
: _load_chunk_spiller(load_chunk_spiller),
_chunk(std::move(chunk)),
_mem_tracker(mem_tracker),
_cb(std::move(cb)) {}
: _load_chunk_spiller(load_chunk_spiller), _chunk(chunk), _cb(std::move(cb)) {}
~ChunkSpillTask() override = default;
@ -85,20 +82,18 @@ public:
private:
LoadChunkSpiller* _load_chunk_spiller;
ChunkPtr _chunk;
MemTracker* _mem_tracker;
std::function<void(ChunkPtr, const StatusOr<size_t>&)> _cb;
};
class MergeBlockTask : public Runnable {
public:
MergeBlockTask(SpillPartitionChunkWriter* writer, MemTracker* mem_tracker, std::function<void(const Status&)> cb)
: _writer(writer), _mem_tracker(mem_tracker), _cb(std::move(cb)) {}
MergeBlockTask(SpillPartitionChunkWriter* writer, std::function<void(const Status&)> cb)
: _writer(writer), _cb(std::move(cb)) {}
void run() override;
private:
SpillPartitionChunkWriter* _writer;
MemTracker* _mem_tracker;
std::function<void(const Status&)> _cb;
};

View File

@ -47,7 +47,7 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> FileChunkSinkProvider::create_chun
std::shared_ptr<ConnectorChunkSinkContext> context, int32_t driver_id) {
auto ctx = std::dynamic_pointer_cast<FileChunkSinkContext>(context);
auto runtime_state = ctx->fragment_context->runtime_state();
std::shared_ptr<FileSystem> fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value();
auto fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value();
auto column_evaluators = ColumnEvaluator::clone(ctx->column_evaluators);
auto location_provider = std::make_shared<connector::LocationProvider>(
ctx->path, print_id(ctx->fragment_context->query_id()), runtime_state->be_number(), driver_id,
@ -56,17 +56,16 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> FileChunkSinkProvider::create_chun
std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
if (boost::iequals(ctx->format, formats::PARQUET)) {
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->column_names,
std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(std::move(column_evaluators)),
std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
std::nullopt, ctx->executor, runtime_state);
} else if (boost::iequals(ctx->format, formats::ORC)) {
file_writer_factory = std::make_shared<formats::ORCFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators), ctx->executor,
runtime_state);
std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
ctx->executor, runtime_state);
} else if (boost::iequals(ctx->format, formats::CSV)) {
file_writer_factory = std::make_shared<formats::CSVFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators), ctx->executor,
runtime_state);
std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
ctx->executor, runtime_state);
} else {
file_writer_factory = std::make_shared<formats::UnknownFileWriterFactory>(ctx->format);
}
@ -84,7 +83,6 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> FileChunkSinkProvider::create_chun
auto partition_chunk_writer_ctx =
std::make_shared<SpillPartitionChunkWriterContext>(SpillPartitionChunkWriterContext{
{file_writer_factory, location_provider, ctx->max_file_size, partition_columns.empty()},
fs,
ctx->fragment_context,
nullptr,
nullptr});

View File

@ -53,8 +53,7 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> HiveChunkSinkProvider::create_chun
std::shared_ptr<ConnectorChunkSinkContext> context, int32_t driver_id) {
auto ctx = std::dynamic_pointer_cast<HiveChunkSinkContext>(context);
auto runtime_state = ctx->fragment_context->runtime_state();
std::shared_ptr<FileSystem> fs =
FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value(); // must succeed
auto fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value(); // must succeed
auto data_column_evaluators = ColumnEvaluator::clone(ctx->data_column_evaluators);
auto location_provider = std::make_shared<connector::LocationProvider>(
ctx->path, print_id(ctx->fragment_context->query_id()), runtime_state->be_number(), driver_id,
@ -66,17 +65,16 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> HiveChunkSinkProvider::create_chun
ctx->options[formats::ParquetWriterOptions::USE_LEGACY_DECIMAL_ENCODING] = "true";
ctx->options[formats::ParquetWriterOptions::USE_INT96_TIMESTAMP_ENCODING] = "true";
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->data_column_names,
std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(std::move(data_column_evaluators)),
std::nullopt, ctx->executor, runtime_state);
std::move(fs), ctx->compression_type, ctx->options, ctx->data_column_names,
std::move(data_column_evaluators), std::nullopt, ctx->executor, runtime_state);
} else if (boost::iequals(ctx->format, formats::ORC)) {
file_writer_factory = std::make_shared<formats::ORCFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->data_column_names, std::move(data_column_evaluators),
ctx->executor, runtime_state);
std::move(fs), ctx->compression_type, ctx->options, ctx->data_column_names,
std::move(data_column_evaluators), ctx->executor, runtime_state);
} else if (boost::iequals(ctx->format, formats::TEXTFILE)) {
file_writer_factory = std::make_shared<formats::CSVFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->data_column_names, std::move(data_column_evaluators),
ctx->executor, runtime_state);
std::move(fs), ctx->compression_type, ctx->options, ctx->data_column_names,
std::move(data_column_evaluators), ctx->executor, runtime_state);
} else {
file_writer_factory = std::make_shared<formats::UnknownFileWriterFactory>(ctx->format);
}
@ -87,7 +85,6 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> HiveChunkSinkProvider::create_chun
auto partition_chunk_writer_ctx = std::make_shared<SpillPartitionChunkWriterContext>(
SpillPartitionChunkWriterContext{{file_writer_factory, location_provider, ctx->max_file_size,
ctx->partition_column_names.empty()},
fs,
ctx->fragment_context,
nullptr,
nullptr});

View File

@ -37,7 +37,7 @@ IcebergChunkSink::IcebergChunkSink(std::vector<std::string> partition_columns, s
_transform_exprs(std::move(transform_exprs)) {}
void IcebergChunkSink::callback_on_commit(const CommitResult& result) {
push_rollback_action(std::move(result.rollback_action));
_rollback_actions.push_back(std::move(result.rollback_action));
if (result.io_status.ok()) {
_state->update_num_rows_load_sink(result.file_statistics.record_count);
@ -81,9 +81,8 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
std::shared_ptr<ConnectorChunkSinkContext> context, int32_t driver_id) {
auto ctx = std::dynamic_pointer_cast<IcebergChunkSinkContext>(context);
auto runtime_state = ctx->fragment_context->runtime_state();
std::shared_ptr<FileSystem> fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value();
auto column_evaluators = std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(
ColumnEvaluator::clone(ctx->column_evaluators));
auto fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value();
auto column_evaluators = ColumnEvaluator::clone(ctx->column_evaluators);
auto location_provider = std::make_shared<connector::LocationProvider>(
ctx->path, print_id(ctx->fragment_context->query_id()), runtime_state->be_number(), driver_id,
boost::to_lower_copy(ctx->format));
@ -94,8 +93,8 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
if (boost::iequals(ctx->format, formats::PARQUET)) {
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->column_names, column_evaluators, ctx->parquet_field_ids,
ctx->executor, runtime_state);
std::move(fs), ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
ctx->parquet_field_ids, ctx->executor, runtime_state);
} else {
file_writer_factory = std::make_shared<formats::UnknownFileWriterFactory>(ctx->format);
}
@ -105,10 +104,8 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
auto partition_chunk_writer_ctx =
std::make_shared<SpillPartitionChunkWriterContext>(SpillPartitionChunkWriterContext{
{file_writer_factory, location_provider, ctx->max_file_size, partition_columns.empty()},
fs,
ctx->fragment_context,
runtime_state->desc_tbl().get_tuple_descriptor(ctx->tuple_desc_id),
column_evaluators,
ctx->sort_ordering});
partition_chunk_writer_factory = std::make_unique<SpillPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
} else {
@ -124,14 +121,14 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
std::move(partition_chunk_writer_factory), runtime_state);
}
Status IcebergChunkSink::add(const ChunkPtr& chunk) {
Status IcebergChunkSink::add(Chunk* chunk) {
std::string partition = DEFAULT_PARTITION;
bool partitioned = !_partition_column_names.empty();
std::vector<int8_t> partition_field_null_list;
if (partitioned) {
ASSIGN_OR_RETURN(partition, HiveUtils::iceberg_make_partition_name(
_partition_column_names, _partition_column_evaluators,
dynamic_cast<IcebergChunkSink*>(this)->transform_expr(), chunk.get(),
dynamic_cast<IcebergChunkSink*>(this)->transform_expr(), chunk,
_support_null_partition, partition_field_null_list));
}

View File

@ -45,7 +45,7 @@ public:
const std::vector<std::string>& transform_expr() const { return _transform_exprs; }
Status add(const ChunkPtr& chunk) override;
Status add(Chunk* chunk) override;
private:
std::vector<std::string> _transform_exprs;

View File

@ -625,7 +625,7 @@ void LakeDataSource::init_counter(RuntimeState* state) {
ADD_CHILD_COUNTER(_runtime_profile, "ShortKeyRangeNumber", TUnit::UNIT, segment_init_name);
_column_iterator_init_timer = ADD_CHILD_TIMER(_runtime_profile, "ColumnIteratorInit", segment_init_name);
_bitmap_index_iterator_init_timer = ADD_CHILD_TIMER(_runtime_profile, "BitmapIndexIteratorInit", segment_init_name);
_zone_map_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ZoneMapIndexFilter", segment_init_name);
_zone_map_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ZoneMapIndexFiter", segment_init_name);
_rows_key_range_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ShortKeyFilter", segment_init_name);
_bf_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "BloomFilterFilter", segment_init_name);

View File

@ -23,7 +23,6 @@
#include "formats/file_writer.h"
#include "runtime/runtime_state.h"
#include "storage/chunk_helper.h"
#include "storage/convert_helper.h"
#include "storage/load_spill_block_manager.h"
#include "storage/storage_engine.h"
#include "storage/types.h"
@ -65,19 +64,18 @@ void PartitionChunkWriter::commit_file() {
_file_writer = nullptr;
VLOG(3) << "commit to remote file, filename: " << _out_stream->filename()
<< ", size: " << result.file_statistics.file_size;
_out_stream = nullptr;
}
Status BufferPartitionChunkWriter::init() {
return Status::OK();
}
Status BufferPartitionChunkWriter::write(const ChunkPtr& chunk) {
if (_file_writer && _file_writer->get_written_bytes() >= _max_file_size) {
Status BufferPartitionChunkWriter::write(Chunk* chunk) {
RETURN_IF_ERROR(create_file_writer_if_needed());
if (_file_writer->get_written_bytes() >= _max_file_size) {
commit_file();
}
RETURN_IF_ERROR(create_file_writer_if_needed());
return _file_writer->write(chunk.get());
return _file_writer->write(chunk);
}
Status BufferPartitionChunkWriter::flush() {
@ -85,10 +83,6 @@ Status BufferPartitionChunkWriter::flush() {
return Status::OK();
}
Status BufferPartitionChunkWriter::wait_flush() {
return Status::OK();
}
Status BufferPartitionChunkWriter::finish() {
commit_file();
return Status::OK();
@ -98,15 +92,11 @@ SpillPartitionChunkWriter::SpillPartitionChunkWriter(std::string partition,
std::vector<int8_t> partition_field_null_list,
const std::shared_ptr<SpillPartitionChunkWriterContext>& ctx)
: PartitionChunkWriter(std::move(partition), std::move(partition_field_null_list), ctx),
_fs(ctx->fs),
_fragment_context(ctx->fragment_context),
_column_evaluators(ctx->column_evaluators),
_sort_ordering(ctx->sort_ordering) {
_chunk_spill_token = ExecEnv::GetInstance()->connector_sink_spill_executor()->create_token();
_block_merge_token = StorageEngine::instance()->load_spill_block_merge_executor()->create_token();
_tuple_desc = ctx->tuple_desc;
_writer_id = generate_uuid();
_spill_mode = _sort_ordering != nullptr;
}
SpillPartitionChunkWriter::~SpillPartitionChunkWriter() {
@ -119,22 +109,19 @@ SpillPartitionChunkWriter::~SpillPartitionChunkWriter() {
}
Status SpillPartitionChunkWriter::init() {
std::string root_location = _location_provider->root_location();
_load_spill_block_mgr =
std::make_unique<LoadSpillBlockManager>(_fragment_context->query_id(), _writer_id, root_location, _fs);
std::string root_location =
_is_default_partition ? _location_provider->root_location() : _location_provider->root_location(_partition);
_load_spill_block_mgr = std::make_unique<LoadSpillBlockManager>(
_fragment_context->query_id(), _fragment_context->fragment_instance_id(), root_location);
RETURN_IF_ERROR(_load_spill_block_mgr->init());
_load_chunk_spiller = std::make_unique<LoadChunkSpiller>(_load_spill_block_mgr.get(),
_fragment_context->runtime_state()->runtime_profile());
return Status::OK();
}
Status SpillPartitionChunkWriter::write(const ChunkPtr& chunk) {
Status SpillPartitionChunkWriter::write(Chunk* chunk) {
RETURN_IF_ERROR(create_file_writer_if_needed());
if (!_spill_mode) {
return _write_chunk(chunk.get());
}
_chunks.push_back(chunk);
_chunks.push_back(chunk->clone_unique());
_chunk_bytes_usage += chunk->bytes_usage();
if (!_base_chunk) {
_base_chunk = _chunks.back();
@ -154,25 +141,15 @@ Status SpillPartitionChunkWriter::write(const ChunkPtr& chunk) {
Status SpillPartitionChunkWriter::flush() {
RETURN_IF(!_file_writer, Status::OK());
// Change to spill mode if memory is insufficient.
if (!_spill_mode) {
_spill_mode = true;
commit_file();
return Status::OK();
}
return _spill();
}
Status SpillPartitionChunkWriter::wait_flush() {
_chunk_spill_token->wait();
return Status::OK();
}
Status SpillPartitionChunkWriter::finish() {
_chunk_spill_token->wait();
// If no chunks have been spilled, flush data to remote file directly.
if (_load_chunk_spiller->empty()) {
VLOG(2) << "flush to remote directly when finish, query_id: " << print_id(_fragment_context->query_id())
<< ", writer_id: " << print_id(_writer_id);
<< ", fragment_instance_id: " << print_id(_fragment_context->fragment_instance_id());
RETURN_IF_ERROR(_flush_to_file());
commit_file();
return Status::OK();
@ -180,12 +157,11 @@ Status SpillPartitionChunkWriter::finish() {
auto cb = [this](const Status& st) {
LOG_IF(ERROR, !st.ok()) << "fail to merge spill blocks, query_id: " << print_id(_fragment_context->query_id())
<< ", writer_id: " << print_id(_writer_id);
<< ", fragment_instance_id: " << print_id(_fragment_context->fragment_instance_id());
_handle_err(st);
commit_file();
};
auto merge_task = std::make_shared<MergeBlockTask>(this, _fragment_context->runtime_state()->instance_mem_tracker(),
std::move(cb));
auto merge_task = std::make_shared<MergeBlockTask>(this, cb);
return _block_merge_token->submit(merge_task);
}
@ -198,14 +174,12 @@ bool SpillPartitionChunkWriter::is_finished() {
}
Status SpillPartitionChunkWriter::merge_blocks() {
RETURN_IF_ERROR(flush());
_chunk_spill_token->wait();
auto write_func = [this](Chunk* chunk) { return _flush_chunk(chunk, false); };
auto flush_func = [this]() {
// Commit the file after each merge function to ensure the data written to one file is ordered,
// because data generated by different merge functions may be unordered.
if (_sort_ordering) {
commit_file();
}
auto flush_func = []() {
// Do nothing because we check and commit when writing each chunk.
return Status::OK();
};
Status st = _load_chunk_spiller->merge_write(_max_file_size, _sort_ordering != nullptr, false /* do_agg */,
@ -235,7 +209,7 @@ Status SpillPartitionChunkWriter::_sort() {
Status SpillPartitionChunkWriter::_spill() {
RETURN_IF(_chunks.empty(), Status::OK());
RETURN_IF_ERROR(_merge_chunks());
_merge_chunks();
if (_sort_ordering) {
RETURN_IF_ERROR(_sort());
}
@ -247,19 +221,14 @@ Status SpillPartitionChunkWriter::_spill() {
Status st = _flush_chunk(chunk.get(), true);
_handle_err(st);
} else {
VLOG(3) << "spill chunk data, filename: " << out_stream()->filename() << ", size: " << chunk->bytes_usage()
<< ", rows: " << chunk->num_rows() << ", partition: " << _partition
<< ", writer_id: " << _writer_id;
VLOG(3) << "spill chunk data, filename: " << out_stream()->filename() << ", size: " << chunk->bytes_usage();
}
_spilling_bytes_usage.fetch_sub(chunk->bytes_usage(), std::memory_order_relaxed);
};
auto spill_task = std::make_shared<ChunkSpillTask>(_load_chunk_spiller.get(), _result_chunk,
_fragment_context->runtime_state()->instance_mem_tracker(),
std::move(callback));
auto spill_task = std::make_shared<ChunkSpillTask>(_load_chunk_spiller.get(), _result_chunk, callback);
RETURN_IF_ERROR(_chunk_spill_token->submit(spill_task));
_spilling_bytes_usage.fetch_add(_result_chunk->bytes_usage(), std::memory_order_relaxed);
_chunk_bytes_usage = 0;
_result_chunk.reset();
return Status::OK();
}
@ -271,10 +240,9 @@ Status SpillPartitionChunkWriter::_flush_to_file() {
RETURN_IF_ERROR(_flush_chunk(chunk.get(), false));
}
} else {
RETURN_IF_ERROR(_merge_chunks());
_merge_chunks();
RETURN_IF_ERROR(_sort());
RETURN_IF_ERROR(_flush_chunk(_result_chunk.get(), true));
commit_file();
}
_chunks.clear();
_chunk_bytes_usage = 0;
@ -286,7 +254,7 @@ Status SpillPartitionChunkWriter::_flush_chunk(Chunk* chunk, bool split) {
if (chunk->get_slot_id_to_index_map().empty()) {
auto& slot_map = _base_chunk->get_slot_id_to_index_map();
for (auto& it : slot_map) {
chunk->set_slot_id_to_index(it.first, _col_index_map[it.second]);
chunk->set_slot_id_to_index(it.first, it.second);
}
}
@ -304,7 +272,7 @@ Status SpillPartitionChunkWriter::_flush_chunk(Chunk* chunk, bool split) {
}
Status SpillPartitionChunkWriter::_write_chunk(Chunk* chunk) {
if (!_sort_ordering && _file_writer->get_written_bytes() >= _max_file_size) {
if (_file_writer->get_written_bytes() >= _max_file_size) {
commit_file();
}
RETURN_IF_ERROR(create_file_writer_if_needed());
@ -312,9 +280,9 @@ Status SpillPartitionChunkWriter::_write_chunk(Chunk* chunk) {
return Status::OK();
}
Status SpillPartitionChunkWriter::_merge_chunks() {
void SpillPartitionChunkWriter::_merge_chunks() {
if (_chunks.empty()) {
return Status::OK();
return;
}
// Create a target chunk with a schema so that it can use some
@ -323,35 +291,11 @@ Status SpillPartitionChunkWriter::_merge_chunks() {
[](int sum, const ChunkPtr& chunk) { return sum + chunk->num_rows(); });
_result_chunk = _create_schema_chunk(_chunks.front(), num_rows);
std::unordered_map<Column*, size_t> col_ptr_index_map;
auto& columns = _chunks.front()->columns();
for (size_t i = 0; i < columns.size(); ++i) {
col_ptr_index_map[columns[i]->get_ptr()] = i;
}
for (auto& chunk : _chunks) {
for (size_t i = 0; i < _result_chunk->num_columns(); ++i) {
auto* dst_col = _result_chunk->get_column_by_index(i).get();
ColumnPtr src_col;
if (_column_evaluators) {
ASSIGN_OR_RETURN(src_col, (*_column_evaluators)[i]->evaluate(chunk.get()));
} else {
src_col = chunk->get_column_by_index(i);
}
dst_col->append(*src_col);
if (chunk == _chunks.front()) {
auto it = col_ptr_index_map.find(src_col.get());
if (it != col_ptr_index_map.end()) {
_col_index_map[it->second] = i;
} else {
return Status::InternalError("unknown column index: " + std::to_string(i));
}
}
}
_result_chunk->append(*chunk, 0, chunk->num_rows());
chunk.reset();
}
_chunks.clear();
return Status::OK();
}
bool SpillPartitionChunkWriter::_mem_insufficent() {
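
Putting the writer methods above together, the intended call sequence is roughly the following sketch (error handling abbreviated; RETURN_IF_ERROR is the StarRocks status macro, and the caller names are illustrative).

    // Sketch of the SpillPartitionChunkWriter lifecycle as changed in this diff.
    Status spill_writer_flow_sketch(starrocks::connector::SpillPartitionChunkWriter& writer,
                                    const std::vector<starrocks::ChunkPtr>& chunks) {
        RETURN_IF_ERROR(writer.init());                 // builds LoadSpillBlockManager + LoadChunkSpiller
        for (const auto& chunk : chunks) {
            RETURN_IF_ERROR(writer.write(chunk.get())); // clones and buffers the chunk
        }
        RETURN_IF_ERROR(writer.flush());                // memory pressure: merge, (sort,) spill async
        // finish(): if nothing was spilled, flush buffered chunks straight to
        // the remote file; otherwise submit a MergeBlockTask to merge blocks.
        return writer.finish();
    }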

View File

@ -14,15 +14,19 @@
#pragma once
#include <fmt/format.h>
#include <map>
#include "column/chunk.h"
#include "common/status.h"
#include "connector/utils.h"
#include "formats/file_writer.h"
#include "fs/fs.h"
#include "runtime/exec_env.h"
#include "runtime/runtime_state.h"
#include "storage/load_chunk_spiller.h"
#include "util/threadpool.h"
#include "util/uid_util.h"
namespace starrocks::connector {
@ -47,10 +51,8 @@ struct PartitionChunkWriterContext {
struct BufferPartitionChunkWriterContext : public PartitionChunkWriterContext {};
struct SpillPartitionChunkWriterContext : public PartitionChunkWriterContext {
std::shared_ptr<FileSystem> fs;
pipeline::FragmentContext* fragment_context = nullptr;
TupleDescriptor* tuple_desc = nullptr;
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> column_evaluators;
std::shared_ptr<SortOrdering> sort_ordering;
};
@ -63,12 +65,10 @@ public:
virtual Status init() = 0;
virtual Status write(const ChunkPtr& chunk) = 0;
virtual Status write(Chunk* chunk) = 0;
virtual Status flush() = 0;
virtual Status wait_flush() = 0;
virtual Status finish() = 0;
virtual bool is_finished() = 0;
@ -120,12 +120,10 @@ public:
Status init() override;
Status write(const ChunkPtr& chunk) override;
Status write(Chunk* chunk) override;
Status flush() override;
Status wait_flush() override;
Status finish() override;
bool is_finished() override { return true; }
@ -144,12 +142,10 @@ public:
Status init() override;
Status write(const ChunkPtr& chunk) override;
Status write(Chunk* chunk) override;
Status flush() override;
Status wait_flush() override;
Status finish() override;
bool is_finished() override;
@ -162,12 +158,7 @@ public:
_file_writer->get_written_bytes();
}
int64_t get_flushable_bytes() override {
if (!_spill_mode) {
return _file_writer ? _file_writer->get_written_bytes() : 0;
}
return _chunk_bytes_usage;
}
int64_t get_flushable_bytes() override { return _chunk_bytes_usage; }
Status merge_blocks();
@ -182,7 +173,7 @@ private:
Status _write_chunk(Chunk* chunk);
Status _merge_chunks();
void _merge_chunks();
SchemaPtr _make_schema();
@ -193,16 +184,13 @@ private:
void _handle_err(const Status& st);
private:
std::shared_ptr<FileSystem> _fs = nullptr;
pipeline::FragmentContext* _fragment_context = nullptr;
TupleDescriptor* _tuple_desc = nullptr;
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> _column_evaluators;
std::shared_ptr<SortOrdering> _sort_ordering;
std::unique_ptr<ThreadPoolToken> _chunk_spill_token;
std::unique_ptr<ThreadPoolToken> _block_merge_token;
std::unique_ptr<LoadSpillBlockManager> _load_spill_block_mgr;
std::shared_ptr<LoadChunkSpiller> _load_chunk_spiller;
TUniqueId _writer_id;
std::list<ChunkPtr> _chunks;
int64_t _chunk_bytes_usage = 0;
@ -210,8 +198,6 @@ private:
ChunkPtr _result_chunk;
ChunkPtr _base_chunk;
SchemaPtr _schema;
std::unordered_map<int, int> _col_index_map; // result chunk index -> chunk index
bool _spill_mode = false;
static const int64_t kWaitMilliseconds;
};

View File

@ -50,10 +50,8 @@ bool SinkOperatorMemoryManager::kill_victim() {
// The flush will decrease the writer's flushable memory bytes, so it usually
// will not be chosen again in a short time.
const auto filename = victim->out_stream()->filename();
size_t flush_bytes = victim->get_flushable_bytes();
const auto result = victim->flush();
LOG(INFO) << "kill victim: " << filename << ", result: " << result << ", flushable_bytes: " << flush_bytes;
auto result = victim->flush();
LOG(INFO) << "kill victim: " << victim->out_stream()->filename() << ", result: " << result;
return true;
}
@ -119,13 +117,13 @@ bool SinkMemoryManager::_apply_on_mem_tracker(SinkOperatorMemoryManager* child_m
auto available_memory = [&]() { return mem_tracker->limit() - mem_tracker->consumption(); };
auto low_watermark = static_cast<int64_t>(mem_tracker->limit() * _low_watermark_ratio);
int64_t flush_watermark = mem_tracker->limit() * _urgent_space_ratio;
int64_t flush_watermark = _query_tracker->limit() * _urgent_space_ratio;
while (available_memory() <= low_watermark) {
child_manager->update_writer_occupied_memory();
int64_t total_occupied_memory = _total_writer_occupied_memory();
LOG_EVERY_SECOND(INFO) << "consumption: " << mem_tracker->consumption()
<< ", total_occupied_memory: " << total_occupied_memory
<< ", flush_watermark: " << flush_watermark;
LOG_EVERY_SECOND(WARNING) << "consumption: " << mem_tracker->consumption()
<< ", writer_allocated_memory: " << total_occupied_memory
<< ", flush_watermark: " << flush_watermark;
if (total_occupied_memory < flush_watermark) {
break;
}
@ -135,14 +133,7 @@ bool SinkMemoryManager::_apply_on_mem_tracker(SinkOperatorMemoryManager* child_m
}
}
child_manager->update_releasable_memory();
if (available_memory() <= low_watermark && _total_releasable_memory() > 0) {
LOG_EVERY_SECOND(WARNING) << "memory usage is still high after flush, : available_memory" << available_memory()
<< ", memory_low_watermark: " << low_watermark
<< ", total_releasable_memory: " << _total_releasable_memory();
return false;
}
return true;
return available_memory() > low_watermark;
}
} // namespace starrocks::connector
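
The control flow above amounts to a two-watermark protocol: while free memory sits at or below the low watermark, keep flushing writers, but stop once their collectively occupied memory drops under the flush watermark; success is reported only if free memory ends up above the low watermark again. A simplified numeric sketch of that loop (the real code drives a MemTracker and per-writer managers; the release-half-on-flush step here is an assumption for illustration):

#include <cstdint>
#include <iostream>

// Simplified model, all quantities in bytes; the ratio parameters mirror
// _low_watermark_ratio and _urgent_space_ratio from the code above.
bool apply_on_mem_tracker(int64_t limit, int64_t consumption, int64_t occupied,
                          double low_watermark_ratio, double urgent_space_ratio) {
    const auto low_watermark = static_cast<int64_t>(limit * low_watermark_ratio);
    const auto flush_watermark = static_cast<int64_t>(limit * urgent_space_ratio);
    auto available = [&]() { return limit - consumption; };

    while (available() <= low_watermark) {
        if (occupied < flush_watermark) break;  // not enough flushable memory left
        // Assumption for illustration: flushing a victim releases half its bytes.
        const int64_t released = occupied / 2;
        occupied -= released;
        consumption -= released;
    }
    return available() > low_watermark;  // succeed only above the low watermark
}

int main() {
    // limit 1000, 950 already consumed, writers hold 800 flushable bytes.
    std::cout << std::boolalpha << apply_on_mem_tracker(1000, 950, 800, 0.1, 0.5) << '\n';
}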

View File

@ -99,15 +99,6 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
}
}
if (tnode.hash_join_node.__isset.asof_join_condition) {
auto asof_join_condition = tnode.hash_join_node.asof_join_condition;
RETURN_IF_ERROR(
Expr::create_expr_tree(_pool, asof_join_condition.left, &_asof_join_condition_probe_expr_ctx, state));
RETURN_IF_ERROR(
Expr::create_expr_tree(_pool, asof_join_condition.right, &_asof_join_condition_build_expr_ctx, state));
_asof_join_condition_op = tnode.hash_join_node.asof_join_condition.opcode;
}
if (tnode.hash_join_node.__isset.partition_exprs) {
// the same column can appear more than once on either side of eq_join_conjuncts, but multiple
// occurrences are counted only once when determining local shuffle partition_exprs for bucket shuffle join.
@ -205,13 +196,6 @@ Status HashJoinNode::prepare(RuntimeState* state) {
RETURN_IF_ERROR(Expr::prepare(_build_expr_ctxs, state));
RETURN_IF_ERROR(Expr::prepare(_probe_expr_ctxs, state));
RETURN_IF_ERROR(Expr::prepare(_other_join_conjunct_ctxs, state));
if (_asof_join_condition_build_expr_ctx != nullptr) {
RETURN_IF_ERROR(_asof_join_condition_build_expr_ctx->prepare(state));
}
if (_asof_join_condition_probe_expr_ctx != nullptr) {
RETURN_IF_ERROR(_asof_join_condition_probe_expr_ctx->prepare(state));
}
HashTableParam param;
_init_hash_table_param(&param, state);
@ -250,22 +234,6 @@ void HashJoinNode::_init_hash_table_param(HashTableParam* param, RuntimeState* r
expr_context->root()->get_slot_ids(&expr_slots);
predicate_slots.insert(expr_slots.begin(), expr_slots.end());
}
if (_asof_join_condition_build_expr_ctx && _asof_join_condition_probe_expr_ctx) {
std::vector<SlotId> build_slots, probe_slots;
_asof_join_condition_probe_expr_ctx->root()->get_slot_ids(&probe_slots);
_asof_join_condition_build_expr_ctx->root()->get_slot_ids(&build_slots);
DCHECK_EQ(probe_slots.size(), 1);
DCHECK_EQ(build_slots.size(), 1);
LogicalType probe_type = _asof_join_condition_probe_expr_ctx->root()->type().type;
LogicalType build_type = _asof_join_condition_build_expr_ctx->root()->type().type;
SlotId build_slot = build_slots[0], probe_slot = probe_slots[0];
param->asof_join_condition_desc = {probe_slot, probe_type, build_slot, build_type, _asof_join_condition_op};
predicate_slots.insert({build_slot, probe_slot});
}
param->predicate_slots = std::move(predicate_slots);
for (auto i = 0; i < _build_expr_ctxs.size(); i++) {
@ -287,13 +255,6 @@ Status HashJoinNode::open(RuntimeState* state) {
RETURN_IF_ERROR(Expr::open(_build_expr_ctxs, state));
RETURN_IF_ERROR(Expr::open(_probe_expr_ctxs, state));
RETURN_IF_ERROR(Expr::open(_other_join_conjunct_ctxs, state));
if (_asof_join_condition_build_expr_ctx != nullptr) {
RETURN_IF_ERROR(_asof_join_condition_build_expr_ctx->open(state));
}
if (_asof_join_condition_probe_expr_ctx != nullptr) {
RETURN_IF_ERROR(_asof_join_condition_probe_expr_ctx->open(state));
}
{
build_timer.stop();
@ -484,12 +445,6 @@ void HashJoinNode::close(RuntimeState* state) {
Expr::close(_build_expr_ctxs, state);
Expr::close(_probe_expr_ctxs, state);
Expr::close(_other_join_conjunct_ctxs, state);
if (_asof_join_condition_build_expr_ctx != nullptr) {
_asof_join_condition_build_expr_ctx->close(state);
}
if (_asof_join_condition_probe_expr_ctx != nullptr) {
_asof_join_condition_probe_expr_ctx->close(state);
}
_ht.close();
@ -537,9 +492,7 @@ pipeline::OpFactories HashJoinNode::_decompose_to_pipeline(pipeline::PipelineBui
_other_join_conjunct_ctxs, _conjunct_ctxs, child(1)->row_desc(), child(0)->row_desc(),
child(1)->type(), child(0)->type(), child(1)->conjunct_ctxs().empty(), _build_runtime_filters,
_output_slots, _output_slots, context->degree_of_parallelism(), _distribution_mode,
_enable_late_materialization, _enable_partition_hash_join, _is_skew_join, _common_expr_ctxs,
_asof_join_condition_op, _asof_join_condition_probe_expr_ctx,
_asof_join_condition_build_expr_ctx);
_enable_late_materialization, _enable_partition_hash_join, _is_skew_join, _common_expr_ctxs);
auto hash_joiner_factory = std::make_shared<starrocks::pipeline::HashJoinerFactory>(param);
// Create a shared RefCountedRuntimeFilterCollector
@ -629,12 +582,10 @@ pipeline::OpFactories HashJoinNode::_decompose_to_pipeline(pipeline::PipelineBui
}
// Use ChunkAccumulateOperator when any of the following conditions occurs:
// - not left/asof left outer join,
// - not left outer join,
// - left outer join, with conjuncts or runtime filters.
bool need_accumulate_chunk =
(_join_type != TJoinOp::LEFT_OUTER_JOIN && _join_type != TJoinOp::ASOF_LEFT_OUTER_JOIN) ||
!_conjunct_ctxs.empty() || !_other_join_conjunct_ctxs.empty() ||
lhs_operators.back()->has_runtime_filters();
bool need_accumulate_chunk = _join_type != TJoinOp::LEFT_OUTER_JOIN || !_conjunct_ctxs.empty() ||
!_other_join_conjunct_ctxs.empty() || lhs_operators.back()->has_runtime_filters();
if (need_accumulate_chunk) {
may_add_chunk_accumulate_operator(lhs_operators, context, id());
}
@ -955,7 +906,6 @@ Status HashJoinNode::_process_other_conjunct(ChunkPtr* chunk) {
switch (_join_type) {
case TJoinOp::LEFT_OUTER_JOIN:
case TJoinOp::FULL_OUTER_JOIN:
case TJoinOp::ASOF_LEFT_OUTER_JOIN:
return _process_outer_join_with_other_conjunct(chunk, _output_probe_column_count, _output_build_column_count);
case TJoinOp::RIGHT_OUTER_JOIN:
case TJoinOp::LEFT_SEMI_JOIN:

View File

@ -113,10 +113,6 @@ private:
TJoinDistributionMode::type _distribution_mode = TJoinDistributionMode::NONE;
std::set<SlotId> _output_slots;
ExprContext* _asof_join_condition_build_expr_ctx = nullptr;
ExprContext* _asof_join_condition_probe_expr_ctx = nullptr;
TExprOpcode::type _asof_join_condition_op = TExprOpcode::INVALID_OPCODE;
bool _is_push_down = false;
bool _enable_late_materialization = false;

View File

@ -85,10 +85,7 @@ HashJoiner::HashJoiner(const HashJoinerParam& param)
_build_runtime_filters(param._build_runtime_filters.begin(), param._build_runtime_filters.end()),
_enable_late_materialization(param._enable_late_materialization),
_max_dop(param._max_dop),
_is_skew_join(param._is_skew_join),
_asof_join_condition_op(param._asof_join_condition_op),
_asof_join_condition_probe_expr_ctx(param._asof_join_condition_probe_expr_ctx),
_asof_join_condition_build_expr_ctx(param._asof_join_condition_build_expr_ctx) {
_is_skew_join(param._is_skew_join) {
_is_push_down = param._hash_join_node.is_push_down;
if (_join_type == TJoinOp::LEFT_ANTI_JOIN && param._hash_join_node.is_rewritten_from_not_in) {
_join_type = TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
@ -178,22 +175,6 @@ void HashJoiner::_init_hash_table_param(HashTableParam* param, RuntimeState* sta
expr_context->root()->get_slot_ids(&expr_slots);
predicate_slots.insert(expr_slots.begin(), expr_slots.end());
}
if (_asof_join_condition_build_expr_ctx && _asof_join_condition_probe_expr_ctx) {
std::vector<SlotId> build_slots, probe_slots;
_asof_join_condition_probe_expr_ctx->root()->get_slot_ids(&probe_slots);
_asof_join_condition_build_expr_ctx->root()->get_slot_ids(&build_slots);
DCHECK_EQ(probe_slots.size(), 1);
DCHECK_EQ(build_slots.size(), 1);
LogicalType probe_type = _asof_join_condition_probe_expr_ctx->root()->type().type;
LogicalType build_type = _asof_join_condition_build_expr_ctx->root()->type().type;
SlotId build_slot = build_slots[0], probe_slot = probe_slots[0];
param->asof_join_condition_desc = {probe_slot, probe_type, build_slot, build_type, _asof_join_condition_op};
predicate_slots.insert({build_slot, probe_slot});
}
param->predicate_slots = std::move(predicate_slots);
for (auto i = 0; i < _build_expr_ctxs.size(); i++) {
@ -205,7 +186,6 @@ void HashJoiner::_init_hash_table_param(HashTableParam* param, RuntimeState* sta
}
}
}
Status HashJoiner::append_chunk_to_ht(RuntimeState* state, const ChunkPtr& chunk) {
if (_phase != HashJoinPhase::BUILD) {
return Status::OK();
@ -526,7 +506,6 @@ Status HashJoiner::_process_other_conjunct(ChunkPtr* chunk, JoinHashTable& hash_
switch (_join_type) {
case TJoinOp::LEFT_OUTER_JOIN:
case TJoinOp::FULL_OUTER_JOIN:
case TJoinOp::ASOF_LEFT_OUTER_JOIN:
return _process_outer_join_with_other_conjunct(chunk, _output_probe_column_count, _output_build_column_count,
hash_table);
case TJoinOp::RIGHT_OUTER_JOIN:
@ -556,13 +535,7 @@ Status HashJoiner::_create_runtime_in_filters(RuntimeState* state) {
SCOPED_TIMER(build_metrics().build_runtime_filter_timer);
size_t ht_row_count = get_ht_row_count();
// Use FE session variable if set, otherwise fall back to BE config
size_t max_conditions = config::max_pushdown_conditions_per_column;
if (state->query_options().__isset.max_pushdown_conditions_per_column) {
max_conditions = state->query_options().max_pushdown_conditions_per_column;
}
if (ht_row_count > max_conditions) {
if (ht_row_count > config::max_pushdown_conditions_per_column) {
return Status::OK();
}

View File

@ -73,8 +73,7 @@ struct HashJoinerParam {
std::set<SlotId> build_output_slots, std::set<SlotId> probe_output_slots, size_t max_dop,
const TJoinDistributionMode::type distribution_mode, bool enable_late_materialization,
bool enable_partition_hash_join, bool is_skew_join,
const std::map<SlotId, ExprContext*>& common_expr_ctxs, TExprOpcode::type asof_join_condition_op,
ExprContext* asof_join_condition_probe_expr_ctx, ExprContext* asof_join_condition_build_expr_ctx)
const std::map<SlotId, ExprContext*>& common_expr_ctxs)
: _pool(pool),
_hash_join_node(hash_join_node),
_is_null_safes(std::move(is_null_safes)),
@ -95,10 +94,7 @@ struct HashJoinerParam {
_enable_late_materialization(enable_late_materialization),
_enable_partition_hash_join(enable_partition_hash_join),
_is_skew_join(is_skew_join),
_common_expr_ctxs(common_expr_ctxs),
_asof_join_condition_op(asof_join_condition_op),
_asof_join_condition_probe_expr_ctx(asof_join_condition_probe_expr_ctx),
_asof_join_condition_build_expr_ctx(asof_join_condition_build_expr_ctx) {}
_common_expr_ctxs(common_expr_ctxs) {}
HashJoinerParam(HashJoinerParam&&) = default;
HashJoinerParam(HashJoinerParam&) = default;
@ -127,9 +123,6 @@ struct HashJoinerParam {
const bool _enable_partition_hash_join;
const bool _is_skew_join;
const std::map<SlotId, ExprContext*> _common_expr_ctxs;
TExprOpcode::type _asof_join_condition_op;
ExprContext* _asof_join_condition_probe_expr_ctx;
ExprContext* _asof_join_condition_build_expr_ctx;
};
inline bool could_short_circuit(TJoinOp::type join_type) {
@ -503,10 +496,6 @@ private:
size_t _max_dop = 0;
bool _is_skew_join = false;
TExprOpcode::type _asof_join_condition_op = TExprOpcode::INVALID_OPCODE;
ExprContext* _asof_join_condition_probe_expr_ctx = nullptr;
ExprContext* _asof_join_condition_build_expr_ctx = nullptr;
};
} // namespace starrocks

View File

@ -14,7 +14,7 @@
#include "exec/hdfs_scanner/hdfs_scanner.h"
#include "cache/disk_cache/block_cache_hit_rate_counter.hpp"
#include "cache/block_cache/block_cache_hit_rate_counter.hpp"
#include "column/column_helper.h"
#include "column/type_traits.h"
#include "connector/deletion_vector/deletion_vector.h"

View File

@ -17,7 +17,6 @@
#include <atomic>
#include <boost/algorithm/string.hpp>
#include "cache/cache_options.h"
#include "connector/deletion_vector/deletion_bitmap.h"
#include "exec/olap_scan_prepare.h"
#include "exec/pipeline/scan/morsel.h"

View File

@ -52,10 +52,6 @@ private:
template <LogicalType LT>
static std::pair<bool, JoinHashMapMethodUnaryType> _try_use_linear_chained(RuntimeState* state,
JoinHashTableItems* table_items);
// Helper method to get fallback hash map method type based on join type
template <LogicalType LT>
static std::pair<bool, JoinHashMapMethodUnaryType> _get_fallback_method(bool is_asof_join_type);
};
std::tuple<JoinKeyConstructorUnaryType, JoinHashMapMethodUnaryType>
@ -165,25 +161,16 @@ JoinHashMapMethodUnaryType JoinHashMapSelector::_determine_hash_map_method(
return hash_map_type;
}
return _get_fallback_method<LT>(is_asof_join(table_items->join_type)).second;
return JoinHashMapMethodTypeTraits<JoinHashMapMethodType::BUCKET_CHAINED, LT>::unary_type;
}
});
}
template <LogicalType LT>
std::pair<bool, JoinHashMapMethodUnaryType> JoinHashMapSelector::_get_fallback_method(bool is_asof_join_type) {
return {false, is_asof_join_type
? JoinHashMapMethodTypeTraits<JoinHashMapMethodType::LINEAR_CHAINED_ASOF, LT>::unary_type
: JoinHashMapMethodTypeTraits<JoinHashMapMethodType::BUCKET_CHAINED, LT>::unary_type};
}
template <LogicalType LT>
std::pair<bool, JoinHashMapMethodUnaryType> JoinHashMapSelector::_try_use_range_direct_mapping(
RuntimeState* state, JoinHashTableItems* table_items) {
bool is_asof_join_type = is_asof_join(table_items->join_type);
if (!state->enable_hash_join_range_direct_mapping_opt()) {
return _get_fallback_method<LT>(is_asof_join_type);
return {false, JoinHashMapMethodUnaryType::BUCKET_CHAINED_INT};
}
using KeyConstructor = typename JoinKeyConstructorTypeTraits<JoinKeyConstructorType::ONE_KEY, LT>::BuildType;
@ -194,12 +181,12 @@ std::pair<bool, JoinHashMapMethodUnaryType> JoinHashMapSelector::_try_use_range_
// `max_value - min_value + 1` will be overflow.
if (min_value == std::numeric_limits<int64_t>::min() && max_value == std::numeric_limits<int64_t>::max()) {
return _get_fallback_method<LT>(is_asof_join_type);
return {false, JoinHashMapMethodUnaryType::BUCKET_CHAINED_INT};
}
const uint64_t value_interval = static_cast<uint64_t>(max_value) - min_value + 1;
if (value_interval >= std::numeric_limits<uint32_t>::max()) {
return _get_fallback_method<LT>(is_asof_join_type);
return {false, JoinHashMapMethodUnaryType::BUCKET_CHAINED_INT};
}
table_items->min_value = min_value;
@ -239,20 +226,19 @@ std::pair<bool, JoinHashMapMethodUnaryType> JoinHashMapSelector::_try_use_range_
}
}
return _get_fallback_method<LT>(is_asof_join_type);
return {false, JoinHashMapMethodUnaryType::BUCKET_CHAINED_INT};
}
template <LogicalType LT>
std::pair<bool, JoinHashMapMethodUnaryType> JoinHashMapSelector::_try_use_linear_chained(
RuntimeState* state, JoinHashTableItems* table_items) {
bool is_asof_join_type = is_asof_join(table_items->join_type);
if (!state->enable_hash_join_linear_chained_opt()) {
return _get_fallback_method<LT>(is_asof_join_type);
return {false, JoinHashMapMethodTypeTraits<JoinHashMapMethodType::BUCKET_CHAINED, LT>::unary_type};
}
const uint64_t bucket_size = JoinHashMapHelper::calc_bucket_size(table_items->row_count + 1);
if (bucket_size > LinearChainedJoinHashMap<LT>::max_supported_bucket_size()) {
return _get_fallback_method<LT>(is_asof_join_type);
return {false, JoinHashMapMethodTypeTraits<JoinHashMapMethodType::BUCKET_CHAINED, LT>::unary_type};
}
const bool is_left_anti_join_without_other_conjunct =
@ -361,7 +347,6 @@ void JoinHashTable::create(const HashTableParam& param) {
_table_items->with_other_conjunct = param.with_other_conjunct;
_table_items->join_type = param.join_type;
_table_items->enable_late_materialization = param.enable_late_materialization;
_table_items->asof_join_condition_desc = param.asof_join_condition_desc;
if (_table_items->join_type == TJoinOp::RIGHT_SEMI_JOIN || _table_items->join_type == TJoinOp::RIGHT_ANTI_JOIN ||
_table_items->join_type == TJoinOp::RIGHT_OUTER_JOIN) {
@ -369,21 +354,12 @@ void JoinHashTable::create(const HashTableParam& param) {
} else if (_table_items->join_type == TJoinOp::LEFT_SEMI_JOIN ||
_table_items->join_type == TJoinOp::LEFT_ANTI_JOIN ||
_table_items->join_type == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
_table_items->join_type == TJoinOp::LEFT_OUTER_JOIN ||
_table_items->join_type == TJoinOp::ASOF_LEFT_OUTER_JOIN) {
_table_items->join_type == TJoinOp::LEFT_OUTER_JOIN) {
_table_items->right_to_nullable = true;
} else if (_table_items->join_type == TJoinOp::FULL_OUTER_JOIN) {
_table_items->left_to_nullable = true;
_table_items->right_to_nullable = true;
}
if (is_asof_join(_table_items->join_type)) {
auto variant_index = get_asof_variant_index(_table_items->asof_join_condition_desc.build_logical_type,
_table_items->asof_join_condition_desc.condition_op);
DCHECK_LT(variant_index, 12) << "Invalid variant index";
_table_items->asof_index_vector = create_asof_index_vector(variant_index);
}
_table_items->join_keys = param.join_keys;
_init_probe_column(param);
@ -718,7 +694,6 @@ void JoinHashTable::remove_duplicate_index(Filter* filter) {
if (_is_empty_map) {
switch (_table_items->join_type) {
case TJoinOp::LEFT_OUTER_JOIN:
case TJoinOp::ASOF_LEFT_OUTER_JOIN:
case TJoinOp::LEFT_ANTI_JOIN:
case TJoinOp::FULL_OUTER_JOIN:
case TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN: {
@ -737,7 +712,6 @@ void JoinHashTable::remove_duplicate_index(Filter* filter) {
DCHECK_LT(0, _table_items->row_count);
switch (_table_items->join_type) {
case TJoinOp::LEFT_OUTER_JOIN:
case TJoinOp::ASOF_LEFT_OUTER_JOIN:
_remove_duplicate_index_for_left_outer_join(filter);
break;
case TJoinOp::LEFT_SEMI_JOIN:

View File

@ -52,8 +52,7 @@ public:
case TJoinOp::FULL_OUTER_JOIN:
case TJoinOp::LEFT_ANTI_JOIN:
case TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN:
case TJoinOp::LEFT_OUTER_JOIN:
case TJoinOp::ASOF_LEFT_OUTER_JOIN: {
case TJoinOp::LEFT_OUTER_JOIN: {
_probe_state->count = (*probe_chunk)->num_rows();
_probe_output<false>(probe_chunk, chunk);
_build_output<false>(chunk);
@ -321,21 +320,6 @@ private:
void _probe_from_ht_for_left_outer_left_anti_full_outer_join_with_other_conjunct(
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data);
// for asof inner join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_asof_inner_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for asof left outer join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_asof_left_outer_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for asof left outer join with other conjunct
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_asof_left_outer_join_with_other_conjunct(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
JoinHashTableItems* _table_items = nullptr;
HashTableProbeState* _probe_state = nullptr;
};
@ -454,7 +438,6 @@ private:
JoinHashMapForNonSmallKey(BUCKET_CHAINED), //
JoinHashMapForNonSmallKey(LINEAR_CHAINED), //
JoinHashMapForNonSmallKey(LINEAR_CHAINED_SET), //
JoinHashMapForNonSmallKey(LINEAR_CHAINED_ASOF), //
JoinHashMapForIntBigintKey(RANGE_DIRECT_MAPPING), //
JoinHashMapForIntBigintKey(RANGE_DIRECT_MAPPING_SET), //
JoinHashMapForIntBigintKey(DENSE_RANGE_DIRECT_MAPPING) //

View File

@ -15,7 +15,7 @@
#pragma once
#include "column/column.h"
#include "exec/join/join_hash_table_descriptor.h"
#include "simd/gather.h"
#include "simd/simd.h"
#include "util/runtime_profile.h"
@ -389,14 +389,6 @@ void JoinHashMap<LT, CT, MT>::_search_ht(RuntimeState* state, ChunkPtr* probe_ch
_probe_state->probe_index.resize(state->chunk_size() + 8);
_probe_state->build_index.resize(state->chunk_size() + 8);
}
if (is_asof_join(_table_items->join_type)) {
_probe_state->asof_temporal_condition_column =
(*probe_chunk)->get_column_by_slot_id(_table_items->asof_join_condition_desc.probe_slot_id);
// Disable coroutines for ASOF joins unconditionally
_probe_state->active_coroutines = 0;
}
if (!_probe_state->has_remain) {
_probe_state->probe_row_count = (*probe_chunk)->num_rows();
_probe_state->active_coroutines = state->query_options().interleaving_group_size;
@ -517,13 +509,6 @@ void JoinHashMap<LT, CT, MT>::_search_ht_impl(RuntimeState* state, const ImmBuff
case TJoinOp::FULL_OUTER_JOIN:
DO_PROBE(_probe_from_ht_for_full_outer_join);
break;
case TJoinOp::ASOF_INNER_JOIN:
_probe_from_ht_for_asof_inner_join<first_probe, is_collision_free_and_unique>(state, build_data, data);
break;
case TJoinOp::ASOF_LEFT_OUTER_JOIN:
_probe_from_ht_for_asof_left_outer_join_with_other_conjunct<first_probe, is_collision_free_and_unique>(
state, build_data, data);
break;
default:
DO_PROBE(_probe_from_ht);
break;
@ -554,13 +539,6 @@ void JoinHashMap<LT, CT, MT>::_search_ht_impl(RuntimeState* state, const ImmBuff
is_collision_free_and_unique>(
state, build_data, data);
break;
case TJoinOp::ASOF_INNER_JOIN:
_probe_from_ht_for_asof_inner_join<first_probe, is_collision_free_and_unique>(state, build_data, data);
break;
case TJoinOp::ASOF_LEFT_OUTER_JOIN:
_probe_from_ht_for_asof_left_outer_join_with_other_conjunct<first_probe, is_collision_free_and_unique>(
state, build_data, data);
break;
default:
// can't reach here
_probe_from_ht<first_probe, is_collision_free_and_unique>(state, build_data, data);
@ -673,13 +651,12 @@ void JoinHashMap<LT, CT, MT>::_search_ht_impl(RuntimeState* state, const ImmBuff
} \
}
#define PROBE_OVER() \
_probe_state->has_remain = false; \
_probe_state->cur_probe_index = 0; \
_probe_state->cur_build_index = 0; \
_probe_state->count = match_count; \
_probe_state->cur_row_match_count = 0; \
_probe_state->asof_temporal_condition_column = nullptr;
#define PROBE_OVER() \
_probe_state->has_remain = false; \
_probe_state->cur_probe_index = 0; \
_probe_state->cur_build_index = 0; \
_probe_state->count = match_count; \
_probe_state->cur_row_match_count = 0;
#define MATCH_RIGHT_TABLE_ROWS() \
_probe_state->probe_index[match_count] = i; \
@ -842,92 +819,6 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht(Runt
PROBE_OVER()
}
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_asof_inner_join(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
_probe_state->match_flag = JoinMatchFlag::NORMAL;
size_t match_count = 0;
constexpr bool one_to_many = false;
size_t i = _probe_state->cur_probe_index;
if constexpr (!first_probe) {
_probe_state->probe_index[0] = _probe_state->cur_probe_index;
_probe_state->build_index[0] = _probe_state->cur_build_index;
match_count = 1;
if (_probe_state->next[i] == 0) {
i++;
}
}
[[maybe_unused]] size_t probe_cont = 0;
if constexpr (first_probe) {
memset(_probe_state->probe_match_filter.data(), 0, _probe_state->probe_row_count * sizeof(uint8_t));
}
const size_t probe_row_count = _probe_state->probe_row_count;
const auto* probe_buckets = _probe_state->next.data();
LogicalType asof_temporal_probe_type = _table_items->asof_join_condition_desc.probe_logical_type;
TExprOpcode::type opcode = _table_items->asof_join_condition_desc.condition_op;
auto process_probe_rows = [&]<LogicalType ASOF_LT, TExprOpcode::type OpCode>() {
using AsofTemporalCppType = RunTimeCppType<ASOF_LT>;
const auto* asof_temporal_data_column =
ColumnHelper::get_data_column_by_type<ASOF_LT>(_probe_state->asof_temporal_condition_column.get());
const NullColumn* asof_temporal_col_nulls =
ColumnHelper::get_null_column(_probe_state->asof_temporal_condition_column);
const AsofTemporalCppType* asof_temporal_probe_values = asof_temporal_data_column->immutable_data().data();
if (!_probe_state->asof_temporal_condition_column || _probe_state->asof_temporal_condition_column->empty()) {
PROBE_OVER();
return;
}
constexpr size_t variant_index = get_asof_variant_index(ASOF_LT, OpCode);
auto& asof_index_vector = get_asof_index_vector_static<variant_index>(_table_items);
for (; i < probe_row_count; i++) {
uint32_t build_index = probe_buckets[i];
if (build_index == 0) {
continue;
}
if (asof_temporal_col_nulls && const_cast<NullColumn*>(asof_temporal_col_nulls)->get_data()[i] != 0) {
continue;
}
DCHECK_LT(i, asof_temporal_data_column->size());
AsofTemporalCppType probe_temporal_value = asof_temporal_probe_values[i];
uint32_t matched_build_row_index = asof_index_vector[build_index]->find_asof_match(probe_temporal_value);
if (matched_build_row_index != 0) {
_probe_state->probe_index[match_count] = i;
_probe_state->build_index[match_count] = matched_build_row_index;
match_count++;
if constexpr (first_probe) {
_probe_state->probe_match_filter[i] = 1;
}
probe_cont++;
}
}
if constexpr (first_probe) {
CHECK_MATCH()
}
PROBE_OVER();
};
AsofJoinProbeDispatcher::dispatch(asof_temporal_probe_type, opcode, process_probe_rows);
}
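
The probe above delegates the temporal comparison to `find_asof_match`, whose implementation is not shown in this diff: each equi-key bucket keeps its build rows sorted by the ASOF column, and the lookup is a binary search constrained by the ASOF operator, returning 0 when nothing qualifies (matching the row-0 sentinel used throughout). A plausible sketch for a "latest build time at or before the probe time" operator, with an assumed (time, row) entry layout:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Assumed per-bucket ASOF index: entries sorted ascending by temporal value.
// Row 0 is reserved as the "no match" sentinel, as in the probe code above.
struct AsofIndex {
    std::vector<std::pair<int64_t, uint32_t>> entries;  // (time, build row)

    // Sketched semantics: latest build time at or before the probe time.
    uint32_t find_asof_match(int64_t probe_time) const {
        auto it = std::upper_bound(entries.begin(), entries.end(),
                                   std::pair<int64_t, uint32_t>(probe_time, UINT32_MAX));
        return it == entries.begin() ? 0 : std::prev(it)->second;
    }
};

int main() {
    AsofIndex idx{{{10, 1}, {20, 2}, {30, 3}}};
    assert(idx.find_asof_match(25) == 2);  // 20 is the latest time <= 25
    assert(idx.find_asof_match(5) == 0);   // nothing at or before time 5
}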
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_join(
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
@ -1047,101 +938,6 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_join(RuntimeState* s
}
PROBE_OVER()
}
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_asof_left_outer_join(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
_probe_state->match_flag = JoinMatchFlag::NORMAL;
size_t match_count = 0;
constexpr bool one_to_many = false;
size_t i = _probe_state->cur_probe_index;
if constexpr (!first_probe) {
_probe_state->probe_index[0] = _probe_state->cur_probe_index;
_probe_state->build_index[0] = _probe_state->cur_build_index;
match_count = 1;
if (_probe_state->next[i] == 0) {
i++;
_probe_state->cur_row_match_count = 0;
}
}
[[maybe_unused]] size_t probe_cont = 0;
if constexpr (first_probe) {
memset(_probe_state->probe_match_filter.data(), 0, _probe_state->probe_row_count * sizeof(uint8_t));
}
uint32_t cur_row_match_count = _probe_state->cur_row_match_count;
const size_t probe_row_count = _probe_state->probe_row_count;
const auto* probe_buckets = _probe_state->next.data();
LogicalType asof_temporal_probe_type = _table_items->asof_join_condition_desc.probe_logical_type;
TExprOpcode::type opcode = _table_items->asof_join_condition_desc.condition_op;
auto process_probe_rows = [&]<LogicalType ASOF_LT, TExprOpcode::type OpCode>() {
using AsofTemporalCppType = RunTimeCppType<ASOF_LT>;
constexpr size_t variant_index = get_asof_variant_index(ASOF_LT, OpCode);
auto& asof_index_vector = get_asof_index_vector_static<variant_index>(_table_items);
const auto* asof_temporal_data_column =
ColumnHelper::get_data_column_by_type<ASOF_LT>(_probe_state->asof_temporal_condition_column.get());
const NullColumn* asof_temporal_col_nulls =
ColumnHelper::get_null_column(_probe_state->asof_temporal_condition_column);
const AsofTemporalCppType* asof_temporal_probe_values = asof_temporal_data_column->immutable_data().data();
if (!_probe_state->asof_temporal_condition_column || _probe_state->asof_temporal_condition_column->empty()) {
LOG(WARNING) << "ASOF LEFT OUTER: No valid asof column";
PROBE_OVER();
return;
}
for (; i < probe_row_count; i++) {
uint32_t build_index = probe_buckets[i];
if (build_index == 0 ||
(asof_temporal_col_nulls && const_cast<NullColumn*>(asof_temporal_col_nulls)->get_data()[i] != 0)) {
_probe_state->probe_index[match_count] = i;
_probe_state->build_index[match_count] = 0;
match_count++;
RETURN_IF_CHUNK_FULL2()
continue;
}
DCHECK_LT(i, asof_temporal_data_column->size());
AsofTemporalCppType probe_temporal_value = asof_temporal_probe_values[i];
uint32_t matched_build_row_index = asof_index_vector[build_index]->find_asof_match(probe_temporal_value);
if (matched_build_row_index != 0) {
_probe_state->probe_index[match_count] = i;
_probe_state->build_index[match_count] = matched_build_row_index;
match_count++;
cur_row_match_count++;
RETURN_IF_CHUNK_FULL2()
} else {
_probe_state->probe_index[match_count] = i;
_probe_state->build_index[match_count] = 0;
match_count++;
RETURN_IF_CHUNK_FULL2()
}
cur_row_match_count = 0;
}
_probe_state->cur_row_match_count = cur_row_match_count;
if constexpr (first_probe) {
CHECK_MATCH()
}
PROBE_OVER();
};
AsofJoinProbeDispatcher::dispatch(asof_temporal_probe_type, opcode, process_probe_rows);
}
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join(
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
@ -1856,101 +1652,6 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_left_anti_full_outer
PROBE_OVER()
}
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_asof_left_outer_join_with_other_conjunct(
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
_probe_state->match_flag = JoinMatchFlag::NORMAL;
size_t match_count = 0;
constexpr bool one_to_many = false;
size_t i = _probe_state->cur_probe_index;
if constexpr (!first_probe) {
_probe_state->probe_index[0] = _probe_state->cur_probe_index;
_probe_state->build_index[0] = _probe_state->cur_build_index;
match_count = 1;
if (_probe_state->next[i] == 0) {
i++;
_probe_state->cur_row_match_count = 0;
}
} else {
_probe_state->cur_row_match_count = 0;
for (size_t j = 0; j < state->chunk_size(); j++) {
_probe_state->probe_match_index[j] = 0;
}
}
const size_t probe_row_count = _probe_state->probe_row_count;
const auto* probe_buckets = _probe_state->next.data();
LogicalType asof_temporal_probe_type = _table_items->asof_join_condition_desc.probe_logical_type;
TExprOpcode::type opcode = _table_items->asof_join_condition_desc.condition_op;
auto process_probe_rows = [&]<LogicalType ASOF_LT, TExprOpcode::type OpCode>() {
using AsofTemporalCppType = RunTimeCppType<ASOF_LT>;
constexpr size_t variant_index = get_asof_variant_index(ASOF_LT, OpCode);
auto& asof_index_vector = get_asof_index_vector_static<variant_index>(_table_items);
const auto* asof_temporal_data_column =
ColumnHelper::get_data_column_by_type<ASOF_LT>(_probe_state->asof_temporal_condition_column.get());
const NullColumn* asof_temporal_col_nulls =
ColumnHelper::get_null_column(_probe_state->asof_temporal_condition_column);
const AsofTemporalCppType* asof_temporal_probe_values = asof_temporal_data_column->immutable_data().data();
if (!_probe_state->asof_temporal_condition_column || _probe_state->asof_temporal_condition_column->empty()) {
LOG(WARNING) << "ASOF LEFT OUTER WITH OTHER CONJUNCT: No valid asof column";
for (; i < probe_row_count; i++) {
_probe_state->probe_index[match_count] = i;
_probe_state->build_index[match_count] = 0;
match_count++;
}
PROBE_OVER();
return;
}
for (; i < probe_row_count; i++) {
uint32_t build_index = probe_buckets[i];
if (build_index == 0 ||
(asof_temporal_col_nulls && const_cast<NullColumn*>(asof_temporal_col_nulls)->get_data()[i] != 0)) {
_probe_state->probe_index[match_count] = i;
_probe_state->build_index[match_count] = 0;
match_count++;
RETURN_IF_CHUNK_FULL()
continue;
}
DCHECK_LT(i, asof_temporal_data_column->size());
AsofTemporalCppType probe_temporal_value = asof_temporal_probe_values[i];
uint32_t matched_build_row_index = asof_index_vector[build_index]->find_asof_match(probe_temporal_value);
if (matched_build_row_index != 0) {
_probe_state->probe_index[match_count] = i;
_probe_state->build_index[match_count] = matched_build_row_index;
_probe_state->probe_match_index[i]++;
_probe_state->cur_row_match_count++;
match_count++;
RETURN_IF_CHUNK_FULL()
} else {
_probe_state->probe_index[match_count] = i;
_probe_state->build_index[match_count] = 0;
match_count++;
RETURN_IF_CHUNK_FULL()
}
_probe_state->cur_row_match_count = 0;
}
if constexpr (first_probe) {
CHECK_MATCH()
}
PROBE_OVER();
};
AsofJoinProbeDispatcher::dispatch(asof_temporal_probe_type, opcode, process_probe_rows);
}
// ------------------------------------------------------------------------------------
// JoinHashTable
// ------------------------------------------------------------------------------------

View File

@ -91,15 +91,6 @@ public:
}
}
template <typename CppType>
static std::pair<uint32_t, uint8_t> calc_bucket_num_and_fp(const CppType& value, uint32_t bucket_size,
uint32_t num_log_buckets) {
static constexpr uint64_t FP_BITS = 7;
using HashFunc = JoinKeyHash<CppType>;
const uint64_t hash = HashFunc()(value, bucket_size << FP_BITS, num_log_buckets + FP_BITS);
return {hash >> FP_BITS, (hash & 0x7F) | 0x80};
}
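
The removed helper splits one hash into two pieces: the bits above the low 7 select a bucket, and the low 7 bits become a fingerprint that is forced non-zero by OR-ing in 0x80 (the extended `bucket_size << FP_BITS` hash space is what keeps both parts uniformly distributed). A worked example of the same bit arithmetic on an assumed precomputed hash value, standing in for JoinKeyHash:

#include <cassert>
#include <cstdint>
#include <utility>

// Same split as calc_bucket_num_and_fp: 7 fingerprint bits below the bucket bits.
std::pair<uint32_t, uint8_t> split_bucket_and_fp(uint64_t hash) {
    constexpr uint64_t FP_BITS = 7;
    const auto bucket = static_cast<uint32_t>(hash >> FP_BITS);
    // Low 7 bits, top bit forced on so an occupied slot never reads as 0.
    const auto fp = static_cast<uint8_t>((hash & 0x7F) | 0x80);
    return {bucket, fp};
}

int main() {
    // hash = 0b1011'0100101: bucket = 0b1011 = 11, fp = 0b0100101 | 0x80 = 0xA5.
    auto [bucket, fp] = split_bucket_and_fp(0b10110100101ULL);
    assert(bucket == 11 && fp == 0xA5);
}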
static Slice get_hash_key(const Columns& key_columns, size_t row_idx, uint8_t* buffer) {
size_t byte_size = 0;
for (const auto& key_column : key_columns) {

View File

@ -150,72 +150,6 @@ private:
template <LogicalType LT>
using LinearChainedJoinHashSet = LinearChainedJoinHashMap<LT, false>;
// The `LinearChainedAsofJoinHashMap` is specifically designed for ASOF JOIN operations on time-series data.
// It uses linear probing with separated fingerprint storage and maintains ASOF temporal indexes for efficient
// time-based matching.
// - `first` stores the build index for each distinct equi-join key.
// - `fps` stores the separated fingerprints for fast collision detection.
// - `asof_index_vector` maintains sorted temporal indexes for ASOF matching logic.
//
// Fingerprint Design
// - Uses 7-bit fingerprints stored separately in the `fps` array, with the highest bit always set to 1
// to ensure non-zero values (distinguishable from empty buckets).
// - Fingerprints range from 0x80 to 0xFF (128 possible values), providing 1/128 ≈ 0.78% collision rate.
// - The fingerprint is computed using an extended hash space: `bucket_size << 7` and `num_log_buckets + 7`.
//
// ASOF Temporal Processing
// - Each equi-join bucket maintains its own ASOF index containing temporal column values and row indexes.
// - ASOF indexes are sorted by temporal values to enable efficient binary search during probe phase.
// - Supports various ASOF operations: LT, LE, GT, GE for different temporal matching requirements.
//
// Memory Layout Comparison with LinearChainedJoinHashMap:
// - LinearChainedJoinHashMap: `first[bucket] = (8-bit FP << 24) | 24-bit build_index` (packed)
// - LinearChainedAsofJoinHashMap: `first[bucket] = 32-bit build_index`, `fps[bucket] = 8-bit FP` (separated)
// - Trade-off: Uses 25% more memory per bucket but supports unlimited bucket sizes and easier ASOF processing.
//
// Insert and Probe for ASOF JOIN
// - During insertion, linear probing locates the appropriate bucket for the equi-join key.
// - For each equi-join bucket, temporal values are added to the corresponding ASOF index.
// - During probing, equi-join keys are matched first, then ASOF binary search finds temporal matches.
// - No bucket size limitation (unlike LinearChainedJoinHashMap's 16M bucket limit).
//
// The following diagram illustrates the structure of `LinearChainedAsofJoinHashMap`:
//
// build keys + temporal first fps asof_index_vector
// ┌───────┐ ┌───┐ ┌─────────────────────┐
// │ index │ │FP │ │ sorted temporal │
// ├───────┤ ├───┤ │ [(t1,idx1),(t2,idx2)│
// ┌────┐ ┌─►│ │ │ │ ┌───►│ (t3,idx3),(t4,idx4)]│
// ┌──────┐ │ │ │ ├───────┤ ├───┤ │ ├─────────────────────┤
// │ key ├──►│hash├─┘ │ │ │ │ │ │ │
// └──────┘ │ │ ┌─►├───────┤ ├───┤ │ │ │
// └────┘ │ │ index ├──►│FP ├─┘ ├─────────────────────┤
// │ ├───────┤ ├───┤ │ sorted temporal │
// │ │ │ │ │ │ [(t5,idx5),(t6,idx6)│
// │ ├───────┤ ├───┤ ┌───►│ (t7,idx7)] │
// └─►│ index ├──►│FP ├─┘ └─────────────────────┘
// └───────┘ └───┘ ASOF binary search
// No size for temporal matching
// limitation
template <LogicalType LT>
class LinearChainedAsofJoinHashMap {
public:
using CppType = typename RunTimeTypeTraits<LT>::CppType;
using ColumnType = typename RunTimeTypeTraits<LT>::ColumnType;
static constexpr bool AreKeysInChainIdentical = true;
static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
static void construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const ImmBuffer<CppType>& build_keys, const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static bool equal(const CppType& x, const CppType& y) { return true; }
};
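
To make the layout trade-off described above concrete, here is a small sketch of the two bucket encodings side by side; the helper names mirror the `_combine_data_fp`/`_extract_fp`/`_extract_data` helpers visible elsewhere in this diff, but this is an illustration, not the exact implementation:

#include <cassert>
#include <cstdint>
#include <vector>

// Packed layout (LinearChainedJoinHashMap): one 32-bit word per bucket,
// 8-bit fingerprint in the top byte, 24-bit build index below it.
uint32_t combine_data_fp(uint32_t build_index, uint8_t fp) {
    return (static_cast<uint32_t>(fp) << 24) | (build_index & 0xFFFFFF);
}
uint8_t extract_fp(uint32_t packed) { return packed >> 24; }
uint32_t extract_data(uint32_t packed) { return packed & 0xFFFFFF; }

// Separated layout (LinearChainedAsofJoinHashMap): a full 32-bit build index
// per bucket plus a parallel one-byte fingerprint array (0 means empty), so
// each bucket costs 40 bits instead of 32 but the 16M-row cap disappears.
struct SeparatedBuckets {
    std::vector<uint32_t> first;
    std::vector<uint8_t> fps;
};

int main() {
    const uint32_t packed = combine_data_fp(123456, 0xA5);
    assert(extract_fp(packed) == 0xA5 && extract_data(packed) == 123456);

    SeparatedBuckets b{{0}, {0}};
    b.first[0] = 20'000'000;  // would not fit in the packed 24-bit index
    b.fps[0] = 0xA5;
    assert(b.first[0] == 20'000'000);
}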
// The bucket-chained linked list formed by `first` and `next` is the same as that of `BucketChainedJoinHashMap`.
//
// `DirectMappingJoinHashMap` maps to a position in `first` using `key-MIN_VALUE`.

View File

@ -16,7 +16,6 @@
#include "exec/join/join_hash_map_helper.h"
#include "exec/join/join_hash_map_method.h"
#include "exec/join/join_hash_table_descriptor.h"
#include "simd/gather.h"
namespace starrocks {
@ -130,110 +129,80 @@ void LinearChainedJoinHashMap<LT, NeedBuildChained>::build_prepare(RuntimeState*
table_items->log_bucket_size = __builtin_ctz(table_items->bucket_size);
table_items->first.resize(table_items->bucket_size, 0);
table_items->next.resize(table_items->row_count + 1, 0);
if (is_asof_join(table_items->join_type)) {
table_items->resize_asof_index_vector(table_items->row_count + 1);
}
}
template <LogicalType LT, bool NeedBuildChained>
void LinearChainedJoinHashMap<LT, NeedBuildChained>::construct_hash_table(
JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const auto num_rows = 1 + table_items->row_count;
const uint32_t bucket_size_mask = table_items->bucket_size - 1;
auto* __restrict next = table_items->next.data();
auto* __restrict first = table_items->first.data();
const uint8_t* __restrict equi_join_key_nulls = is_nulls.has_value() ? is_nulls->data() : nullptr;
auto process = [&]<bool IsNullable>() {
const auto num_rows = 1 + table_items->row_count;
const uint32_t bucket_size_mask = table_items->bucket_size - 1;
auto linear_probe = [&]<bool BuildChained, bool ReturnAnchor>(const ImmBuffer<CppType>& keys_ref, uint32_t i,
auto&& is_null_pred) -> uint32_t {
if (i + 16 < num_rows && !is_null_pred(i + 16)) {
__builtin_prefetch(first + _get_bucket_num_from_hash(next[i + 16]));
}
const uint32_t hash = next[i];
const uint32_t fp = _get_fp_from_hash(hash);
uint32_t bucket_num = _get_bucket_num_from_hash(hash);
uint32_t probe_times = 1;
while (true) {
if (first[bucket_num] == 0) {
if constexpr (BuildChained) {
next[i] = 0;
}
first[bucket_num] = _combine_data_fp(i, fp);
break;
}
if (fp == _extract_fp(first[bucket_num]) && keys_ref[i] == keys_ref[_extract_data(first[bucket_num])]) {
if constexpr (BuildChained) {
next[i] = _extract_data(first[bucket_num]);
first[bucket_num] = _combine_data_fp(i, fp);
}
break;
}
bucket_num = (bucket_num + probe_times) & bucket_size_mask;
probe_times++;
}
if constexpr (ReturnAnchor) {
return _extract_data(first[bucket_num]);
} else {
return 0u;
}
};
auto compute_hash_values = [&]<bool HasEquiJoinKeyNulls, bool HasAsofTemporalNulls>(
const uint8_t* asof_temporal_nulls) {
for (uint32_t i = 1; i < num_rows; i++) {
if constexpr (HasAsofTemporalNulls) {
if (asof_temporal_nulls[i] != 0) continue;
}
if constexpr (std::is_same_v<CppType, Slice> && HasEquiJoinKeyNulls) {
if (equi_join_key_nulls[i] != 0) continue;
}
auto* __restrict next = table_items->next.data();
auto* __restrict first = table_items->first.data();
const uint8_t* __restrict is_nulls_data = IsNullable ? is_nulls->data() : nullptr;
auto need_calc_bucket_num = [&](const uint32_t index) {
// Only check `is_nulls_data[i]` for the nullable slice type. The hash calculation overhead for
// fixed-size types is small, and thus we do not check it to allow vectorization of the hash calculation.
next[i] = JoinHashMapHelper::calc_bucket_num<CppType>(keys[i], table_items->bucket_size << FP_BITS,
table_items->log_bucket_size + FP_BITS);
}
};
auto dispatch_hash_computation = [&](const uint8_t* asof_temporal_nulls) {
if (equi_join_key_nulls == nullptr) {
if (asof_temporal_nulls == nullptr) {
compute_hash_values.template operator()<false, false>(nullptr);
if constexpr (!IsNullable || !std::is_same_v<CppType, Slice>) {
return true;
} else {
compute_hash_values.template operator()<false, true>(asof_temporal_nulls);
return is_nulls_data[index] == 0;
}
} else {
if (asof_temporal_nulls == nullptr) {
compute_hash_values.template operator()<true, false>(nullptr);
} else {
compute_hash_values.template operator()<true, true>(asof_temporal_nulls);
}
}
};
auto build_hash_table_without_temporal_index = [&]<bool HasEquiJoinKeyNulls>() {
dispatch_hash_computation(nullptr);
auto is_null_row = [&](const uint32_t index) {
if constexpr (!HasEquiJoinKeyNulls) {
};
auto is_null = [&](const uint32_t index) {
if constexpr (!IsNullable) {
return false;
} else {
return equi_join_key_nulls[index] != 0;
return is_nulls_data[index] != 0;
}
};
for (uint32_t i = 1; i < num_rows; i++) {
if (is_null_row(i)) {
// Use `next` stores `bucket_num` temporarily.
if (need_calc_bucket_num(i)) {
next[i] = JoinHashMapHelper::calc_bucket_num<CppType>(keys[i], table_items->bucket_size << FP_BITS,
table_items->log_bucket_size + FP_BITS);
}
}
for (uint32_t i = 1; i < num_rows; i++) {
if (i + 16 < num_rows && !is_null(i + 16)) {
__builtin_prefetch(first + _get_bucket_num_from_hash(next[i + 16]));
}
if (is_null(i)) {
next[i] = 0;
continue;
}
(void)linear_probe.template operator()<NeedBuildChained, false>(keys, i, is_null_row);
const uint32_t hash = next[i];
const uint32_t fp = _get_fp_from_hash(hash);
uint32_t bucket_num = _get_bucket_num_from_hash(hash);
uint32_t probe_times = 1;
while (true) {
if (first[bucket_num] == 0) {
if constexpr (NeedBuildChained) {
next[i] = 0;
}
first[bucket_num] = _combine_data_fp(i, fp);
break;
}
if (fp == _extract_fp(first[bucket_num]) && keys[i] == keys[_extract_data(first[bucket_num])]) {
if constexpr (NeedBuildChained) {
next[i] = _extract_data(first[bucket_num]);
first[bucket_num] = _combine_data_fp(i, fp);
}
break;
}
bucket_num = (bucket_num + probe_times) & bucket_size_mask;
probe_times++;
}
}
if constexpr (!NeedBuildChained) {
@ -241,46 +210,11 @@ void LinearChainedJoinHashMap<LT, NeedBuildChained>::construct_hash_table(
}
};
if (!is_asof_join(table_items->join_type)) {
if (!is_nulls.has_value()) {
build_hash_table_without_temporal_index.template operator()<false>();
} else {
build_hash_table_without_temporal_index.template operator()<true>();
}
return;
if (!is_nulls.has_value()) {
process.template operator()<false>();
} else {
process.template operator()<true>();
}
auto build_hash_table_with_temporal_index = [&]() {
const ColumnPtr& asof_temporal_column =
table_items->build_chunk->get_column_by_slot_id(table_items->asof_join_condition_desc.build_slot_id);
const NullColumn* asof_temporal_col_nulls = ColumnHelper::get_null_column(asof_temporal_column);
const uint8_t* __restrict asof_temporal_nulls = nullptr;
if (asof_temporal_col_nulls != nullptr) {
auto* mutable_null_column = const_cast<NullColumn*>(asof_temporal_col_nulls);
asof_temporal_nulls = mutable_null_column->get_data().data();
}
dispatch_hash_computation(asof_temporal_nulls);
if (equi_join_key_nulls == nullptr) {
auto equi_join_bucket_locator = [&](JoinHashTableItems*, const ImmBuffer<CppType>& keys_ref, uint32_t i) {
return linear_probe.template operator()<false, true>(keys_ref, i, [](uint32_t) { return false; });
};
AsofJoinDispatcher::dispatch_and_process(
table_items, keys, is_nulls.has_value() ? &is_nulls.value() : nullptr, equi_join_bucket_locator);
} else {
auto is_null_predicate = [&](const uint32_t index) { return equi_join_key_nulls[index] != 0; };
auto equi_join_bucket_locator = [&, is_null_predicate](JoinHashTableItems*,
const ImmBuffer<CppType>& keys_ref, uint32_t i) {
return linear_probe.template operator()<false, true>(keys_ref, i, is_null_predicate);
};
AsofJoinDispatcher::dispatch_and_process(
table_items, keys, is_nulls.has_value() ? &is_nulls.value() : nullptr, equi_join_bucket_locator);
}
table_items->finalize_asof_index_vector();
};
build_hash_table_with_temporal_index();
}
template <LogicalType LT, bool NeedBuildChained>
@ -367,173 +301,6 @@ void LinearChainedJoinHashMap<LT, NeedBuildChained>::lookup_init(const JoinHashT
}
}
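
Both the build and probe loops in this file step through collisions with `bucket_num = (bucket_num + probe_times) & bucket_size_mask` while incrementing `probe_times`, i.e. triangular-number probing. With a power-of-two bucket count, the first `bucket_size` steps of that sequence visit every bucket exactly once, so the `while (true)` loops are guaranteed to terminate at an empty or matching slot. A minimal check of that property:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
    const uint32_t bucket_size = 16;  // must be a power of two
    const uint32_t mask = bucket_size - 1;

    std::vector<bool> visited(bucket_size, false);
    uint32_t bucket = 5;  // arbitrary starting bucket
    for (uint32_t probe_times = 1; probe_times <= bucket_size; ++probe_times) {
        visited[bucket] = true;
        bucket = (bucket + probe_times) & mask;  // triangular step, wrapped
    }
    // Triangular steps modulo a power of two touch every bucket exactly once.
    for (bool v : visited) assert(v);
}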
// ------------------------------------------------------------------------------------
// LinearChainedAsofJoinHashMap
// ------------------------------------------------------------------------------------
template <LogicalType LT>
void LinearChainedAsofJoinHashMap<LT>::build_prepare(RuntimeState* state, JoinHashTableItems* table_items) {
table_items->bucket_size = JoinHashMapHelper::calc_bucket_size(table_items->row_count + 1);
table_items->log_bucket_size = __builtin_ctz(table_items->bucket_size);
table_items->first.resize(table_items->bucket_size, 0);
table_items->fps.resize(table_items->bucket_size, 0);
table_items->next.resize(table_items->row_count + 1, 0);
table_items->resize_asof_index_vector(table_items->row_count + 1);
}
template <LogicalType LT>
void LinearChainedAsofJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* table_items,
const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const uint32_t num_rows = table_items->row_count + 1;
auto* __restrict temp_bucket_numbers = table_items->next.data();
std::vector<uint8_t> temp_fingerprints(num_rows);
const uint8_t* __restrict equi_join_key_nulls = is_nulls.has_value() ? is_nulls->data() : nullptr;
const ColumnPtr& asof_temporal_column =
table_items->build_chunk->get_column_by_slot_id(table_items->asof_join_condition_desc.build_slot_id);
const NullColumn* asof_temporal_col_nulls = ColumnHelper::get_null_column(asof_temporal_column);
const uint8_t* __restrict asof_temporal_nulls =
asof_temporal_col_nulls ? const_cast<NullColumn*>(asof_temporal_col_nulls)->get_data().data() : nullptr;
static constexpr uint32_t BATCH_SIZE = 4096;
auto compute_batch_hash_values = [&]<bool HasEquiJoinKeyNulls, bool HasAsofTemporalNulls>() {
for (uint32_t i = 1; i < num_rows; i += BATCH_SIZE) {
const uint32_t batch_count = std::min<uint32_t>(BATCH_SIZE, num_rows - i);
auto* bucket_buffer = temp_bucket_numbers + i;
auto* fingerprint_buffer = temp_fingerprints.data() + i;
for (uint32_t j = 0; j < batch_count; j++) {
const uint32_t row_index = i + j;
if constexpr (HasEquiJoinKeyNulls && std::is_same_v<CppType, Slice>) {
if (equi_join_key_nulls[row_index] != 0) continue;
}
if constexpr (HasAsofTemporalNulls) {
if (asof_temporal_nulls[row_index] != 0) continue;
}
std::tie(bucket_buffer[j], fingerprint_buffer[j]) = JoinHashMapHelper::calc_bucket_num_and_fp<CppType>(
keys[row_index], table_items->bucket_size, table_items->log_bucket_size);
}
}
};
if (equi_join_key_nulls == nullptr) {
if (asof_temporal_nulls == nullptr) {
compute_batch_hash_values.template operator()<false, false>();
} else {
compute_batch_hash_values.template operator()<false, true>();
}
} else {
if (asof_temporal_nulls == nullptr) {
compute_batch_hash_values.template operator()<true, false>();
} else {
compute_batch_hash_values.template operator()<true, true>();
}
}
const uint32_t bucket_mask = table_items->bucket_size - 1;
auto* __restrict bucket_first_indices = table_items->first.data();
auto* __restrict bucket_fingerprints = table_items->fps.data();
auto equi_join_bucket_locator = [&](JoinHashTableItems*, const ImmBuffer<CppType>&,
uint32_t row_index) -> uint32_t {
uint32_t bucket_number = temp_bucket_numbers[row_index];
const uint8_t fingerprint = temp_fingerprints[row_index];
uint32_t probe_attempts = 1;
while (true) {
if (bucket_fingerprints[bucket_number] == 0) {
bucket_first_indices[bucket_number] = row_index;
bucket_fingerprints[bucket_number] = fingerprint;
break;
}
if (fingerprint == bucket_fingerprints[bucket_number] &&
keys[row_index] == keys[bucket_first_indices[bucket_number]]) {
break;
}
bucket_number = (bucket_number + probe_attempts) & bucket_mask;
probe_attempts++;
}
return bucket_first_indices[bucket_number];
};
AsofJoinDispatcher::dispatch_and_process(table_items, keys, is_nulls.has_value() ? &is_nulls.value() : nullptr,
equi_join_bucket_locator);
table_items->finalize_asof_index_vector();
}
template <LogicalType LT>
void LinearChainedAsofJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items,
HashTableProbeState* probe_state,
const ImmBuffer<CppType>& build_keys,
const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
auto process = [&]<bool IsNullable>() {
const uint32_t bucket_size_mask = table_items.bucket_size - 1;
const uint32_t row_count = probe_state->probe_row_count;
const auto* firsts = table_items.first.data();
const auto* fps = table_items.fps.data();
auto* bucket_nums = probe_state->buckets.data();
auto* nexts = probe_state->next.data();
const uint8_t* is_nulls_data = IsNullable && is_nulls.has_value() ? is_nulls->data() : nullptr;
auto need_calc_bucket_num = [&](const uint32_t index) {
if constexpr (!IsNullable || !std::is_same_v<CppType, Slice>) {
// Only check `is_nulls_data[i]` for the nullable slice type. The hash calculation overhead for
// fixed-size types is small, and thus we do not check it to allow vectorization of the hash calculation.
return true;
} else {
return is_nulls_data[index] == 0;
}
};
auto is_null = [&](const uint32_t index) {
if constexpr (!IsNullable) {
return false;
} else {
return is_nulls_data[index] != 0;
}
};
for (uint32_t i = 0; i < row_count; i++) {
if (need_calc_bucket_num(i)) {
std::tie(bucket_nums[i], nexts[i]) = JoinHashMapHelper::calc_bucket_num_and_fp<CppType>(
probe_keys[i], table_items.bucket_size, table_items.log_bucket_size);
}
}
for (uint32_t i = 0; i < row_count; i++) {
if (is_null(i)) {
nexts[i] = 0;
continue;
}
const uint8_t fp = nexts[i];
uint32_t bucket_num = bucket_nums[i];
uint32_t probe_times = 1;
while (true) {
if (fps[bucket_num] == 0) {
nexts[i] = 0;
break;
}
if (fp == fps[bucket_num] && probe_keys[i] == build_keys[firsts[bucket_num]]) {
nexts[i] = firsts[bucket_num];
break;
}
bucket_num = (bucket_num + probe_times) & bucket_size_mask;
probe_times++;
}
}
};
if (!is_nulls.has_value()) {
process.template operator()<false>();
} else {
process.template operator()<true>();
}
}
// ------------------------------------------------------------------------------------
// DirectMappingJoinHashMap
// ------------------------------------------------------------------------------------
@ -546,9 +313,6 @@ void DirectMappingJoinHashMap<LT>::build_prepare(RuntimeState* state, JoinHashTa
table_items->log_bucket_size = __builtin_ctz(table_items->bucket_size);
table_items->first.resize(table_items->bucket_size, 0);
table_items->next.resize(table_items->row_count + 1, 0);
if (is_asof_join(table_items->join_type)) {
table_items->resize_asof_index_vector(table_items->row_count + 1);
}
}
template <LogicalType LT>
@ -557,36 +321,20 @@ void DirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* tabl
static constexpr CppType MIN_VALUE = RunTimeTypeLimits<LT>::min_value();
const auto num_rows = 1 + table_items->row_count;
if (is_asof_join(table_items->join_type)) {
auto equi_join_bucket_locator = [](JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
uint32_t row_index) -> uint32_t {
const size_t bucket_num = keys[row_index] - MIN_VALUE;
if (table_items->first[bucket_num] == 0) {
table_items->first[bucket_num] = row_index;
}
return table_items->first[bucket_num];
};
AsofJoinDispatcher::dispatch_and_process(table_items, keys, is_nulls.has_value() ? &is_nulls.value() : nullptr,
equi_join_bucket_locator);
table_items->finalize_asof_index_vector();
if (!is_nulls.has_value()) {
for (uint32_t i = 1; i < num_rows; i++) {
const size_t bucket_num = keys[i] - MIN_VALUE;
table_items->next[i] = table_items->first[bucket_num];
table_items->first[bucket_num] = i;
}
} else {
if (!is_nulls.has_value()) {
for (uint32_t i = 1; i < num_rows; i++) {
const auto* is_nulls_data = is_nulls->data();
for (uint32_t i = 1; i < num_rows; i++) {
if (is_nulls_data[i] == 0) {
const size_t bucket_num = keys[i] - MIN_VALUE;
table_items->next[i] = table_items->first[bucket_num];
table_items->first[bucket_num] = i;
}
} else {
const auto* is_nulls_data = is_nulls->data();
for (uint32_t i = 1; i < num_rows; i++) {
if (is_nulls_data[i] == 0) {
const size_t bucket_num = keys[i] - MIN_VALUE;
table_items->next[i] = table_items->first[bucket_num];
table_items->first[bucket_num] = i;
}
}
}
}
}
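
The insertion pattern here (and in `RangeDirectMappingJoinHashMap` just below) is a head-insert into bucket chains stored as two flat arrays: `first[bucket]` holds the most recently inserted row for that key and `next[row]` links back to the previous one, with row 0 serving as the end-of-chain sentinel. A standalone sketch of the same structure:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Build rows are 1-based; row 0 is the end-of-chain sentinel.
    std::vector<int64_t> keys = {0, 7, 3, 7, 7};  // keys[0] unused
    const int64_t min_value = 0;
    const size_t bucket_size = 8;

    std::vector<uint32_t> first(bucket_size, 0);
    std::vector<uint32_t> next(keys.size(), 0);

    // Head-insert each row into its bucket's chain, as in construct_hash_table.
    for (uint32_t i = 1; i < keys.size(); i++) {
        const size_t bucket_num = keys[i] - min_value;
        next[i] = first[bucket_num];
        first[bucket_num] = i;
    }

    // Walk the chain for key 7: prints rows 4, 3, 1 (newest first).
    for (uint32_t row = first[7]; row != 0; row = next[row]) {
        std::printf("row %u\n", row);
    }
}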
@ -627,9 +375,6 @@ void RangeDirectMappingJoinHashMap<LT>::build_prepare(RuntimeState* state, JoinH
table_items->bucket_size = value_interval;
table_items->first.resize(table_items->bucket_size, 0);
table_items->next.resize(table_items->row_count + 1, 0);
if (is_asof_join(table_items->join_type)) {
table_items->resize_asof_index_vector(table_items->row_count + 1);
}
}
template <LogicalType LT>
@ -638,36 +383,20 @@ void RangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems*
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const uint64_t min_value = table_items->min_value;
const auto num_rows = 1 + table_items->row_count;
if (is_asof_join(table_items->join_type)) {
auto equi_join_bucket_locator = [min_value](JoinHashTableItems* ti, const ImmBuffer<CppType>& k,
uint32_t row_index) -> uint32_t {
const uint64_t index = static_cast<uint64_t>(k[row_index] - min_value);
if (ti->first[index] == 0) {
ti->first[index] = row_index;
}
return ti->first[index];
};
AsofJoinDispatcher::dispatch_and_process(table_items, keys, is_nulls.has_value() ? &is_nulls.value() : nullptr,
equi_join_bucket_locator);
table_items->finalize_asof_index_vector();
if (!is_nulls.has_value()) {
for (uint32_t i = 1; i < num_rows; i++) {
const size_t bucket_num = keys[i] - min_value;
table_items->next[i] = table_items->first[bucket_num];
table_items->first[bucket_num] = i;
}
} else {
if (!is_nulls.has_value()) {
for (uint32_t i = 1; i < num_rows; i++) {
const auto* is_nulls_data = is_nulls->data();
for (uint32_t i = 1; i < num_rows; i++) {
if (is_nulls_data[i] == 0) {
const size_t bucket_num = keys[i] - min_value;
table_items->next[i] = table_items->first[bucket_num];
table_items->first[bucket_num] = i;
}
} else {
const auto* is_nulls_data = is_nulls->data();
for (uint32_t i = 1; i < num_rows; i++) {
if (is_nulls_data[i] == 0) {
const size_t bucket_num = keys[i] - min_value;
table_items->next[i] = table_items->first[bucket_num];
table_items->first[bucket_num] = i;
}
}
}
}
}
@ -788,9 +517,6 @@ void DenseRangeDirectMappingJoinHashMap<LT>::build_prepare(RuntimeState* state,
table_items->dense_groups.resize((value_interval + 31) / 32);
table_items->first.resize(table_items->row_count + 1, 0);
table_items->next.resize(table_items->row_count + 1, 0);
if (is_asof_join(table_items->join_type)) {
table_items->resize_asof_index_vector(table_items->row_count + 1);
}
}
template <LogicalType LT>
@ -798,107 +524,56 @@ void DenseRangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableI
const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const uint64_t min_value = table_items->min_value;
const uint32_t num_rows = table_items->row_count + 1;
const auto num_rows = 1 + table_items->row_count;
const bool is_asof_join_type = is_asof_join(table_items->join_type);
const uint8_t* equi_join_key_nulls_data = is_nulls ? is_nulls->data() : nullptr;
const bool has_equi_join_key_nulls = (equi_join_key_nulls_data != nullptr);
const uint8_t* asof_temporal_null_data = nullptr;
bool has_asof_temporal_nulls = false;
if (is_asof_join_type) {
const ColumnPtr& asof_temporal_col =
table_items->build_chunk->get_column_by_slot_id(table_items->asof_join_condition_desc.build_slot_id);
const NullColumn* asof_temporal_col_nulls = ColumnHelper::get_null_column(asof_temporal_col);
if (asof_temporal_col_nulls != nullptr) {
has_asof_temporal_nulls = true;
auto* mutable_null_column = const_cast<NullColumn*>(asof_temporal_col_nulls);
asof_temporal_null_data = mutable_null_column->get_data().data();
}
}
auto get_dense_slot = [min_value](JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
uint32_t row) ALWAYS_INLINE {
const uint32_t bucket_num = keys[row] - min_value;
const uint32_t group_index = bucket_num / 32;
const uint32_t index_in_group = bucket_num % 32;
// Keep only the low `index_in_group` bits of the bitset, so the popcount gives the number of used buckets before index_in_group.
const uint32_t cur_bitset = table_items->dense_groups[group_index].bitset & ((1u << index_in_group) - 1);
const uint32_t offset_in_group = BitUtil::count_one_bits(cur_bitset);
return table_items->dense_groups[group_index].start_index + offset_in_group;
};
// Initialize `bitset` of each group.
auto init_group_bitsets = [&]<bool HasNullableKey, bool HasAsofTemporalNulls>() {
for (uint32_t row = 1; row < num_rows; ++row) {
if constexpr (HasNullableKey) {
if (equi_join_key_nulls_data[row] != 0) continue;
}
if constexpr (HasAsofTemporalNulls) {
if (asof_temporal_null_data[row] != 0) continue;
}
const uint32_t bucket_num = keys[row] - min_value;
const uint32_t group_index = bucket_num / 32;
const uint32_t index_in_group = bucket_num % 32;
table_items->dense_groups[group_index].bitset |= (1u << index_in_group);
const uint8_t* is_nulls_data = !is_nulls.has_value() ? nullptr : is_nulls->data();
auto is_null = [&]<bool Nullable>(const uint32_t index) {
if constexpr (Nullable) {
return is_nulls_data[index] != 0;
} else {
return false;
}
};
auto build_hash_chains = [&]<bool HasNullableKey>() {
auto process = [&]<bool Nullable>() {
// Initialize `bitset` of each group.
for (uint32_t i = 1; i < num_rows; i++) {
if (!is_null.template operator()<Nullable>(i)) {
const uint32_t bucket_num = keys[i] - min_value;
const uint32_t group_index = bucket_num / 32;
const uint32_t index_in_group = bucket_num % 32;
table_items->dense_groups[group_index].bitset |= 1 << index_in_group;
}
}
// Calculate `start_index` of each group.
for (uint32_t start_index = 0; auto& group : table_items->dense_groups) {
group.start_index = start_index;
start_index += BitUtil::count_one_bits(group.bitset);
}
// Initialize `first` and `next` arrays by `bitset` and `start_index` of each group.
for (uint32_t row = 1; row < num_rows; ++row) {
if constexpr (HasNullableKey) {
if (equi_join_key_nulls_data[row] != 0) continue;
}
const uint32_t index = get_dense_slot(table_items, keys, row);
for (size_t i = 1; i < num_rows; i++) {
if (!is_null.template operator()<Nullable>(i)) {
const uint32_t bucket_num = keys[i] - min_value;
const uint32_t group_index = bucket_num / 32;
const uint32_t index_in_group = bucket_num % 32;
table_items->next[row] = table_items->first[index];
table_items->first[index] = row;
// Keep only the low `index_in_group` bits of the bitset, so the popcount gives the number of used buckets before index_in_group.
const uint32_t cur_bitset = table_items->dense_groups[group_index].bitset & ((1 << index_in_group) - 1);
const uint32_t offset_in_group = BitUtil::count_one_bits(cur_bitset);
const uint32_t index = table_items->dense_groups[group_index].start_index + offset_in_group;
table_items->next[i] = table_items->first[index];
table_items->first[index] = i;
}
}
};
auto dispatch_bitset_init = [&]<bool HasNullableKey>() {
if (has_asof_temporal_nulls) {
init_group_bitsets.template operator()<HasNullableKey, true>();
} else {
init_group_bitsets.template operator()<HasNullableKey, false>();
}
};
if (has_equi_join_key_nulls) {
dispatch_bitset_init.template operator()<true>();
if (!is_nulls.has_value()) {
process.template operator()<false>();
} else {
dispatch_bitset_init.template operator()<false>();
}
// Calculate `start_index` of each group.
for (uint32_t start_index = 0; auto& group : table_items->dense_groups) {
group.start_index = start_index;
start_index += BitUtil::count_one_bits(group.bitset);
}
if (is_asof_join_type) {
auto equi_join_bucket_locator = [get_dense_slot](JoinHashTableItems* ti, const ImmBuffer<CppType>& k,
uint32_t row) -> uint32_t {
const uint32_t index = get_dense_slot(ti, k, row);
if (ti->first[index] == 0) {
ti->first[index] = row;
}
return ti->first[index];
};
AsofJoinDispatcher::dispatch_and_process(table_items, keys, is_nulls.has_value() ? &is_nulls.value() : nullptr,
equi_join_bucket_locator);
table_items->finalize_asof_index_vector();
} else {
if (!has_equi_join_key_nulls) {
build_hash_chains.template operator()<false>();
} else {
build_hash_chains.template operator()<true>();
}
process.template operator()<true>();
}
}
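The bitset comment above is the trick that makes the dense mapping compact; here is a self-contained sketch of just that computation (assuming C++20 for std::popcount; names are illustrative). Each group covers 32 consecutive buckets, `bitset` marks the used ones, and `start_index` counts the used buckets in all earlier groups, so a bucket's dense slot is a prefix popcount and `first` needs one entry per used bucket only.

#include <bit>
#include <cstdint>
#include <vector>

struct Group {
    uint32_t bitset = 0;      // bit b set <=> bucket (32 * group + b) is used
    uint32_t start_index = 0; // number of used buckets in all earlier groups
};

uint32_t dense_slot(const std::vector<Group>& groups, uint32_t bucket) {
    const uint32_t group = bucket / 32;
    const uint32_t bit = bucket % 32;
    // Mask off bits at and above `bit`, so popcount yields how many used
    // buckets precede this one inside its group.
    const uint32_t below = groups[group].bitset & ((1u << bit) - 1);
    return groups[group].start_index + std::popcount(below);
}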

View File

@ -14,9 +14,6 @@
#include "join_hash_table_descriptor.h"
#include "exec/sorting/sort_helper.h"
#include "util/orlp/pdqsort.h"
namespace starrocks {
// if the same hash values are clustered, after the first probe, all related hash buckets are cached, without too many
@ -65,82 +62,4 @@ void HashTableProbeState::consider_probe_time_locality() {
++probe_chunks;
}
template <typename CppType, TExprOpcode::type OpCode>
void AsofIndex<CppType, OpCode>::sort() {
auto comparator = [](const Entry& lhs, const Entry& rhs) {
if constexpr (is_descending) {
return SorterComparator<CppType>::compare(lhs.asof_value, rhs.asof_value) > 0;
} else {
return SorterComparator<CppType>::compare(lhs.asof_value, rhs.asof_value) < 0;
}
};
::pdqsort(_entries.begin(), _entries.end(), comparator);
}
template <typename CppType, TExprOpcode::type OpCode>
uint32_t AsofIndex<CppType, OpCode>::find_asof_match(CppType probe_value) const {
if (_entries.empty()) {
return 0;
}
size_t size = _entries.size();
size_t low = 0;
#pragma GCC unroll 3
while (size >= 8) {
_bound_search_iteration(probe_value, low, size);
}
while (size > 0) {
_bound_search_iteration(probe_value, low, size);
}
uint32_t result = (low < _entries.size()) ? _entries[low].row_index : 0;
return result;
}
template <typename CppType, TExprOpcode::type OpCode>
void AsofIndex<CppType, OpCode>::_bound_search_iteration(CppType probe_value, size_t& low, size_t& size) const {
size_t half = size / 2;
size_t other_half = size - half;
size_t probe_pos = low + half;
size_t other_low = low + other_half;
const CppType& entry_value = _entries[probe_pos].asof_value;
size = half;
bool condition_result;
if constexpr (is_descending) {
if constexpr (is_strict) {
condition_result = (SorterComparator<CppType>::compare(probe_value, entry_value) <= 0);
low = condition_result ? other_low : low;
} else {
condition_result = (SorterComparator<CppType>::compare(probe_value, entry_value) < 0);
low = condition_result ? other_low : low;
}
} else {
if constexpr (is_strict) {
condition_result = (SorterComparator<CppType>::compare(probe_value, entry_value) >= 0);
low = condition_result ? other_low : low;
} else {
condition_result = (SorterComparator<CppType>::compare(probe_value, entry_value) > 0);
low = condition_result ? other_low : low;
}
}
}
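A standalone paraphrase of the bound-search step above (my sketch, not the project code): each iteration halves the window and conditionally advances `low`, so the loop has a fixed, data-independent trip count, which is what makes the `#pragma GCC unroll` in find_asof_match effective.

#include <cstddef>
#include <vector>

// Returns the first index with v[index] >= target (v sorted ascending),
// or v.size() if no such element exists.
size_t branchless_lower_bound(const std::vector<long>& v, long target) {
    size_t low = 0;
    size_t size = v.size();
    while (size > 0) {
        const size_t half = size / 2;
        const size_t other_half = size - half;
        // Branchless step: the ternary typically compiles to a conditional move.
        low = (v[low + half] < target) ? low + other_half : low;
        size = half;
    }
    return low;
}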
#define INSTANTIATE_ASOF_INDEX(CppType) \
template class AsofIndex<CppType, TExprOpcode::LT>; \
template class AsofIndex<CppType, TExprOpcode::LE>; \
template class AsofIndex<CppType, TExprOpcode::GT>; \
template class AsofIndex<CppType, TExprOpcode::GE>;
INSTANTIATE_ASOF_INDEX(int64_t)
INSTANTIATE_ASOF_INDEX(DateValue)
INSTANTIATE_ASOF_INDEX(TimestampValue)
#undef INSTANTIATE_ASOF_INDEX
} // namespace starrocks

View File

@ -20,16 +20,14 @@
#include <coroutine>
#include <cstdint>
#include <optional>
#include <set>
#include <variant>
#include "column/chunk.h"
#include "column/column_hash.h"
#include "column/column_helper.h"
#include "column/vectorized_fwd.h"
#include "common/statusor.h"
#include "exec/sorting/sort_helper.h"
#include "simd/simd.h"
#include "util/phmap/phmap.h"
#include "util/runtime_profile.h"
namespace starrocks {
@ -44,67 +42,12 @@ struct JoinKeyDesc {
ColumnRef* col_ref = nullptr;
};
struct AsofJoinConditionDesc {
SlotId probe_slot_id;
LogicalType probe_logical_type;
SlotId build_slot_id;
LogicalType build_logical_type;
TExprOpcode::type condition_op = TExprOpcode::INVALID_OPCODE;
};
struct HashTableSlotDescriptor {
SlotDescriptor* slot;
bool need_output;
bool need_lazy_materialize = false;
};
template <typename CppType, TExprOpcode::type OpCode>
class AsofIndex {
public:
struct Entry {
CppType asof_value;
uint32_t row_index;
Entry() = default;
Entry(CppType value, uint32_t index) : asof_value(value), row_index(index) {}
};
private:
using Entries = std::vector<Entry>;
static constexpr bool is_descending = (OpCode == TExprOpcode::GE || OpCode == TExprOpcode::GT);
static constexpr bool is_strict = (OpCode == TExprOpcode::LT || OpCode == TExprOpcode::GT);
Entries _entries;
public:
void add_row(CppType asof_value, uint32_t row_index) { _entries.emplace_back(asof_value, row_index); }
void sort();
uint32_t find_asof_match(CppType probe_value) const;
size_t size() const { return _entries.size(); }
bool empty() const { return _entries.empty(); }
void clear() { _entries.clear(); }
private:
void _bound_search_iteration(CppType probe_value, size_t& low, size_t& size) const;
};
#define ASOF_INDEX_BUFFER_TYPES(T) \
Buffer<std::unique_ptr<AsofIndex<T, TExprOpcode::LT>>>, Buffer<std::unique_ptr<AsofIndex<T, TExprOpcode::LE>>>, \
Buffer<std::unique_ptr<AsofIndex<T, TExprOpcode::GT>>>, \
Buffer<std::unique_ptr<AsofIndex<T, TExprOpcode::GE>>>
using AsofIndexBufferVariant =
std::variant<ASOF_INDEX_BUFFER_TYPES(int64_t), // 0-3: Buffer<AsofIndex<int64_t, OP>*>
ASOF_INDEX_BUFFER_TYPES(DateValue), // 4-7: Buffer<AsofIndex<DateValue, OP>*>
ASOF_INDEX_BUFFER_TYPES(TimestampValue) // 8-11: Buffer<AsofIndex<TimestampValue, OP>*>
>;
#undef ASOF_INDEX_BUFFER_TYPES
struct JoinHashTableItems {
// TODO: memory contiguity problem?
ChunkPtr build_chunk = nullptr;
@ -120,7 +63,6 @@ struct JoinHashTableItems {
// about the bucket-chained hash table of this kind.
Buffer<uint32_t> first;
Buffer<uint32_t> next;
Buffer<uint8_t> fps;
Buffer<uint8_t> key_bitset;
struct DenseGroup {
@ -156,27 +98,9 @@ struct JoinHashTableItems {
bool enable_late_materialization = false;
bool is_collision_free_and_unique = false;
AsofJoinConditionDesc asof_join_condition_desc;
AsofIndexBufferVariant asof_index_vector;
float get_keys_per_bucket() const { return keys_per_bucket; }
bool ht_cache_miss_serious() const { return cache_miss_serious; }
void resize_asof_index_vector(size_t size) {
std::visit([size](auto& buffer) { buffer.resize(size); }, asof_index_vector);
}
void finalize_asof_index_vector() {
std::visit(
[](auto& buffer) {
for (auto& ptr : buffer) {
if (ptr) ptr->sort();
}
},
asof_index_vector);
}
void calculate_ht_info(size_t key_bytes) {
if (used_buckets != 0) {
// to avoid redo
@ -251,7 +175,6 @@ struct HashTableProbeState {
RuntimeProfile::Counter* output_probe_column_timer = nullptr;
RuntimeProfile::Counter* output_build_column_timer = nullptr;
RuntimeProfile::Counter* probe_counter = nullptr;
ColumnPtr asof_temporal_condition_column = nullptr;
HashTableProbeState()
: build_index_column(UInt32Column::create()),
@ -347,215 +270,9 @@ struct HashTableParam {
std::set<SlotId> predicate_slots;
std::vector<JoinKeyDesc> join_keys;
AsofJoinConditionDesc asof_join_condition_desc;
RuntimeProfile::Counter* search_ht_timer = nullptr;
RuntimeProfile::Counter* output_build_column_timer = nullptr;
RuntimeProfile::Counter* output_probe_column_timer = nullptr;
RuntimeProfile::Counter* probe_counter = nullptr;
};
inline bool is_asof_join(TJoinOp::type join_type) {
return join_type == TJoinOp::ASOF_INNER_JOIN || join_type == TJoinOp::ASOF_LEFT_OUTER_JOIN;
}
constexpr size_t get_asof_variant_index(LogicalType logical_type, TExprOpcode::type opcode) {
size_t base = (logical_type == TYPE_BIGINT) ? 0 : (logical_type == TYPE_DATE) ? 4 : 8;
size_t offset =
(opcode == TExprOpcode::LT) ? 0 : (opcode == TExprOpcode::LE) ? 1 : (opcode == TExprOpcode::GT) ? 2 : 3;
return base + offset;
}
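A hedged illustration of the layout computed above (assuming the TYPE_* and TExprOpcode enumerators referenced here): the type contributes a base of 0, 4, or 8 and the opcode an offset of 0..3, so for example TYPE_DATE with GT lands on slot 6.

static_assert(get_asof_variant_index(TYPE_BIGINT, TExprOpcode::LT) == 0);
static_assert(get_asof_variant_index(TYPE_DATE, TExprOpcode::GT) == 4 + 2);
static_assert(get_asof_variant_index(TYPE_DATETIME, TExprOpcode::GE) == 8 + 3);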
#define CREATE_ASOF_VECTOR_CASE(TYPE, BASE_INDEX) \
case BASE_INDEX + 0: \
return Buffer<std::unique_ptr<AsofIndex<TYPE, TExprOpcode::LT>>>{}; \
case BASE_INDEX + 1: \
return Buffer<std::unique_ptr<AsofIndex<TYPE, TExprOpcode::LE>>>{}; \
case BASE_INDEX + 2: \
return Buffer<std::unique_ptr<AsofIndex<TYPE, TExprOpcode::GT>>>{}; \
case BASE_INDEX + 3: \
return Buffer<std::unique_ptr<AsofIndex<TYPE, TExprOpcode::GE>>>{};
inline AsofIndexBufferVariant create_asof_index_vector(size_t variant_index) {
switch (variant_index) {
CREATE_ASOF_VECTOR_CASE(int64_t, 0)
CREATE_ASOF_VECTOR_CASE(DateValue, 4)
CREATE_ASOF_VECTOR_CASE(TimestampValue, 8)
default:
__builtin_unreachable();
}
}
#undef CREATE_ASOF_VECTOR_CASE
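The switch above is a factory over a std::variant keyed by a runtime index; a reduced sketch of the same idea (hypothetical alternative types, not the ASOF buffers) shows why it pairs with the std::visit-based resize/finalize helpers in JoinHashTableItems.

#include <cstddef>
#include <cstdint>
#include <string>
#include <variant>
#include <vector>

using Buf = std::variant<std::vector<int64_t>, std::vector<double>, std::vector<std::string>>;

Buf make_buffer(size_t variant_index) {
    switch (variant_index) {
    case 0: return std::vector<int64_t>{};
    case 1: return std::vector<double>{};
    default: return std::vector<std::string>{};
    }
}

// All alternatives share resize(), so callers can operate generically.
void resize_buffer(Buf& b, size_t n) {
    std::visit([n](auto& v) { v.resize(n); }, b);
}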
template <size_t VariantIndex>
constexpr auto& get_asof_index_vector_static(JoinHashTableItems* table_items) {
static_assert(VariantIndex < 12, "Invalid variant index");
return std::get<VariantIndex>(table_items->asof_index_vector);
}
template <size_t VariantIndex>
#define CREATE_ASOF_INDEX_CASE(TYPE, BASE_INDEX) \
if constexpr (VariantIndex == BASE_INDEX + 0) { \
vector[asof_lookup_index] = std::make_unique<AsofIndex<TYPE, TExprOpcode::LT>>(); \
} else if constexpr (VariantIndex == BASE_INDEX + 1) { \
vector[asof_lookup_index] = std::make_unique<AsofIndex<TYPE, TExprOpcode::LE>>(); \
} else if constexpr (VariantIndex == BASE_INDEX + 2) { \
vector[asof_lookup_index] = std::make_unique<AsofIndex<TYPE, TExprOpcode::GT>>(); \
} else if constexpr (VariantIndex == BASE_INDEX + 3) { \
vector[asof_lookup_index] = std::make_unique<AsofIndex<TYPE, TExprOpcode::GE>>(); \
} else
void create_asof_index(JoinHashTableItems* table_items, uint32_t asof_lookup_index) {
auto& vector = get_asof_index_vector_static<VariantIndex>(table_items);
CREATE_ASOF_INDEX_CASE(int64_t, 0)
CREATE_ASOF_INDEX_CASE(DateValue, 4)
CREATE_ASOF_INDEX_CASE(TimestampValue, 8) { static_assert(VariantIndex < 12, "Invalid variant index"); }
}
#undef CREATE_ASOF_INDEX_CASE
template <LogicalType LT, TExprOpcode::type OP>
class AsofJoinTemporalRowProcessor {
public:
template <typename EquiJoinIndexLocator>
static void process_rows(JoinHashTableItems* table_items, const auto& keys,
const ImmBuffer<uint8_t>* equi_join_key_nulls,
EquiJoinIndexLocator&& equi_join_index_locator) {
using AsofCppType = RunTimeCppType<LT>;
static constexpr size_t variant_index = get_asof_variant_index(LT, OP);
const ColumnPtr& asof_temporal_col =
table_items->build_chunk->get_column_by_slot_id(table_items->asof_join_condition_desc.build_slot_id);
const auto* data_col = ColumnHelper::get_data_column_by_type<LT>(asof_temporal_col.get());
const NullColumn* asof_temporal_col_nulls_column = ColumnHelper::get_null_column(asof_temporal_col);
const Buffer<uint8_t>* asof_temporal_col_nulls =
asof_temporal_col_nulls_column ? &const_cast<NullColumn*>(asof_temporal_col_nulls_column)->get_data()
: nullptr;
const AsofCppType* __restrict asof_temporal_data = data_col->immutable_data().data();
const bool has_equi_join_key_nulls = (equi_join_key_nulls != nullptr);
const bool has_asof_temporal_nulls = (asof_temporal_col_nulls != nullptr);
auto process_rows_impl = [&]<bool HasEquiJoinKeyNulls, bool HasAsofTemporalNulls>() {
auto& asof_index_vector = get_asof_index_vector_static<variant_index>(table_items);
const uint32_t num_rows = table_items->row_count + 1;
const uint8_t* __restrict asof_temporal_null_data =
HasAsofTemporalNulls ? asof_temporal_col_nulls->data() : nullptr;
const uint8_t* __restrict equi_key_null_data = HasEquiJoinKeyNulls ? equi_join_key_nulls->data() : nullptr;
auto is_null_row = [&](uint32_t i) {
if constexpr (HasEquiJoinKeyNulls) {
if (equi_key_null_data[i] != 0) return true;
}
if constexpr (HasAsofTemporalNulls) {
if (asof_temporal_null_data[i] != 0) return true;
}
return false;
};
for (uint32_t i = 1; i < num_rows; ++i) {
if (is_null_row(i)) continue;
uint32_t equi_join_bucket_index = equi_join_index_locator(table_items, keys, i);
if (!asof_index_vector[equi_join_bucket_index]) {
create_asof_index<variant_index>(table_items, equi_join_bucket_index);
}
asof_index_vector[equi_join_bucket_index]->add_row(asof_temporal_data[i], i);
}
};
if (!has_equi_join_key_nulls && !has_asof_temporal_nulls) {
process_rows_impl.template operator()<false, false>();
} else if (has_equi_join_key_nulls && !has_asof_temporal_nulls) {
process_rows_impl.template operator()<true, false>();
} else if (!has_equi_join_key_nulls) {
process_rows_impl.template operator()<false, true>();
} else {
process_rows_impl.template operator()<true, true>();
}
}
};
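process_rows_impl above turns runtime null-presence flags into template parameters; a minimal sketch of that idiom (illustrative function, not the project code) shows how the per-row null checks vanish from the non-nullable instantiation instead of branching on every row.

#include <cstddef>
#include <cstdint>
#include <vector>

template <bool HasNulls>
int64_t sum_non_null(const std::vector<int64_t>& vals, const uint8_t* nulls) {
    int64_t sum = 0;
    for (size_t i = 0; i < vals.size(); ++i) {
        if constexpr (HasNulls) {
            if (nulls[i] != 0) continue; // emitted only in the nullable instantiation
        }
        sum += vals[i];
    }
    return sum;
}

// Branch once on the runtime condition, then run the fully specialized loop.
int64_t sum_column(const std::vector<int64_t>& vals, const uint8_t* nulls) {
    return nulls ? sum_non_null<true>(vals, nulls) : sum_non_null<false>(vals, nullptr);
}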
struct AsofJoinTemporalTypeOpcodeDispatcher {
template <typename Func>
static void dispatch(LogicalType asof_type, TExprOpcode::type opcode, Func&& func) {
switch (asof_type) {
case TYPE_BIGINT:
dispatch_impl<TYPE_BIGINT>(opcode, std::forward<Func>(func));
break;
case TYPE_DATE:
dispatch_impl<TYPE_DATE>(opcode, std::forward<Func>(func));
break;
case TYPE_DATETIME:
dispatch_impl<TYPE_DATETIME>(opcode, std::forward<Func>(func));
break;
default:
LOG(ERROR) << "ASOF JOIN: Unsupported type: " << asof_type;
CHECK(false) << "ASOF JOIN: Unsupported type";
__builtin_unreachable();
}
}
private:
template <LogicalType ASOF_LT, typename Func>
static void dispatch_impl(TExprOpcode::type opcode, Func&& func) {
switch (opcode) {
case TExprOpcode::LT:
func(std::integral_constant<LogicalType, ASOF_LT>{},
std::integral_constant<TExprOpcode::type, TExprOpcode::LT>{});
break;
case TExprOpcode::LE:
func(std::integral_constant<LogicalType, ASOF_LT>{},
std::integral_constant<TExprOpcode::type, TExprOpcode::LE>{});
break;
case TExprOpcode::GT:
func(std::integral_constant<LogicalType, ASOF_LT>{},
std::integral_constant<TExprOpcode::type, TExprOpcode::GT>{});
break;
case TExprOpcode::GE:
func(std::integral_constant<LogicalType, ASOF_LT>{},
std::integral_constant<TExprOpcode::type, TExprOpcode::GE>{});
break;
default:
__builtin_unreachable();
}
}
};
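The dispatcher above is the standard switch-to-integral_constant bridge; a compact generic sketch (hypothetical Op enum, not the Thrift opcodes) of how a runtime value becomes a compile-time template argument:

#include <cstdio>
#include <type_traits>

enum class Op { LT, LE, GT, GE };

template <typename Func>
void dispatch_op(Op op, Func&& func) {
    switch (op) {
    case Op::LT: func(std::integral_constant<Op, Op::LT>{}); break;
    case Op::LE: func(std::integral_constant<Op, Op::LE>{}); break;
    case Op::GT: func(std::integral_constant<Op, Op::GT>{}); break;
    case Op::GE: func(std::integral_constant<Op, Op::GE>{}); break;
    }
}

template <Op OP>
bool compare(int a, int b) { // OP is a compile-time constant here
    if constexpr (OP == Op::LT) return a < b;
    else if constexpr (OP == Op::LE) return a <= b;
    else if constexpr (OP == Op::GT) return a > b;
    else return a >= b;
}

int main() {
    dispatch_op(Op::LE, [](auto tag) {
        constexpr Op op = decltype(tag)::value;
        std::printf("%d\n", static_cast<int>(compare<op>(1, 1))); // prints 1
    });
}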
class AsofJoinDispatcher {
public:
template <typename EquiJoinIndexLocator>
static void dispatch_and_process(JoinHashTableItems* table_items, const auto& keys,
const ImmBuffer<uint8_t>* equi_join_key_nulls,
EquiJoinIndexLocator&& equi_join_index_locator) {
LogicalType asof_type = table_items->asof_join_condition_desc.build_logical_type;
TExprOpcode::type opcode = table_items->asof_join_condition_desc.condition_op;
auto body = [&](auto tag_lt, auto tag_op) {
static constexpr LogicalType Lt = decltype(tag_lt)::value;
static constexpr TExprOpcode::type Op = decltype(tag_op)::value;
AsofJoinTemporalRowProcessor<Lt, Op>::process_rows(
table_items, keys, equi_join_key_nulls,
std::forward<EquiJoinIndexLocator>(equi_join_index_locator));
};
AsofJoinTemporalTypeOpcodeDispatcher::dispatch(asof_type, opcode, body);
}
};
class AsofJoinProbeDispatcher {
public:
template <typename Func>
static void dispatch(LogicalType asof_type, TExprOpcode::type opcode, Func&& body) {
AsofJoinTemporalTypeOpcodeDispatcher::dispatch(asof_type, opcode, [&](auto tag_lt, auto tag_op) {
static constexpr LogicalType Lt = decltype(tag_lt)::value;
static constexpr TExprOpcode::type Op = decltype(tag_op)::value;
body.template operator()<Lt, Op>();
});
}
};
} // namespace starrocks

View File

@ -45,8 +45,7 @@ namespace starrocks {
M(RANGE_DIRECT_MAPPING_SET) \
M(DENSE_RANGE_DIRECT_MAPPING) \
M(LINEAR_CHAINED) \
M(LINEAR_CHAINED_SET) \
M(LINEAR_CHAINED_ASOF)
M(LINEAR_CHAINED_SET)
#define APPLY_JOIN_KEY_CONSTRUCTOR_UNARY_TYPE(M) \
M(ONE_KEY_BOOLEAN) \
@ -118,20 +117,7 @@ namespace starrocks {
M(LINEAR_CHAINED_SET_DECIMAL32) \
M(LINEAR_CHAINED_SET_DECIMAL64) \
M(LINEAR_CHAINED_SET_DECIMAL128) \
M(LINEAR_CHAINED_SET_VARCHAR) \
\
M(LINEAR_CHAINED_ASOF_INT) \
M(LINEAR_CHAINED_ASOF_BIGINT) \
M(LINEAR_CHAINED_ASOF_LARGEINT) \
M(LINEAR_CHAINED_ASOF_FLOAT) \
M(LINEAR_CHAINED_ASOF_DOUBLE) \
M(LINEAR_CHAINED_ASOF_DATE) \
M(LINEAR_CHAINED_ASOF_DATETIME) \
M(LINEAR_CHAINED_ASOF_DECIMALV2) \
M(LINEAR_CHAINED_ASOF_DECIMAL32) \
M(LINEAR_CHAINED_ASOF_DECIMAL64) \
M(LINEAR_CHAINED_ASOF_DECIMAL128) \
M(LINEAR_CHAINED_ASOF_VARCHAR)
M(LINEAR_CHAINED_SET_VARCHAR)
enum class JoinKeyConstructorType {
#define NAME_TO_ENUM(NAME) NAME,
@ -308,27 +294,6 @@ REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DECIMAL64, LinearChainedJ
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_DECIMAL128, LinearChainedJoinHashSet,
LINEAR_CHAINED_SET_DECIMAL128);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_SET, TYPE_VARCHAR, LinearChainedJoinHashSet, LINEAR_CHAINED_SET_VARCHAR);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_INT, LinearChainedAsofJoinHashMap, LINEAR_CHAINED_ASOF_INT);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_BIGINT, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_BIGINT);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_LARGEINT, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_LARGEINT);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_FLOAT, LinearChainedAsofJoinHashMap, LINEAR_CHAINED_ASOF_FLOAT);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_DOUBLE, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_DOUBLE);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_DATE, LinearChainedAsofJoinHashMap, LINEAR_CHAINED_ASOF_DATE);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_DATETIME, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_DATETIME);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_DECIMALV2, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_DECIMALV2);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_DECIMAL32, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_DECIMAL32);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_DECIMAL64, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_DECIMAL64);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_DECIMAL128, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_DECIMAL128);
REGISTER_JOIN_MAP_METHOD_TYPE(LINEAR_CHAINED_ASOF, TYPE_VARCHAR, LinearChainedAsofJoinHashMap,
LINEAR_CHAINED_ASOF_VARCHAR);
#undef REGISTER_JOIN_MAP_TYPE

View File

@ -27,14 +27,6 @@ Status HashJoinerFactory::prepare(RuntimeState* state) {
RETURN_IF_ERROR(Expr::open(_param._common_expr_ctxs, state));
RETURN_IF_ERROR(Expr::open(_param._other_join_conjunct_ctxs, state));
RETURN_IF_ERROR(Expr::open(_param._conjunct_ctxs, state));
if (_param._asof_join_condition_build_expr_ctx != nullptr) {
RETURN_IF_ERROR(_param._asof_join_condition_build_expr_ctx->prepare(state));
RETURN_IF_ERROR(_param._asof_join_condition_build_expr_ctx->open(state));
}
if (_param._asof_join_condition_probe_expr_ctx != nullptr) {
RETURN_IF_ERROR(_param._asof_join_condition_probe_expr_ctx->prepare(state));
RETURN_IF_ERROR(_param._asof_join_condition_probe_expr_ctx->open(state));
}
return Status::OK();
}
@ -44,12 +36,6 @@ void HashJoinerFactory::close(RuntimeState* state) {
Expr::close(_param._other_join_conjunct_ctxs, state);
Expr::close(_param._probe_expr_ctxs, state);
Expr::close(_param._build_expr_ctxs, state);
if (_param._asof_join_condition_build_expr_ctx != nullptr) {
_param._asof_join_condition_build_expr_ctx->close(state);
}
if (_param._asof_join_condition_probe_expr_ctx != nullptr) {
_param._asof_join_condition_probe_expr_ctx->close(state);
}
}
HashJoinerPtr HashJoinerFactory::create_builder(int32_t builder_dop, int32_t builder_driver_seq) {

View File

@ -80,19 +80,12 @@ Status ChunkSource::buffer_next_batch_chunks_blocking(RuntimeState* state, size_
if (_status.is_end_of_file()) {
chunk->owner_info().set_owner_id(owner_id, true);
_chunk_buffer.put(_scan_operator_seq, std::move(chunk), std::move(_chunk_token));
break;
} else if (_status.is_time_out()) {
chunk->owner_info().set_owner_id(owner_id, false);
_chunk_buffer.put(_scan_operator_seq, std::move(chunk), std::move(_chunk_token));
_status = Status::OK();
break;
} else if (_status.is_eagain()) {
// EAGAIN is a normal case, but sleep for a while to avoid a busy loop
SleepFor(MonoDelta::FromNanoseconds(workgroup::WorkGroup::YIELD_PREEMPT_MAX_TIME_SPENT));
_status = Status::OK();
} else {
break;
}
break;
}
// schema won't be used by the computing layer, here we just reset it.

View File

@ -17,6 +17,7 @@
#include "exec/connector_scan_node.h"
#include "exec/pipeline/pipeline_driver.h"
#include "exec/pipeline/scan/balanced_chunk_buffer.h"
#include "runtime/exec_env.h"
#include "runtime/runtime_state.h"
namespace starrocks::pipeline {
@ -762,7 +763,7 @@ Status ConnectorChunkSource::_read_chunk(RuntimeState* state, ChunkPtr* chunk) {
RETURN_IF_ERROR(_open_data_source(state, &mem_alloc_failed));
if (mem_alloc_failed) {
_mem_alloc_failed_count += 1;
return Status::EAgain("");
return Status::TimedOut("");
}
if (state->is_cancelled()) {
return Status::Cancelled("canceled state");

View File

@ -178,7 +178,7 @@ void OlapChunkSource::_init_counter(RuntimeState* state) {
ADD_CHILD_COUNTER(_runtime_profile, "RemainingRowsAfterShortKeyFilter", TUnit::UNIT, segment_init_name);
_column_iterator_init_timer = ADD_CHILD_TIMER(_runtime_profile, "ColumnIteratorInit", segment_init_name);
_bitmap_index_iterator_init_timer = ADD_CHILD_TIMER(_runtime_profile, "BitmapIndexIteratorInit", segment_init_name);
_zone_map_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ZoneMapIndexFilter", segment_init_name);
_zone_map_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ZoneMapIndexFiter", segment_init_name);
_rows_key_range_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ShortKeyFilter", segment_init_name);
_rows_key_range_counter =
ADD_CHILD_COUNTER(_runtime_profile, "ShortKeyRangeNumber", TUnit::UNIT, segment_init_name);
@ -409,10 +409,6 @@ Status OlapChunkSource::_init_column_access_paths(Schema* schema) {
} else {
LOG(WARNING) << "failed to convert column access path: " << res.status();
}
} else if (path->is_root() && !path->children().empty()) {
// Check if this is a ROOT path for JSON field that has been pruned
// For JSON fields, the root column might be pruned but sub-paths are still needed
VLOG_ROW << "Skipping pruned JSON root path: " << root;
} else {
LOG(WARNING) << "failed to find column in schema: " << root;
}

View File

@ -0,0 +1,110 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "exec/pipeline/scan/olap_meta_scan_operator.h"
#include <utility>
#include "exec/olap_meta_scanner.h"
#include "exec/pipeline/scan/olap_meta_chunk_source.h"
#include "exec/pipeline/scan/olap_meta_scan_context.h"
namespace starrocks::pipeline {
OlapMetaScanOperatorFactory::OlapMetaScanOperatorFactory(int32_t id, ScanNode* meta_scan_node, size_t dop,
std::shared_ptr<OlapMetaScanContextFactory> ctx_factory)
: ScanOperatorFactory(id, meta_scan_node), _ctx_factory(std::move(ctx_factory)) {}
Status OlapMetaScanOperatorFactory::do_prepare(RuntimeState* state) {
return Status::OK();
}
void OlapMetaScanOperatorFactory::do_close(RuntimeState* state) {}
OperatorPtr OlapMetaScanOperatorFactory::do_create(int32_t dop, int32_t driver_sequence) {
return std::make_shared<OlapMetaScanOperator>(this, _id, driver_sequence, dop, _scan_node,
_ctx_factory->get_or_create(driver_sequence));
}
OlapMetaScanOperator::OlapMetaScanOperator(OperatorFactory* factory, int32_t id, int32_t driver_sequence, int32_t dop,
ScanNode* meta_scan_node, OlapMetaScanContextPtr ctx)
: ScanOperator(factory, id, driver_sequence, dop, meta_scan_node), _ctx(std::move(ctx)) {}
OlapMetaScanOperator::~OlapMetaScanOperator() = default;
bool OlapMetaScanOperator::has_output() const {
if (!_ctx->is_prepare_finished()) {
return false;
}
return ScanOperator::has_output();
}
bool OlapMetaScanOperator::is_finished() const {
if (!_ctx->is_prepare_finished()) {
return false;
}
return ScanOperator::is_finished();
}
Status OlapMetaScanOperator::do_prepare(RuntimeState* state) {
return Status::OK();
}
void OlapMetaScanOperator::do_close(RuntimeState* state) {}
ChunkSourcePtr OlapMetaScanOperator::create_chunk_source(MorselPtr morsel, int32_t chunk_source_index) {
return std::make_shared<OlapMetaChunkSource>(this, _runtime_profile.get(), std::move(morsel), _ctx);
}
ChunkPtr OlapMetaScanOperator::get_chunk_from_buffer() {
ChunkPtr chunk = nullptr;
if (_ctx->get_chunk_buffer().try_get(_driver_sequence, &chunk)) {
return chunk;
}
return nullptr;
}
size_t OlapMetaScanOperator::num_buffered_chunks() const {
return _ctx->get_chunk_buffer().size(_driver_sequence);
}
size_t OlapMetaScanOperator::buffer_size() const {
return _ctx->get_chunk_buffer().limiter()->size();
}
size_t OlapMetaScanOperator::buffer_capacity() const {
return _ctx->get_chunk_buffer().limiter()->capacity();
}
size_t OlapMetaScanOperator::buffer_memory_usage() const {
return _ctx->get_chunk_buffer().memory_usage();
}
size_t OlapMetaScanOperator::default_buffer_capacity() const {
return _ctx->get_chunk_buffer().limiter()->default_capacity();
}
ChunkBufferTokenPtr OlapMetaScanOperator::pin_chunk(int num_chunks) {
return _ctx->get_chunk_buffer().limiter()->pin(num_chunks);
}
bool OlapMetaScanOperator::is_buffer_full() const {
return _ctx->get_chunk_buffer().limiter()->is_full();
}
void OlapMetaScanOperator::set_buffer_finished() {
_ctx->get_chunk_buffer().set_finished(_driver_sequence);
}
} // namespace starrocks::pipeline

View File

@ -0,0 +1,73 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "exec/olap_meta_scan_node.h"
#include "exec/pipeline/pipeline_builder.h"
#include "exec/pipeline/scan/balanced_chunk_buffer.h"
#include "exec/pipeline/scan/olap_meta_scan_context.h"
#include "exec/pipeline/scan/scan_operator.h"
#include "gen_cpp/Types_types.h"
namespace starrocks::pipeline {
class OlapMetaScanOperatorFactory final : public ScanOperatorFactory {
public:
OlapMetaScanOperatorFactory(int32_t id, ScanNode* meta_scan_node, size_t dop,
std::shared_ptr<OlapMetaScanContextFactory> ctx_factory);
~OlapMetaScanOperatorFactory() override = default;
bool with_morsels() const override { return true; }
Status do_prepare(RuntimeState* state) override;
void do_close(RuntimeState* state) override;
OperatorPtr do_create(int32_t dop, int32_t driver_sequence) override;
private:
std::shared_ptr<OlapMetaScanContextFactory> _ctx_factory;
};
class OlapMetaScanOperator final : public ScanOperator {
public:
OlapMetaScanOperator(OperatorFactory* factory, int32_t id, int32_t driver_sequence, int32_t dop,
ScanNode* scan_node, OlapMetaScanContextPtr scan_ctx);
~OlapMetaScanOperator() override;
bool has_output() const override;
bool is_finished() const override;
Status do_prepare(RuntimeState* state) override;
void do_close(RuntimeState* state) override;
ChunkSourcePtr create_chunk_source(MorselPtr morsel, int32_t chunk_source_index) override;
private:
void attach_chunk_source(int32_t source_index) override {}
void detach_chunk_source(int32_t source_index) override {}
bool has_shared_chunk_source() const override { return false; }
ChunkPtr get_chunk_from_buffer() override;
size_t num_buffered_chunks() const override;
size_t buffer_size() const override;
size_t buffer_capacity() const override;
size_t buffer_memory_usage() const override;
size_t default_buffer_capacity() const override;
ChunkBufferTokenPtr pin_chunk(int num_chunks) override;
bool is_buffer_full() const override;
void set_buffer_finished() override;
OlapMetaScanContextPtr _ctx;
};
} // namespace starrocks::pipeline

View File

@ -109,7 +109,7 @@ StatusOr<ChunkPtr> ConnectorSinkOperator::pull_chunk(RuntimeState* state) {
}
Status ConnectorSinkOperator::push_chunk(RuntimeState* state, const ChunkPtr& chunk) {
RETURN_IF_ERROR(_connector_chunk_sink->add(chunk));
RETURN_IF_ERROR(_connector_chunk_sink->add(chunk.get()));
return Status::OK();
}

View File

@ -18,14 +18,6 @@
#include <memory>
#include <utility>
// On macOS, system headers may define a macro named current_task(),
// which conflicts with the method name below. Undefine to avoid collisions.
#ifdef __APPLE__
#ifdef current_task
#undef current_task
#endif
#endif
#include "column/vectorized_fwd.h"
#include "common/statusor.h"
#include "exec/spill/spiller.h"

View File

@ -94,19 +94,17 @@ Status ScanNode::prepare(RuntimeState* state) {
}
// Distribute morsels from a single queue to multiple queues
static StatusOr<std::map<int, pipeline::MorselQueuePtr>> uniform_distribute_morsels(
pipeline::MorselQueuePtr morsel_queue, int dop) {
std::map<int, pipeline::MorselQueuePtr> queue_per_driver;
static std::map<int, pipeline::MorselQueuePtr> uniform_distribute_morsels(pipeline::MorselQueuePtr morsel_queue,
int dop) {
std::map<int, pipeline::Morsels> morsels_per_driver;
int driver_seq = 0;
while (!morsel_queue->empty()) {
auto maybe_morsel_status_or = morsel_queue->try_get();
if (UNLIKELY(!maybe_morsel_status_or.ok())) {
return maybe_morsel_status_or.status();
}
morsels_per_driver[driver_seq].push_back(std::move(maybe_morsel_status_or.value()));
auto maybe_morsel = morsel_queue->try_get();
DCHECK(maybe_morsel.ok());
morsels_per_driver[driver_seq].push_back(std::move(maybe_morsel.value()));
driver_seq = (driver_seq + 1) % dop;
}
std::map<int, pipeline::MorselQueuePtr> queue_per_driver;
auto morsel_queue_type = morsel_queue->type();
DCHECK(morsel_queue_type == pipeline::MorselQueue::Type::FIXED ||
@ -146,7 +144,7 @@ StatusOr<pipeline::MorselQueueFactoryPtr> ScanNode::convert_scan_range_to_morsel
// If not so much morsels, try to assign morsel uniformly among operators to avoid data skew
if (!always_shared_scan() && scan_dop > 1 && is_fixed_or_dynamic_morsel_queue &&
morsel_queue->num_original_morsels() <= io_parallelism) {
ASSIGN_OR_RETURN(auto morsel_queue_map, uniform_distribute_morsels(std::move(morsel_queue), scan_dop));
auto morsel_queue_map = uniform_distribute_morsels(std::move(morsel_queue), scan_dop);
return std::make_unique<pipeline::IndividualMorselQueueFactory>(std::move(morsel_queue_map),
/*could_local_shuffle*/ true);
} else {
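uniform_distribute_morsels above deals items round-robin across drivers; a generic hedged sketch of the same distribution (container and key types are illustrative):

#include <deque>
#include <map>
#include <utility>
#include <vector>

template <typename T>
std::map<int, std::vector<T>> round_robin_distribute(std::deque<T> queue, int dop) {
    std::map<int, std::vector<T>> per_driver;
    int driver_seq = 0;
    while (!queue.empty()) {
        per_driver[driver_seq].push_back(std::move(queue.front()));
        queue.pop_front();
        driver_seq = (driver_seq + 1) % dop; // drivers differ by at most one item
    }
    return per_driver;
}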

View File

@ -21,7 +21,7 @@
#include "runtime/exec_env.h"
#ifdef WITH_STARCACHE
#include "cache/disk_cache/starcache_engine.h"
#include "cache/starcache_engine.h"
#endif
namespace starrocks {
@ -68,14 +68,9 @@ Status SchemaBeDataCacheMetricsScanner::get_next(ChunkPtr* chunk, bool* eos) {
row.emplace_back(_be_id);
// TODO: Support LRUCacheEngine
auto* mem_cache = DataCache::GetInstance()->local_mem_cache();
DataCacheMemMetrics mem_metrics;
if (mem_cache != nullptr && mem_cache->is_initialized()) {
mem_metrics = mem_cache->cache_metrics();
}
auto* disk_cache = DataCache::GetInstance()->local_disk_cache();
if (disk_cache != nullptr && disk_cache->is_initialized()) {
auto* starcache = reinterpret_cast<StarCacheEngine*>(disk_cache);
auto* cache = DataCache::GetInstance()->local_disk_cache();
if (cache != nullptr && cache->is_initialized() && cache->engine_type() == LocalCacheEngineType::STARCACHE) {
auto* starcache = reinterpret_cast<StarCacheEngine*>(cache);
// retrieve different priority's used bytes from level = 2 metrics
metrics = starcache->starcache_metrics(2);
@ -84,8 +79,8 @@ Status SchemaBeDataCacheMetricsScanner::get_next(ChunkPtr* chunk, bool* eos) {
row.emplace_back(Slice(status));
row.emplace_back(metrics.disk_quota_bytes);
row.emplace_back(metrics.disk_used_bytes);
row.emplace_back(mem_metrics.mem_quota_bytes);
row.emplace_back(mem_metrics.mem_used_bytes);
row.emplace_back(metrics.mem_quota_bytes);
row.emplace_back(metrics.mem_used_bytes);
row.emplace_back(metrics.meta_used_bytes);
const auto& dir_spaces = metrics.disk_dir_spaces;

View File

@ -14,11 +14,7 @@
#pragma once
#ifdef __APPLE__
#include <sys/mount.h>
#else
#include <sys/statfs.h>
#endif
#include <atomic>
#include <memory>
@ -105,11 +101,7 @@ private:
struct statfs stat1, stat2;
statfs(path1.c_str(), &stat1);
statfs(path2.c_str(), &stat2);
#ifdef __APPLE__
return stat1.f_fsid.val[0] == stat2.f_fsid.val[0] && stat1.f_fsid.val[1] == stat2.f_fsid.val[1];
#else
return stat1.f_fsid.__val[0] == stat2.f_fsid.__val[0] && stat1.f_fsid.__val[1] == stat2.f_fsid.__val[1];
#endif
}
std::vector<DirPtr> _dirs;
@ -125,4 +117,4 @@ private:
Status::RuntimeError(fmt::format("acquire size error: dir {} try acquire:{} usage:{} capacity:{}", dir->dir(), \
acquire_size, dir->get_current_size(), dir->get_max_size()))
} // namespace starrocks::spill
} // namespace starrocks::spill
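The #ifdef above exists because statfs() reports the filesystem id in f_fsid, whose two-word member is named val[] on macOS but __val[] on glibc. A self-contained sketch of the same-filesystem check under those assumptions (my sketch, with error handling the project code omits):

#include <string>
#ifdef __APPLE__
#include <sys/mount.h>
#else
#include <sys/statfs.h>
#endif

bool on_same_filesystem(const std::string& a, const std::string& b) {
    struct statfs s1, s2;
    if (statfs(a.c_str(), &s1) != 0 || statfs(b.c_str(), &s2) != 0) return false;
#ifdef __APPLE__
    return s1.f_fsid.val[0] == s2.f_fsid.val[0] && s1.f_fsid.val[1] == s2.f_fsid.val[1];
#else
    return s1.f_fsid.__val[0] == s2.f_fsid.__val[0] && s1.f_fsid.__val[1] == s2.f_fsid.__val[1];
#endif
}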

View File

@ -325,6 +325,8 @@ void PartitionedSpillerWriter::_add_partition(SpilledPartitionPtr&& partition_pt
}
void PartitionedSpillerWriter::_remove_partition(const SpilledPartition* partition) {
auto affinity_group = partition->block_group->get_affinity_group();
DCHECK(affinity_group != kDefaultBlockAffinityGroup);
_id_to_partitions.erase(partition->partition_id);
size_t level = partition->level;
auto& partitions = _level_to_partitions[level];
@ -332,12 +334,6 @@ void PartitionedSpillerWriter::_remove_partition(const SpilledPartition* partiti
auto iter = std::find_if(partitions.begin(), partitions.end(),
[partition](auto& val) { return val->partition_id == partition->partition_id; });
_total_partition_num -= (iter != partitions.end());
if (partition->block_group != nullptr) {
auto affinity_group = partition->block_group->get_affinity_group();
DCHECK(affinity_group != kDefaultBlockAffinityGroup);
WARN_IF_ERROR(_spiller->block_manager()->release_affinity_group(affinity_group),
fmt::format("release affinity group {} error", affinity_group));
}
partitions.erase(iter);
if (partitions.empty()) {
_level_to_partitions.erase(level);
@ -345,6 +341,8 @@ void PartitionedSpillerWriter::_remove_partition(const SpilledPartition* partiti
_min_level = level + 1;
}
}
WARN_IF_ERROR(_spiller->block_manager()->release_affinity_group(affinity_group),
fmt::format("release affinity group {} error", affinity_group));
}
Status PartitionedSpillerWriter::_choose_partitions_to_flush(bool is_final_flush,

View File

@ -46,9 +46,9 @@
#include "column/column_helper.h"
#include "column/map_column.h"
#include "column/nullable_column.h"
#include "common/config.h"
#include "common/statusor.h"
#include "common/tracer.h"
#include "config.h"
#include "exec/pipeline/query_context.h"
#include "exec/tablet_sink_colocate_sender.h"
#include "exprs/expr.h"

View File

@ -19,10 +19,10 @@
#include "column/chunk.h"
#include "column/column_viewer.h"
#include "column/nullable_column.h"
#include "common/config.h"
#include "common/statusor.h"
#include "common/tracer.h"
#include "common/utils.h"
#include "config.h"
#include "exec/tablet_sink.h"
#include "exprs/expr_context.h"
#include "gutil/strings/fastmem.h"

View File

@ -27,7 +27,6 @@ set(EXPR_FILES
agg/factory/aggregate_resolver_minmaxany.cpp
agg/factory/aggregate_resolver_others.cpp
agg/factory/aggregate_resolver_sumcount.cpp
agg/factory/aggregate_resolver_distinct.cpp
agg/factory/aggregate_resolver_stream.cpp
agg/factory/aggregate_resolver_utility.cpp
agg/factory/aggregate_resolver_variance.cpp

View File

@ -16,12 +16,12 @@
#include <type_traits>
#include "column/array_column.h"
#include "column/type_traits.h"
#include "gutil/strings/fastmem.h"
#include "types/logical_type.h"
namespace starrocks {
// Type traits from aggregate functions
template <LogicalType lt, typename = guard::Guard>
struct AggDataTypeTraits {};
@ -41,7 +41,6 @@ struct AggDataTypeTraits<lt, FixedLengthLTGuard<lt>> {
static RefType get_ref(const ValueType& value) { return value; }
static void update_max(ValueType& current, const RefType& input) { current = std::max<ValueType>(current, input); }
static void update_min(ValueType& current, const RefType& input) { current = std::min<ValueType>(current, input); }
static bool is_equal(const RefType& lhs, const RefType& rhs) { return lhs == rhs; }
@ -57,9 +56,7 @@ struct AggDataTypeTraits<lt, ObjectFamilyLTGuard<lt>> {
using RefType = RunTimeCppType<lt>;
static void assign_value(ValueType& value, RefType ref) { value = *ref; }
static void assign_value(ColumnType* column, size_t row, const RefType& ref) { *column->get_object(row) = *ref; }
static void assign_value(ColumnType* column, size_t row, const ValueType& ref) { *column->get_object(row) = ref; }
static void append_value(ColumnType* column, const ValueType& value) { column->append(&value); }
@ -68,47 +65,12 @@ struct AggDataTypeTraits<lt, ObjectFamilyLTGuard<lt>> {
static const RefType get_row_ref(const ColumnType& column, size_t row) { return column.get_object(row); }
static void update_max(ValueType& current, const RefType& input) { current = std::max<ValueType>(current, *input); }
static void update_min(ValueType& current, const RefType& input) { current = std::min<ValueType>(current, *input); }
static bool is_equal(const RefType& lhs, const RefType& rhs) { return *lhs == *rhs; }
static bool equals(const ValueType& lhs, const RefType& rhs) { return lhs == *rhs; }
};
// For pointer ref types
template <LogicalType lt>
struct AggDataTypeTraits<lt, ArrayGuard<lt>> {
using CppType = RunTimeCppType<lt>;
using ColumnType = RunTimeColumnType<lt>;
using ValueType = typename ColumnType::MutablePtr;
struct RefType {
const ColumnType* column;
const size_t row;
RefType(const ColumnType* c, size_t r) : column(c), row(r) {}
};
static void assign_value(ValueType& value, const RefType& ref) {
value = ArrayColumn::static_pointer_cast(ref.column->clone_empty());
value->append_datum(ref.column->get(ref.row).template get<CppType>());
}
static void append_value(ColumnType* column, const ValueType& value) {
column->append_datum(value->get(0).template get<CppType>());
}
static RefType get_row_ref(const ColumnType& column, size_t row) { return RefType(&column, row); }
static bool is_equal(const ValueType& lhs, const ValueType& rhs) {
return lhs->get(0).template get<CppType>() == rhs->get(0).template get<CppType>();
}
static bool equals(const ValueType& lhs, const ValueType& rhs) {
return lhs->get(0).template get<CppType>() == rhs->get(0).template get<CppType>();
}
};
template <LogicalType lt>
struct AggDataTypeTraits<lt, StringLTGuard<lt>> {
using ColumnType = RunTimeColumnType<lt>;
@ -134,7 +96,6 @@ struct AggDataTypeTraits<lt, StringLTGuard<lt>> {
memcpy(current.data(), input.data, input.size);
}
}
static void update_min(ValueType& current, const RefType& input) {
if (Slice(current.data(), current.size()).compare(input) > 0) {
current.resize(input.size);
@ -149,4 +110,5 @@ template <LogicalType lt>
using AggDataValueType = typename AggDataTypeTraits<lt>::ValueType;
template <LogicalType lt>
using AggDataRefType = typename AggDataTypeTraits<lt>::RefType;
} // namespace starrocks
} // namespace starrocks

View File

@ -75,7 +75,7 @@ public:
TYPE_BIGINT, TYPE_LARGEINT, TYPE_FLOAT, TYPE_DOUBLE,
TYPE_VARCHAR, TYPE_CHAR, TYPE_DATE, TYPE_DATETIME,
TYPE_DECIMALV2, TYPE_DECIMAL32, TYPE_DECIMAL64, TYPE_DECIMAL128,
TYPE_DECIMAL256, TYPE_HLL, TYPE_OBJECT, TYPE_ARRAY};
TYPE_DECIMAL256, TYPE_HLL, TYPE_OBJECT};
return kTypes;
}

View File

@ -1,48 +0,0 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "column/type_traits.h"
#include "exprs/agg/distinct.h"
#include "exprs/agg/factory/aggregate_factory.hpp"
#include "exprs/agg/factory/aggregate_resolver.hpp"
#include "types/logical_type.h"
namespace starrocks {
struct DistinctDispatcher {
template <LogicalType lt>
void operator()(AggregateFuncResolver* resolver) {
if constexpr (lt_is_aggregate<lt>) {
using DistinctState = DistinctAggregateState<lt, SumResultLT<lt>>;
using DistinctState2 = DistinctAggregateStateV2<lt, SumResultLT<lt>>;
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DistinctState>(
"multi_distinct_count", false, AggregateFactory::MakeCountDistinctAggregateFunction<lt>());
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DistinctState2>(
"multi_distinct_count2", false, AggregateFactory::MakeCountDistinctAggregateFunctionV2<lt>());
resolver->add_aggregate_mapping<lt, SumResultLT<lt>, DistinctState>(
"multi_distinct_sum", false, AggregateFactory::MakeSumDistinctAggregateFunction<lt>());
resolver->add_aggregate_mapping<lt, SumResultLT<lt>, DistinctState2>(
"multi_distinct_sum2", false, AggregateFactory::MakeSumDistinctAggregateFunctionV2<lt>());
}
}
};
void AggregateFuncResolver::register_distinct() {
for (auto type : aggregate_types()) {
type_dispatch_all(type, DistinctDispatcher(), this);
}
}
} // namespace starrocks

View File

@ -13,6 +13,7 @@
// limitations under the License.
#include "column/type_traits.h"
#include "exprs/agg/distinct.h"
#include "exprs/agg/factory/aggregate_factory.hpp"
#include "exprs/agg/factory/aggregate_resolver.hpp"
#include "exprs/agg/sum.h"
@ -51,6 +52,25 @@ struct StorageSumDispatcher {
}
};
struct DistinctDispatcher {
template <LogicalType lt>
void operator()(AggregateFuncResolver* resolver) {
if constexpr (lt_is_aggregate<lt>) {
using DistinctState = DistinctAggregateState<lt, SumResultLT<lt>>;
using DistinctState2 = DistinctAggregateStateV2<lt, SumResultLT<lt>>;
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DistinctState>(
"multi_distinct_count", false, AggregateFactory::MakeCountDistinctAggregateFunction<lt>());
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DistinctState2>(
"multi_distinct_count2", false, AggregateFactory::MakeCountDistinctAggregateFunctionV2<lt>());
resolver->add_aggregate_mapping<lt, SumResultLT<lt>, DistinctState>(
"multi_distinct_sum", false, AggregateFactory::MakeSumDistinctAggregateFunction<lt>());
resolver->add_aggregate_mapping<lt, SumResultLT<lt>, DistinctState2>(
"multi_distinct_sum2", false, AggregateFactory::MakeSumDistinctAggregateFunctionV2<lt>());
}
}
};
void AggregateFuncResolver::register_sumcount() {
for (auto type : aggregate_types()) {
type_dispatch_all(type, SumDispatcher(), this);
@ -73,4 +93,10 @@ void AggregateFuncResolver::register_sumcount() {
AggregateFactory::MakeCountNullableAggregateFunction<true>());
}
void AggregateFuncResolver::register_distinct() {
for (auto type : aggregate_types()) {
type_dispatch_all(type, DistinctDispatcher(), this);
}
}
} // namespace starrocks

View File

@ -24,7 +24,7 @@ namespace starrocks {
struct WindowDispatcher {
template <LogicalType lt>
void operator()(AggregateFuncResolver* resolver) {
if constexpr (lt_is_aggregate<lt> || is_object_type(lt) || lt_is_array<lt>) {
if constexpr (lt_is_aggregate<lt> || is_object_type(lt)) {
resolver->add_aggregate_mapping_notnull<lt, lt>(
"first_value", true, AggregateFactory::MakeFirstValueWindowFunction<lt, false>());
// use first_value_in for first_value with ignore nulls.

View File

@ -13,7 +13,6 @@
// limitations under the License.
#pragma once
#include "column/array_column.h"
#include "column/column_helper.h"
#include "column/nullable_column.h"
#include "column/vectorized_fwd.h"
@ -102,10 +101,6 @@ struct ValueWindowStrategy<LT, JsonGuard<LT>> {
/// The dst Object column hasn't been resized.
static constexpr bool use_append = true;
};
template <LogicalType LT>
struct ValueWindowStrategy<LT, ArrayGuard<LT>> {
static constexpr bool use_append = true;
};
template <LogicalType LT, typename State, typename T = RunTimeCppType<LT>>
class ValueWindowFunction : public WindowFunction<State> {
@ -129,7 +124,7 @@ public:
Column* data_column = nullable_column->mutable_data_column();
auto* column = down_cast<InputColumnType*>(data_column);
auto& value = AggregateFunctionStateHelper<State>::data(state).value;
auto value = AggregateFunctionStateHelper<State>::data(state).value;
for (size_t i = start; i < end; ++i) {
AggDataTypeTraits<LT>::append_value(column, value);
}
@ -567,14 +562,8 @@ class LeadLagWindowFunction final : public ValueWindowFunction<LT, LeadLagState<
if (default_column->is_nullable()) {
this->data(state).default_is_null = true;
} else {
if constexpr (lt_is_array<LT>) {
const auto* column = down_cast<const ArrayColumn*>(ColumnHelper::get_data_column(arg2));
AggDataTypeTraits<LT>::assign_value(this->data(state).default_value,
AggDataTypeTraits<LT>::get_row_ref(*column, 0));
} else {
auto value = ColumnHelper::get_const_value<LT>(arg2);
AggDataTypeTraits<LT>::assign_value(this->data(state).default_value, value);
}
auto value = ColumnHelper::get_const_value<LT>(arg2);
AggDataTypeTraits<LT>::assign_value(this->data(state).default_value, value);
}
if constexpr (ignoreNulls) {
@ -680,13 +669,7 @@ class LeadLagWindowFunction final : public ValueWindowFunction<LT, LeadLagState<
if (this->data(state).default_is_null) {
this->data(state).is_null = true;
} else {
if constexpr (lt_is_array<LT>) {
AggDataTypeTraits<LT>::assign_value(
this->data(state).value,
AggDataTypeTraits<LT>::get_row_ref(*this->data(state).default_value, 0));
} else {
this->data(state).value = this->data(state).default_value;
}
this->data(state).value = this->data(state).default_value;
}
} else {
const Column* data_column = ColumnHelper::get_data_column(columns[0]);
@ -703,13 +686,7 @@ class LeadLagWindowFunction final : public ValueWindowFunction<LT, LeadLagState<
this->data(state).is_null = true;
} else {
this->data(state).is_null = false;
if constexpr (lt_is_array<LT>) {
AggDataTypeTraits<LT>::assign_value(
this->data(state).value,
AggDataTypeTraits<LT>::get_row_ref(*this->data(state).default_value, 0));
} else {
this->data(state).value = this->data(state).default_value;
}
this->data(state).value = this->data(state).default_value;
}
return;
}

View File

@ -44,7 +44,7 @@ namespace starrocks {
\
virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new CLASS_NAME(*this)); }
[[maybe_unused]] static std::optional<LogicalType> eliminate_trivial_cast_for_decimal_mul(const Expr* e) {
static std::optional<LogicalType> eliminate_trivial_cast_for_decimal_mul(const Expr* e) {
DIAGNOSTIC_PUSH
#if defined(__GNUC__) && !defined(__clang__)
DIAGNOSTIC_IGNORE("-Wmaybe-uninitialized")

View File

@ -176,8 +176,7 @@ StatusOr<ColumnPtr> CastStringToArray::evaluate_checked(ExprContext* context, Ch
// return null if not valid array
if (!is_valid_array(str, stack)) {
if (_throw_exception_if_err) {
return Status::InternalError(
fmt::format("invalid array input: {}", std::string_view(str.get_data(), str.get_size())));
return Status::InternalError(fmt::format("invalid array input: {}", str));
} else {
has_null = true;
null_column->append(1);

View File

@ -140,15 +140,6 @@ struct DecimalNonDecimalCast<overflow_mode, DecimalType, NonDecimalType, Decimal
using NonDecimalColumnType = RunTimeColumnType<NonDecimalType>;
static inline ColumnPtr decimal_from(const ColumnPtr& column, int precision, int scale) {
if (scale == 0) {
return _decimal_from<true>(column, precision, scale);
} else {
return _decimal_from<false>(column, precision, scale);
}
}
template <bool ZeroScale>
static inline ColumnPtr _decimal_from(const ColumnPtr& column, int precision, int scale) {
const auto num_rows = column->size();
typename DecimalColumnType::MutablePtr result = DecimalColumnType::create(precision, scale, num_rows);
const auto data = &ColumnHelper::cast_to_raw<NonDecimalType>(column.get())->immutable_data().front();
@ -172,16 +163,9 @@ struct DecimalNonDecimalCast<overflow_mode, DecimalType, NonDecimalType, Decimal
DecimalV3Cast::from_integer<SignedBooleanType, DecimalCppType, check_overflow<overflow_mode>>(
(SignedBooleanType)(data[i]), scale_factor, &result_data[i]);
} else if constexpr (lt_is_integer<NonDecimalType>) {
if constexpr (ZeroScale) {
// Fast path for integer-to-decimal conversion with scale 0.
overflow =
DecimalV3Cast::to_decimal_trivial<NonDecimalCppType, DecimalCppType,
check_overflow<overflow_mode>>(data[i], &result_data[i]);
} else {
overflow = DecimalV3Cast::from_integer<NonDecimalCppType, DecimalCppType,
check_overflow<overflow_mode>>(data[i], scale_factor,
&result_data[i]);
}
overflow =
DecimalV3Cast::from_integer<NonDecimalCppType, DecimalCppType, check_overflow<overflow_mode>>(
data[i], scale_factor, &result_data[i]);
} else if constexpr (lt_is_float<NonDecimalType>) {
overflow = DecimalV3Cast::from_float<NonDecimalCppType, DecimalCppType>(data[i], scale_factor,
&result_data[i]);
@ -234,7 +218,6 @@ struct DecimalNonDecimalCast<overflow_mode, DecimalType, NonDecimalType, Decimal
}
}
}
if constexpr (check_overflow<overflow_mode>) {
ColumnBuilder<DecimalType> builder(std::move(result), std::move(null_column), has_null);
return builder.build(column->is_constant());

View File

@ -20,14 +20,10 @@
#include "column/map_column.h"
#include "column/struct_column.h"
#include "column/type_traits.h"
#ifndef MACOS_DISABLE_JAVA
#include "exprs/agg/java_udaf_function.h"
#endif
#include "runtime/runtime_state.h"
#include "types/logical_type_infra.h"
#ifndef MACOS_DISABLE_JAVA
#include "udf/java/java_udf.h"
#endif
#include "util/bloom_filter.h"
namespace starrocks {
@ -42,7 +38,7 @@ FunctionContext* FunctionContext::create_context(RuntimeState* state, MemPool* p
ctx->_mem_pool = pool;
ctx->_return_type = return_type;
ctx->_arg_types = arg_types;
#if !defined(MACOS_DISABLE_JAVA) && !defined(BUILD_FORMAT_LIB)
#if !defined(BUILD_FORMAT_LIB)
ctx->_jvm_udaf_ctxs = std::make_unique<JavaUDAFContext>();
#endif
return ctx;
@ -58,7 +54,7 @@ FunctionContext* FunctionContext::create_context(RuntimeState* state, MemPool* p
ctx->_mem_pool = pool;
ctx->_return_type = return_type;
ctx->_arg_types = arg_types;
#if !defined(MACOS_DISABLE_JAVA) && !defined(BUILD_FORMAT_LIB)
#if !defined(BUILD_FORMAT_LIB)
ctx->_jvm_udaf_ctxs = std::make_unique<JavaUDAFContext>();
#endif
ctx->_is_distinct = is_distinct;
@ -141,12 +137,10 @@ void* FunctionContext::get_function_state(FunctionStateScope scope) const {
}
void FunctionContext::release_mems() {
#ifndef MACOS_DISABLE_JAVA
if (_jvm_udaf_ctxs != nullptr && _jvm_udaf_ctxs->states) {
auto env = JVMFunctionHelper::getInstance().getEnv();
_jvm_udaf_ctxs->states->clear(this, env);
}
#endif
}
void FunctionContext::set_error(const char* error_msg, const bool is_udf) {

View File

@ -35,12 +35,6 @@ class RuntimeState;
class Column;
class Slice;
struct JavaUDAFContext;
#if defined(MACOS_DISABLE_JAVA)
// On macOS build, Java is disabled. Provide an empty definition so that
// std::unique_ptr<JavaUDAFContext> has a complete type and can be destroyed
// without pulling in JNI headers.
struct JavaUDAFContext {};
#endif
struct NgramBloomFilterState;
class FunctionContext {

View File

@ -3468,76 +3468,6 @@ StatusOr<ColumnPtr> StringFunctions::regexp_extract(FunctionContext* context, co
return regexp_extract_general(context, options, columns);
}
// Helper function to extract whole match (group 0) using RE2::Match
// This is shared by both overloaded extract_regex_matches functions
template <typename IndexType>
static void extract_whole_matches(const re2::StringPiece& str_sp, const re2::RE2& regex, BinaryColumn* str_col,
IndexType& index, int max_matches) {
re2::StringPiece input = str_sp;
std::vector<re2::StringPiece> matches(max_matches);
size_t pos = 0;
while (pos <= input.size()) {
re2::StringPiece remaining = input.substr(pos);
if (regex.Match(remaining, 0, remaining.size(), RE2::UNANCHORED, &matches[0], max_matches)) {
// matches[0] contains the whole match (group 0)
str_col->append(Slice(matches[0].data(), matches[0].size()));
index += 1;
// Move past this match
pos = matches[0].data() - input.data() + matches[0].size();
if (matches[0].size() == 0) {
pos++; // Avoid infinite loop on zero-length matches
}
} else {
break;
}
}
}
// Helper function to extract regex matches and append to column
// This reduces code duplication across regexp_extract_all_* functions
static void extract_regex_matches(const Slice& str_value, const re2::RE2& regex, int group, BinaryColumn* str_col,
uint32_t& index, int max_matches) {
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());
if (group == 0) {
// Extract the whole match (group 0)
extract_whole_matches(str_sp, regex, str_col, index, max_matches);
} else {
// Extract specific capture group
re2::StringPiece find[group];
const RE2::Arg* args[group];
RE2::Arg argv[group];
for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
while (re2::RE2::FindAndConsumeN(&str_sp, regex, args, group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
}
}
// Overloaded version for pre-allocated arrays (used by regexp_extract_all_const)
static void extract_regex_matches(const Slice& str_value, const re2::RE2& regex, int group, BinaryColumn* str_col,
uint64_t& index, const std::unique_ptr<re2::StringPiece[]>& find,
const std::unique_ptr<const RE2::Arg*[]>& args, int max_matches) {
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());
if (group == 0) {
// Extract the whole match (group 0) - reuse common logic
extract_whole_matches(str_sp, regex, str_col, index, max_matches);
} else {
// Extract specific capture group using pre-allocated arrays
while (re2::RE2::FindAndConsumeN(&str_sp, regex, args.get(), group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
}
}
static ColumnPtr regexp_extract_all_general(FunctionContext* context, re2::RE2::Options* options,
const Columns& columns) {
auto content_viewer = ColumnViewer<TYPE_VARCHAR>(columns[0]);
@ -3553,7 +3483,7 @@ static ColumnPtr regexp_extract_all_general(FunctionContext* context, re2::RE2::
uint32_t index = 0;
for (int row = 0; row < size; ++row) {
if (content_viewer.is_null(row) || ptn_viewer.is_null(row) || group_viewer.is_null(row)) {
if (content_viewer.is_null(row) || ptn_viewer.is_null(row)) {
offset_col->append(index);
nl_col->append(1);
continue;
@ -3570,7 +3500,7 @@ static ColumnPtr regexp_extract_all_general(FunctionContext* context, re2::RE2::
nl_col->append(0);
auto group = group_viewer.value(row);
if (group < 0) {
if (group <= 0) {
offset_col->append(index);
continue;
}
@ -3581,7 +3511,21 @@ static ColumnPtr regexp_extract_all_general(FunctionContext* context, re2::RE2::
continue;
}
extract_regex_matches(content_viewer.value(row), local_re, group, str_col.get(), index, max_matches);
auto str_value = content_viewer.value(row);
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());
re2::StringPiece find[group];
const RE2::Arg* args[group];
RE2::Arg argv[group];
for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
while (re2::RE2::FindAndConsumeN(&str_sp, local_re, args, group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
offset_col->append(index);
}
@ -3603,7 +3547,7 @@ static ColumnPtr regexp_extract_all_const_pattern(re2::RE2* const_re, const Colu
uint32_t index = 0;
for (int row = 0; row < size; ++row) {
if (content_viewer.is_null(row) || group_viewer.is_null(row)) {
if (content_viewer.is_null(row)) {
offset_col->append(index);
nl_col->append(1);
continue;
@ -3611,7 +3555,7 @@ static ColumnPtr regexp_extract_all_const_pattern(re2::RE2* const_re, const Colu
nl_col->append(0);
auto group = group_viewer.value(row);
if (group < 0) {
if (group <= 0) {
offset_col->append(index);
continue;
}
@ -3622,7 +3566,21 @@ static ColumnPtr regexp_extract_all_const_pattern(re2::RE2* const_re, const Colu
continue;
}
extract_regex_matches(content_viewer.value(row), *const_re, group, str_col.get(), index, max_matches);
auto str_value = content_viewer.value(row);
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());
re2::StringPiece find[group];
const RE2::Arg* args[group];
RE2::Arg argv[group];
for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
while (re2::RE2::FindAndConsumeN(&str_sp, *const_re, args, group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
offset_col->append(index);
}
@ -3654,7 +3612,7 @@ static ColumnPtr regexp_extract_all_const(re2::RE2* const_re, const Columns& col
uint64_t index = 0;
int max_matches = 1 + const_re->NumberOfCapturingGroups();
if (group < 0 || group >= max_matches) {
if (group <= 0 || group >= max_matches) {
offset_col->append_value_multiple_times(&index, size);
auto array = ArrayColumn::create(NullableColumn::create(std::move(str_col), NullColumn::create(0, 0)),
std::move(offset_col));
@ -3665,27 +3623,26 @@ static ColumnPtr regexp_extract_all_const(re2::RE2* const_re, const Columns& col
return NullableColumn::create(std::move(array), std::move(nl_col));
}
// Prepare arguments for FindAndConsumeN (only needed when group > 0)
std::unique_ptr<re2::StringPiece[]> find;
std::unique_ptr<const RE2::Arg*[]> args;
std::unique_ptr<RE2::Arg[]> argv;
re2::StringPiece find[group];
const RE2::Arg* args[group];
RE2::Arg argv[group];
if (group > 0) {
find = std::make_unique<re2::StringPiece[]>(group);
args = std::make_unique<const RE2::Arg*[]>(group);
argv = std::make_unique<RE2::Arg[]>(group);
for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
// focuses only on iteration and offset management
for (int row = 0; row < size; ++row) {
if (!content_viewer.is_null(row)) {
extract_regex_matches(content_viewer.value(row), *const_re, group, str_col.get(), index, find, args,
max_matches);
if (content_viewer.is_null(row)) {
offset_col->append(index);
continue;
}
auto str_value = content_viewer.value(row);
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());
while (re2::RE2::FindAndConsumeN(&str_sp, *const_re, args, group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
offset_col->append(index);
}
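
Every variant above extracts capture groups with RE2::FindAndConsumeN, which advances the input StringPiece past each match, so a plain while loop visits all non-overlapping occurrences. A standalone sketch of that loop; the pattern and group index are illustrative:

#include <re2/re2.h>
#include <iostream>
#include <vector>

int main() {
    re2::RE2 re("(\\d+)-(\\w+)");
    const int group = 2; // extract the second capture group
    re2::StringPiece input("1-ab, 2-cd, 3-ef");

    std::vector<re2::StringPiece> find(group);
    std::vector<RE2::Arg> argv(group);
    std::vector<const RE2::Arg*> args(group);
    for (int i = 0; i < group; ++i) {
        argv[i] = &find[i]; // each Arg parses one group into its StringPiece
        args[i] = &argv[i];
    }
    while (re2::RE2::FindAndConsumeN(&input, re, args.data(), group)) {
        std::cout << find[group - 1] << "\n"; // prints ab, cd, ef
    }
}

Group 0 (the whole match) cannot be retrieved through FindAndConsumeN, which is why the removed extract_whole_matches helper used RE2::Match with RE2::UNANCHORED instead, bumping the scan position by one on zero-length matches to avoid looping forever.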

View File

@ -3897,39 +3897,6 @@ StatusOr<ColumnPtr> TimeFunctions::time_format(FunctionContext* context, const s
return builder.build(ColumnHelper::is_all_const(columns));
}
constexpr static const int64_t MAX_TIME = 3023999L;
static int64_t from_seconds_with_limit(int64_t time) {
if (time > MAX_TIME) {
return MAX_TIME;
}
if (time < -MAX_TIME) {
return -MAX_TIME;
}
return time;
}
StatusOr<ColumnPtr> TimeFunctions::sec_to_time(FunctionContext* context, const starrocks::Columns& columns) {
const auto& bigint_column = columns[0];
RETURN_IF_COLUMNS_ONLY_NULL(columns);
auto bigint_viewer = ColumnViewer<TYPE_BIGINT>(bigint_column);
const size_t size = bigint_column->size();
auto builder = ColumnBuilder<TYPE_TIME>(size);
for (size_t i = 0; i < size; ++i) {
if (bigint_viewer.is_null(i)) {
builder.append_null();
continue;
}
auto time = static_cast<double>(from_seconds_with_limit(bigint_viewer.value(i)));
builder.append(time);
}
return builder.build(ColumnHelper::is_all_const(columns));
}
} // namespace starrocks
#include "gen_cpp/opcode/TimeFunctions.inc"

View File

@ -766,14 +766,6 @@ public:
*/
DEFINE_VECTORIZED_FN(time_to_sec);
/**
* return time
* @param: [int]
* @paramType columns: [BinaryColumn]
* @return Int64Column
*/
DEFINE_VECTORIZED_FN(sec_to_time);
/**
* Returns the date of the first specified DOW (day of week) that occurs after the input date.
* @param: [timestamp, dow]

View File

@ -147,7 +147,7 @@ public:
auto& src_null_data = src_nullable_column->null_column()->get_data();
auto& dst_null_data = dst_nullable_column->null_column()->get_data();
size_t size = dst_null_data.size();
size_t size = src_column->size();
memcpy(dst_null_data.data(), src_null_data.data(), size);
convert_int_to_int<SourceType, DestType>(src_data.data(), dst_data.data(), size);
dst_nullable_column->set_has_null(src_nullable_column->has_null());
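
This one-line change sizes both the null-flag memcpy and the value conversion from the source column, so the two arrays are always walked with the same, known-valid element count. With plain vectors standing in for the column types:

#include <cstdint>
#include <cstring>
#include <vector>

// Stand-in sketch: widen the values while copying the null bitmap 1:1.
template <typename SourceType, typename DestType>
void convert_nullable(const std::vector<SourceType>& src_data,
                      const std::vector<uint8_t>& src_null,
                      std::vector<DestType>& dst_data,
                      std::vector<uint8_t>& dst_null) {
    const size_t size = src_data.size(); // the source drives the copy length
    dst_data.resize(size);
    dst_null.resize(size);
    std::memcpy(dst_null.data(), src_null.data(), size); // one flag byte per row
    for (size_t i = 0; i < size; ++i) {
        dst_data[i] = static_cast<DestType>(src_data[i]);
    }
}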

View File

@ -20,7 +20,7 @@
#include <string>
#include <vector>
#include "cache/disk_cache/block_cache.h"
#include "cache/block_cache/block_cache.h"
#include "column/vectorized_fwd.h"
#include "common/status.h"
#include "common/statusor.h"

View File

@ -470,7 +470,7 @@ StatusOr<FileMetaDataPtr> FileMetaDataParser::get_file_metadata() {
RETURN_IF_ERROR(_parse_footer(&file_metadata, &file_metadata_size));
if (file_metadata_size > 0) {
auto deleter = [](const starrocks::CacheKey& key, void* value) { delete (FileMetaDataPtr*)value; };
MemCacheWriteOptions options;
ObjectCacheWriteOptions options;
options.evict_probability = _datacache_options->datacache_evict_probability;
auto capture = std::make_unique<FileMetaDataPtr>(file_metadata);
Status st = _cache->insert(metacache_key, (void*)(capture.get()), file_metadata_size, deleter, options,
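
The insert call above (truncated in this hunk) follows a common ownership handoff: heap-allocate a copy of the shared pointer, register a deleter that frees it on eviction, and let the cache own the allocation once the insert succeeds. A toy sketch with stand-in types; ToyCache and MetaPtr are hypothetical, not the StarRocks cache API:

#include <functional>
#include <memory>
#include <string>
#include <unordered_map>

using MetaPtr = std::shared_ptr<int>; // stands in for FileMetaDataPtr

struct ToyCache {
    using Deleter = std::function<void(void*)>;
    std::unordered_map<std::string, std::pair<void*, Deleter>> entries;

    bool insert(const std::string& key, void* value, Deleter del) {
        return entries.emplace(key, std::make_pair(value, std::move(del))).second;
    }
    ~ToyCache() {
        for (auto& [key, entry] : entries) entry.second(entry.first); // evict all
    }
};

void cache_metadata(ToyCache& cache, const std::string& key, MetaPtr meta) {
    auto deleter = [](void* value) { delete static_cast<MetaPtr*>(value); };
    auto capture = std::make_unique<MetaPtr>(std::move(meta));
    if (cache.insert(key, capture.get(), deleter)) {
        capture.release(); // the cache now owns the heap allocation
    }
}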

Some files were not shown because too many files have changed in this diff.