Compare commits
1 commit
main...creatstar-

| Author | SHA1 | Date |
|---|---|---|
|  | 7b664baabb |  |
@@ -33,7 +33,6 @@ fe_plugins/output
fe/mocked
fe/ut_ports
fe/*/target
fe/*/bin
fe/plugin/*/target/
fe/fe-core/gen
fe/fe-grammar/gen

@@ -98,15 +97,3 @@ CMakeLists.txt
.claude
CLAUDE.md
.clangd

!build-mac/CMakeLists.txt
# build-mac generated files
build-mac/.ninja_deps
build-mac/.ninja_lock
build-mac/.ninja_log
build-mac/CMakeCache.txt
build-mac/CMakeFiles/
build-mac/build.ninja
build-mac/build_version.cc
build-mac/cmake_install.cmake
@@ -139,7 +139,6 @@ This project is used by the following companies. Learn more about their use case
- [Fanatics](https://www.youtube.com/watch?v=hbXovqR6tOc)
- [Fresha](https://medium.com/fresha-data-engineering/how-we-accidentally-became-one-of-uks-first-starrocks-production-pioneers-7db249f10010)
- [Grab](https://engineering.grab.com/building-a-spark-observability)
- [Haezoom](https://www.starrocks.io/blog/haezoom-and-cloudshift-overcome-apache-druids-limits-with-starrocks)
- [HerdWatch](https://medium.com/p/a7916a7e87bf)
- [Intuit](https://www.youtube.com/watch?v=tUC3FS3ki10)
- [iQiyi](https://medium.com/starrocks-engineering/real-time-analytics-at-scale-why-we-use-starrocks-0aa3c859cbeb)
@@ -650,7 +650,6 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror")
endif()
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter -Wno-documentation -Wno-weak-vtables")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unknown-warning-option")
# Turn on following warning as error explicitly
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror=string-plus-int")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Werror=pessimizing-move")

@@ -671,10 +670,6 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# ignore warning from apache-orc
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-switch-default")
endif ()
# Add -rtlib=compiler-rt for ARM architecture to fix LLVM bug: https://bugs.llvm.org/show_bug.cgi?id=16404
if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "aarch64")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -rtlib=compiler-rt")
endif()
else ()
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "14.0.0")
# ignore error from apache-orc
@@ -848,22 +848,15 @@ void* ReportDataCacheMetricsTaskWorkerPool::_worker_thread_callback(void* arg_th
request.__set_report_version(g_report_version.load(std::memory_order_relaxed));

TDataCacheMetrics t_metrics{};
const LocalDiskCacheEngine* disk_cache = DataCache::GetInstance()->local_disk_cache();
const LocalMemCacheEngine* mem_cache = DataCache::GetInstance()->local_mem_cache();
bool disk_cache_inited = disk_cache != nullptr && disk_cache->is_initialized();
bool mem_cache_inited = mem_cache != nullptr && mem_cache->is_initialized();

if (!disk_cache_inited && !mem_cache_inited) {
t_metrics.__set_status(TDataCacheStatus::DISABLED);
// TODO: mem_metrics + disk_metrics
const LocalCacheEngine* cache = DataCache::GetInstance()->local_disk_cache();
if (cache != nullptr && cache->is_initialized()) {
const auto metrics = cache->cache_metrics();
DataCacheUtils::set_metrics_from_thrift(t_metrics, metrics);
} else {
if (mem_cache_inited) {
t_metrics.__set_status(TDataCacheStatus::NORMAL);
DataCacheUtils::set_metrics_to_thrift(t_metrics, mem_cache->cache_metrics());
}
if (disk_cache_inited) {
DataCacheUtils::set_metrics_to_thrift(t_metrics, disk_cache->cache_metrics());
}
t_metrics.__set_status(TDataCacheStatus::DISABLED);
}

request.__set_datacache_metrics(t_metrics);

TMasterResult result;
@@ -24,7 +24,7 @@
#include <filesystem>
#include <memory>

#include "cache/disk_cache/starcache_engine.h"
#include "cache/starcache_engine.h"
#include "starcache/common/types.h"
#include "util/logging.h"
#include "util/random.h"
@@ -18,9 +18,9 @@
#include <random>

#include "cache/cache_options.h"
#include "cache/disk_cache/starcache_engine.h"
#include "cache/mem_cache/lrucache_engine.h"
#include "cache/mem_cache/page_cache.h"
#include "cache/lrucache_engine.h"
#include "cache/object_cache/page_cache.h"
#include "cache/starcache_engine.h"
#include "common/config.h"
#include "runtime/current_thread.h"
#include "runtime/exec_env.h"

@@ -105,13 +105,14 @@ std::string ObjectCacheBench::get_cache_type_str(CacheType type) {
}

void ObjectCacheBench::init_cache(CacheType cache_type) {
DiskCacheOptions opt;
CacheOptions opt;
opt.mem_space_size = _capacity;
opt.block_size = config::datacache_block_size;
opt.max_flying_memory_mb = config::datacache_max_flying_memory_mb;
opt.max_concurrent_inserts = config::datacache_max_concurrent_inserts;
opt.enable_checksum = config::datacache_checksum_enable;
opt.enable_direct_io = config::datacache_direct_io_enable;
opt.enable_tiered_cache = config::datacache_tiered_cache_enable;
opt.skip_read_factor = config::datacache_skip_read_factor;
opt.scheduler_threads_per_cpu = config::datacache_scheduler_threads_per_cpu;
opt.enable_datacache_persistence = false;

@@ -145,7 +146,7 @@ void ObjectCacheBench::prepare_sequence_data(StoragePageCache* cache, int64_t co
auto* ptr = new std::vector<uint8_t>(_page_size);
(*ptr)[0] = 1;
PageCacheHandle handle;
MemCacheWriteOptions options;
ObjectCacheWriteOptions options;
Status st = cache->insert(key, ptr, options, &handle);
if (!st.ok()) {
if (!st.is_already_exist()) {

@@ -161,7 +162,7 @@ void ObjectCacheBench::prepare_data(StoragePageCache* cache, int64_t count) {
auto* ptr = new std::vector<uint8_t>(_page_size);
(*ptr)[0] = 1;
PageCacheHandle handle;
MemCacheWriteOptions options;
ObjectCacheWriteOptions options;
Status st = cache->insert(key, ptr, options, &handle);
if (!st.ok()) {
if (!st.is_already_exist()) {

@@ -210,7 +211,7 @@ void ObjectCacheBench::random_insert_multi_threads(benchmark::State* state, Stor
auto* ptr = new std::vector<uint8_t>(page_size);
(*ptr)[0] = 1;
PageCacheHandle handle;
MemCacheWriteOptions options;
ObjectCacheWriteOptions options;
Status st = cache->insert(key, ptr, options, &handle);
if (!st.ok()) {
if (!st.is_already_exist()) {
@@ -23,15 +23,15 @@ set(CACHE_FILES
mem_space_monitor.cpp
datacache.cpp
datacache_utils.cpp
mem_cache/lrucache_engine.cpp
mem_cache/page_cache.cpp
disk_cache/block_cache.cpp
disk_cache/io_buffer.cpp
disk_cache/block_cache_hit_rate_counter.hpp
lrucache_engine.cpp
block_cache/block_cache.cpp
block_cache/io_buffer.cpp
block_cache/block_cache_hit_rate_counter.hpp
object_cache/page_cache.cpp
)

if (${WITH_STARCACHE} STREQUAL "ON")
list(APPEND CACHE_FILES disk_cache/starcache_engine.cpp)
list(APPEND CACHE_FILES starcache_engine.cpp)
list(APPEND CACHE_FILES peer_cache_engine.cpp)
endif()
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "cache/disk_cache/block_cache.h"
#include "cache/block_cache/block_cache.h"

#include <fmt/format.h>

@@ -36,7 +36,7 @@ BlockCache::~BlockCache() {
(void)shutdown();
}

Status BlockCache::init(const BlockCacheOptions& options, std::shared_ptr<LocalDiskCacheEngine> local_cache,
Status BlockCache::init(const BlockCacheOptions& options, std::shared_ptr<LocalCacheEngine> local_cache,
std::shared_ptr<RemoteCacheEngine> remote_cache) {
_block_size = std::min(options.block_size, MAX_BLOCK_SIZE);
_local_cache = std::move(local_cache);

@@ -45,8 +45,7 @@ Status BlockCache::init(const BlockCacheOptions& options, std::shared_ptr<LocalD
return Status::OK();
}

Status BlockCache::write(const CacheKey& cache_key, off_t offset, const IOBuffer& buffer,
DiskCacheWriteOptions* options) {
Status BlockCache::write(const CacheKey& cache_key, off_t offset, const IOBuffer& buffer, WriteCacheOptions* options) {
if (offset % _block_size != 0) {
LOG(WARNING) << "write block key: " << cache_key << " with invalid args, offset: " << offset;
return Status::InvalidArgument(strings::Substitute("offset must be aligned by block size $0", _block_size));

@@ -63,7 +62,7 @@ Status BlockCache::write(const CacheKey& cache_key, off_t offset, const IOBuffer
static void empty_deleter(void*) {}

Status BlockCache::write(const CacheKey& cache_key, off_t offset, size_t size, const char* data,
DiskCacheWriteOptions* options) {
WriteCacheOptions* options) {
if (!data) {
return Status::InvalidArgument("invalid data buffer");
}

@@ -74,7 +73,7 @@ Status BlockCache::write(const CacheKey& cache_key, off_t offset, size_t size, c
}

Status BlockCache::read(const CacheKey& cache_key, off_t offset, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) {
ReadCacheOptions* options) {
if (size == 0) {
return Status::OK();
}

@@ -85,7 +84,7 @@ Status BlockCache::read(const CacheKey& cache_key, off_t offset, size_t size, IO
}

StatusOr<size_t> BlockCache::read(const CacheKey& cache_key, off_t offset, size_t size, char* data,
DiskCacheReadOptions* options) {
ReadCacheOptions* options) {
IOBuffer buffer;
RETURN_IF_ERROR(read(cache_key, offset, size, &buffer, options));
buffer.copy_to(data);

@@ -118,7 +117,7 @@ Status BlockCache::remove(const CacheKey& cache_key, off_t offset, size_t size)
}

Status BlockCache::read_buffer_from_remote_cache(const std::string& cache_key, size_t offset, size_t size,
IOBuffer* buffer, DiskCacheReadOptions* options) {
IOBuffer* buffer, ReadCacheOptions* options) {
if (size == 0) {
return Status::OK();
}
@@ -16,16 +16,12 @@

#include <atomic>

#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/local_cache_engine.h"
#include "cache/remote_cache_engine.h"
#include "common/status.h"

namespace starrocks {

struct BlockCacheOptions {
size_t block_size = 0;
};

class BlockCache {
public:
using CacheKey = std::string;

@@ -37,23 +33,22 @@ public:
~BlockCache();

// Init the block cache instance
Status init(const BlockCacheOptions& options, std::shared_ptr<LocalDiskCacheEngine> local_cache,
Status init(const BlockCacheOptions& options, std::shared_ptr<LocalCacheEngine> local_cache,
std::shared_ptr<RemoteCacheEngine> remote_cache);

// Write data buffer to cache, the `offset` must be aligned by block size
Status write(const CacheKey& cache_key, off_t offset, const IOBuffer& buffer,
DiskCacheWriteOptions* options = nullptr);
Status write(const CacheKey& cache_key, off_t offset, const IOBuffer& buffer, WriteCacheOptions* options = nullptr);

Status write(const CacheKey& cache_key, off_t offset, size_t size, const char* data,
DiskCacheWriteOptions* options = nullptr);
WriteCacheOptions* options = nullptr);

// Read data from cache, it returns the data size if successful; otherwise the error status
// will be returned. The offset and size must be aligned by block size.
Status read(const CacheKey& cache_key, off_t offset, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options = nullptr);
ReadCacheOptions* options = nullptr);

StatusOr<size_t> read(const CacheKey& cache_key, off_t offset, size_t size, char* data,
DiskCacheReadOptions* options = nullptr);
ReadCacheOptions* options = nullptr);

bool exist(const CacheKey& cache_key, off_t offset, size_t size) const;

@@ -62,7 +57,7 @@ public:

// Read data from remote cache
Status read_buffer_from_remote_cache(const std::string& cache_key, size_t offset, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options);
ReadCacheOptions* options);

void record_read_local_cache(size_t size, int64_t latency_us);

@@ -78,14 +73,15 @@ public:
bool is_initialized() const { return _initialized.load(std::memory_order_relaxed); }

bool available() const { return is_initialized() && _local_cache->available(); }
bool mem_cache_available() const { return is_initialized() && _local_cache->mem_cache_available(); }

std::shared_ptr<LocalDiskCacheEngine> local_cache() { return _local_cache; }
std::shared_ptr<LocalCacheEngine> local_cache() { return _local_cache; }

static const size_t MAX_BLOCK_SIZE;

private:
size_t _block_size = 0;
std::shared_ptr<LocalDiskCacheEngine> _local_cache;
std::shared_ptr<LocalCacheEngine> _local_cache;
std::shared_ptr<RemoteCacheEngine> _remote_cache;
std::atomic<bool> _initialized = false;
};
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "cache/disk_cache/io_buffer.h"
#include "cache/block_cache/io_buffer.h"

#include "gutil/strings/fastmem.h"
@@ -55,4 +55,19 @@ struct DataCacheStatusUtils {
}
};

struct DataCacheMetrics {
DataCacheStatus status;

size_t mem_quota_bytes;
size_t mem_used_bytes;
size_t disk_quota_bytes;
size_t disk_used_bytes;
size_t meta_used_bytes = 0;
};

#ifdef WITH_STARCACHE
using StarCacheMetrics = starcache::CacheMetrics;
using StarCacheStatus = starcache::CacheStatus;
#endif

} // namespace starrocks
@@ -14,8 +14,17 @@

#pragma once

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

#include "cache/cache_metrics.h"
#include "common/status.h"

namespace starrocks {

// Options to control how to create DataCache instance
struct DataCacheOptions {
bool enable_datacache = false;
bool enable_cache_select = false;

@@ -28,4 +37,82 @@ struct DataCacheOptions {
int64_t datacache_ttl_seconds = 0;
};

struct DirSpace {
std::string path;
size_t size;
};

struct RemoteCacheOptions {
double skip_read_factor = 0;
};

struct MemCacheOptions {
size_t mem_space_size = 0;
};

struct DiskCacheOptions {
// basic
size_t mem_space_size = 0;
std::vector<DirSpace> dir_spaces;
std::string meta_path;

// advanced
size_t block_size = 0;
bool enable_checksum = false;
bool enable_direct_io = false;
bool enable_tiered_cache = true;
bool enable_datacache_persistence = false;
size_t max_concurrent_inserts = 0;
size_t max_flying_memory_mb = 0;
double scheduler_threads_per_cpu = 0;
double skip_read_factor = 0;
uint32_t inline_item_count_limit = 0;
std::string eviction_policy;
};

struct BlockCacheOptions {
size_t block_size = 0;
};

struct WriteCacheOptions {
int8_t priority = 0;
// If ttl_seconds=0 (default), no ttl restriction will be set. If an old one exists, remove it.
uint64_t ttl_seconds = 0;
// If overwrite=true, the cache value will be replaced if it already exists.
bool overwrite = false;
bool async = false;
// When allow_zero_copy=true, it means the caller can ensure the target buffer not be released before
// to write finish. So the cache library can use the buffer directly without copying it to another buffer.
bool allow_zero_copy = false;
std::function<void(int, const std::string&)> callback = nullptr;

// The probability to evict other items if the cache space is full, which can help avoid frequent cache replacement
// and improve cache hit rate sometimes.
// It is expressed as a percentage. If evict_probability is 10, it means the probability to evict other data is 10%.
int32_t evict_probability = 100;

// The base frequency for target cache.
// When using multiple segment lru, a higher frequency may cause the cache is written to warm segment directly.
// For the default cache options, that `lru_segment_freq_bits` is 0:
// * The default `frequency=0` indicates the cache will be written to cold segment.
// * A frequency value greater than 0 indicates writing this cache directly to the warm segment.
int8_t frequency = 0;

struct Stats {
int64_t write_mem_bytes = 0;
int64_t write_disk_bytes = 0;
} stats;
};

struct ReadCacheOptions {
bool use_adaptor = false;
std::string remote_host;
int32_t remote_port;

struct Stats {
int64_t read_mem_bytes = 0;
int64_t read_disk_bytes = 0;
} stats;
};

} // namespace starrocks
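For orientation, here is a minimal sketch (not part of this change) of how the relocated `WriteCacheOptions`/`ReadCacheOptions` structs are passed to the `BlockCache` API whose signatures appear in this diff; the helper function name is an illustrative assumption.

```cpp
// Illustrative only: exercise BlockCache with the options structs defined above.
#include "cache/block_cache/block_cache.h"  // new location introduced by this change

namespace starrocks {

Status cache_then_read_block(BlockCache* cache, const std::string& key, const IOBuffer& buf, size_t size) {
    WriteCacheOptions write_opts;
    write_opts.overwrite = true;   // replace an existing entry for this key
    write_opts.ttl_seconds = 0;    // no TTL restriction
    RETURN_IF_ERROR(cache->write(key, /*offset=*/0, buf, &write_opts));

    IOBuffer out;
    ReadCacheOptions read_opts;
    return cache->read(key, /*offset=*/0, size, &out, &read_opts);
}

} // namespace starrocks
```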
@@ -16,9 +16,9 @@

#include "cache/datacache_utils.h"
#include "cache/disk_space_monitor.h"
#include "cache/mem_cache/lrucache_engine.h"
#include "cache/mem_cache/page_cache.h"
#include "cache/lrucache_engine.h"
#include "cache/mem_space_monitor.h"
#include "cache/object_cache/page_cache.h"
#include "common/status.h"
#include "gutil/strings/split.h"
#include "gutil/strings/strip.h"

@@ -27,8 +27,8 @@
#include "util/parse_util.h"

#ifdef WITH_STARCACHE
#include "cache/disk_cache/starcache_engine.h"
#include "cache/peer_cache_engine.h"
#include "cache/starcache_engine.h"
#endif

namespace starrocks {

@@ -44,6 +44,11 @@ Status DataCache::init(const std::vector<StorePath>& store_paths) {
_block_cache = std::make_shared<BlockCache>();
_page_cache = std::make_shared<StoragePageCache>();

#if defined(WITH_STARCACHE)
_local_disk_cache_engine = "starcache";
#endif
_local_mem_cache_engine = "lrucache";

if (!config::datacache_enable) {
config::disable_storage_page_cache = true;
config::block_cache_enable = false;
@@ -168,88 +173,89 @@ BlockCacheOptions DataCache::_init_block_cache_options() {
return cache_options;
}

#if defined(WITH_STARCACHE)
StatusOr<DiskCacheOptions> DataCache::_init_disk_cache_options() {
DiskCacheOptions cache_options;

if (_local_disk_cache_engine == "starcache") {
#ifdef USE_STAROS
std::vector<string> corresponding_starlet_dirs;
if (config::datacache_unified_instance_enable && !config::starlet_cache_dir.empty()) {
// in older versions, users might set `starlet_cache_dir` instead of `storage_root_path` for starlet cache,
// we need to move starlet cache into storage_root_path/datacache
auto s = DataCacheUtils::get_corresponding_starlet_cache_dir(_store_paths, config::starlet_cache_dir);
if (!s.ok()) {
LOG(WARNING) << s.status().message() << ", change config::datacache_unified_instance_enable to false";
config::datacache_unified_instance_enable = false;
} else {
corresponding_starlet_dirs = *s;
}
}
int idx = 0;
#endif

for (auto& root_path : _store_paths) {
// Because we have unified the datacache between datalake and starlet, we also need to unify the
// cache path and quota.
// To reuse the old cache data in `starlet_cache` directory, we try to rename it to the new `datacache`
// directory if it exists. To avoid the risk of cross disk renaming of a large amount of cached data,
// we do not automatically rename it when the source and destination directories are on different disks.
// In this case, users should manually remount the directories and restart them.
std::string datacache_path = root_path.path + "/datacache";
#ifdef USE_STAROS
if (config::datacache_unified_instance_enable) {
std::string starlet_cache_path;
if (idx < corresponding_starlet_dirs.size()) {
starlet_cache_path = corresponding_starlet_dirs[idx++];
std::vector<string> corresponding_starlet_dirs;
if (config::datacache_unified_instance_enable && !config::starlet_cache_dir.empty()) {
// in older versions, users might set `starlet_cache_dir` instead of `storage_root_path` for starlet cache,
// we need to move starlet cache into storage_root_path/datacache
auto s = DataCacheUtils::get_corresponding_starlet_cache_dir(_store_paths, config::starlet_cache_dir);
if (!s.ok()) {
LOG(WARNING) << s.status().message() << ", change config::datacache_unified_instance_enable to false";
config::datacache_unified_instance_enable = false;
} else {
starlet_cache_path = root_path.path + "/starlet_cache/star_cache";
corresponding_starlet_dirs = *s;
}
RETURN_IF_ERROR(DataCacheUtils::change_disk_path(starlet_cache_path, datacache_path));
}
int idx = 0;
#endif
// Create it if not exist
Status st = FileSystem::Default()->create_dir_if_missing(datacache_path);
if (!st.ok()) {
LOG(ERROR) << "Fail to create datacache directory: " << datacache_path << ", reason: " << st.message();
return Status::InternalError("Fail to create datacache directory");
}

ASSIGN_OR_RETURN(int64_t disk_size, DataCacheUtils::parse_conf_datacache_disk_size(
datacache_path, config::datacache_disk_size, -1));
for (auto& root_path : _store_paths) {
// Because we have unified the datacache between datalake and starlet, we also need to unify the
// cache path and quota.
// To reuse the old cache data in `starlet_cache` directory, we try to rename it to the new `datacache`
// directory if it exists. To avoid the risk of cross disk renaming of a large amount of cached data,
// we do not automatically rename it when the source and destination directories are on different disks.
// In this case, users should manually remount the directories and restart them.
std::string datacache_path = root_path.path + "/datacache";
#ifdef USE_STAROS
// If the `datacache_disk_size` is manually set a positive value, we will use the maximum cache quota between
// dataleke and starlet cache as the quota of the unified cache. Otherwise, the cache quota will remain zero
// and then automatically adjusted based on the current avalible disk space.
if (config::datacache_unified_instance_enable &&
(!config::enable_datacache_disk_auto_adjust || disk_size > 0)) {
ASSIGN_OR_RETURN(
int64_t starlet_cache_size,
DataCacheUtils::parse_conf_datacache_disk_size(
datacache_path, fmt::format("{}%", config::starlet_star_cache_disk_size_percent), -1));
disk_size = std::max(disk_size, starlet_cache_size);
}
if (config::datacache_unified_instance_enable) {
std::string starlet_cache_path;
if (idx < corresponding_starlet_dirs.size()) {
starlet_cache_path = corresponding_starlet_dirs[idx++];
} else {
starlet_cache_path = root_path.path + "/starlet_cache/star_cache";
}
RETURN_IF_ERROR(DataCacheUtils::change_disk_path(starlet_cache_path, datacache_path));
}
#endif
cache_options.dir_spaces.push_back({.path = datacache_path, .size = static_cast<size_t>(disk_size)});
}
// Create it if not exist
Status st = FileSystem::Default()->create_dir_if_missing(datacache_path);
if (!st.ok()) {
LOG(ERROR) << "Fail to create datacache directory: " << datacache_path << ", reason: " << st.message();
return Status::InternalError("Fail to create datacache directory");
}

if (cache_options.dir_spaces.empty()) {
config::enable_datacache_disk_auto_adjust = false;
}
ASSIGN_OR_RETURN(int64_t disk_size, DataCacheUtils::parse_conf_datacache_disk_size(
datacache_path, config::datacache_disk_size, -1));
#ifdef USE_STAROS
// If the `datacache_disk_size` is manually set a positive value, we will use the maximum cache quota between
// dataleke and starlet cache as the quota of the unified cache. Otherwise, the cache quota will remain zero
// and then automatically adjusted based on the current avalible disk space.
if (config::datacache_unified_instance_enable &&
(!config::enable_datacache_disk_auto_adjust || disk_size > 0)) {
ASSIGN_OR_RETURN(
int64_t starlet_cache_size,
DataCacheUtils::parse_conf_datacache_disk_size(
datacache_path, fmt::format("{}%", config::starlet_star_cache_disk_size_percent), -1));
disk_size = std::max(disk_size, starlet_cache_size);
}
#endif
cache_options.dir_spaces.push_back({.path = datacache_path, .size = static_cast<size_t>(disk_size)});
}

cache_options.block_size = config::datacache_block_size;
cache_options.max_flying_memory_mb = config::datacache_max_flying_memory_mb;
cache_options.max_concurrent_inserts = config::datacache_max_concurrent_inserts;
cache_options.enable_checksum = config::datacache_checksum_enable;
cache_options.enable_direct_io = config::datacache_direct_io_enable;
cache_options.skip_read_factor = config::datacache_skip_read_factor;
cache_options.scheduler_threads_per_cpu = config::datacache_scheduler_threads_per_cpu;
cache_options.enable_datacache_persistence = config::datacache_persistence_enable;
cache_options.inline_item_count_limit = config::datacache_inline_item_count_limit;
cache_options.eviction_policy = config::datacache_eviction_policy;
if (cache_options.dir_spaces.empty()) {
config::enable_datacache_disk_auto_adjust = false;
}

cache_options.block_size = config::datacache_block_size;
cache_options.max_flying_memory_mb = config::datacache_max_flying_memory_mb;
cache_options.max_concurrent_inserts = config::datacache_max_concurrent_inserts;
cache_options.enable_checksum = config::datacache_checksum_enable;
cache_options.enable_direct_io = config::datacache_direct_io_enable;
cache_options.enable_tiered_cache = config::datacache_tiered_cache_enable;
cache_options.skip_read_factor = config::datacache_skip_read_factor;
cache_options.scheduler_threads_per_cpu = config::datacache_scheduler_threads_per_cpu;
cache_options.enable_datacache_persistence = config::datacache_persistence_enable;
cache_options.inline_item_count_limit = config::datacache_inline_item_count_limit;
cache_options.eviction_policy = config::datacache_eviction_policy;
}

return cache_options;
}
#endif

static bool parse_resource_str(const string& str, string* value) {
if (!str.empty()) {

@@ -283,7 +289,7 @@ void DataCache::try_release_resource_before_core_dump() {
};

if (_local_mem_cache != nullptr && need_release("data_cache")) {
(void)_local_mem_cache->update_mem_quota(0);
(void)_local_mem_cache->update_mem_quota(0, false);
}
}
@@ -14,9 +14,8 @@

#pragma once

#include "cache/disk_cache/block_cache.h"
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/mem_cache/local_mem_cache_engine.h"
#include "cache/block_cache/block_cache.h"
#include "cache/local_cache_engine.h"
#include "common/status.h"

namespace starrocks {

@@ -40,13 +39,16 @@ public:

void try_release_resource_before_core_dump();

void set_local_disk_cache(std::shared_ptr<LocalDiskCacheEngine> local_disk_cache) {
void set_local_mem_cache(std::shared_ptr<LocalCacheEngine> local_mem_cache) {
_local_mem_cache = std::move(local_mem_cache);
}
void set_local_disk_cache(std::shared_ptr<LocalCacheEngine> local_disk_cache) {
_local_disk_cache = std::move(local_disk_cache);
}
void set_page_cache(std::shared_ptr<StoragePageCache> page_cache) { _page_cache = std::move(page_cache); }

LocalMemCacheEngine* local_mem_cache() { return _local_mem_cache.get(); }
LocalDiskCacheEngine* local_disk_cache() { return _local_disk_cache.get(); }
LocalCacheEngine* local_mem_cache() { return _local_mem_cache.get(); }
LocalCacheEngine* local_disk_cache() { return _local_disk_cache.get(); }
BlockCache* block_cache() const { return _block_cache.get(); }
void set_block_cache(std::shared_ptr<BlockCache> block_cache) { _block_cache = std::move(block_cache); }
StoragePageCache* page_cache() const { return _page_cache.get(); }

@@ -61,11 +63,11 @@ public:

private:
StatusOr<MemCacheOptions> _init_mem_cache_options();
StatusOr<DiskCacheOptions> _init_disk_cache_options();
RemoteCacheOptions _init_remote_cache_options();
BlockCacheOptions _init_block_cache_options();

#if defined(WITH_STARCACHE)
StatusOr<DiskCacheOptions> _init_disk_cache_options();
Status _init_starcache_engine(DiskCacheOptions* cache_options);
Status _init_peer_cache(const RemoteCacheOptions& cache_options);
#endif

@@ -76,8 +78,10 @@ private:
std::vector<StorePath> _store_paths;

// cache engine
std::shared_ptr<LocalMemCacheEngine> _local_mem_cache;
std::shared_ptr<LocalDiskCacheEngine> _local_disk_cache;
std::string _local_mem_cache_engine;
std::string _local_disk_cache_engine;
std::shared_ptr<LocalCacheEngine> _local_mem_cache;
std::shared_ptr<LocalCacheEngine> _local_disk_cache;
std::shared_ptr<RemoteCacheEngine> _remote_cache;

std::shared_ptr<BlockCache> _block_cache;
@@ -21,29 +21,26 @@

#include "absl/status/statusor.h"
#include "absl/strings/str_split.h"
#include "cache/mem_cache/local_mem_cache_engine.h"
#include "fs/fs.h"
#include "gutil/strings/split.h"
#include "util/parse_util.h"

namespace starrocks {

void DataCacheUtils::set_metrics_to_thrift(TDataCacheMetrics& t_metrics, const DataCacheDiskMetrics& metrics) {
void DataCacheUtils::set_metrics_from_thrift(TDataCacheMetrics& t_metrics, const DataCacheMetrics& metrics) {
t_metrics.__set_status(DataCacheStatusUtils::to_thrift(metrics.status));
t_metrics.__set_disk_quota_bytes(metrics.disk_quota_bytes);
t_metrics.__set_disk_used_bytes(metrics.disk_used_bytes);
}

void DataCacheUtils::set_metrics_to_thrift(TDataCacheMetrics& t_metrics, const DataCacheMemMetrics& metrics) {
t_metrics.__set_mem_quota_bytes(metrics.mem_quota_bytes);
t_metrics.__set_mem_used_bytes(metrics.mem_used_bytes);
}

#ifdef WITH_STARCACHE
void DataCacheUtils::set_disk_metrics_to_thrift(TDataCacheMetrics& t_metrics, const StarCacheMetrics& metrics) {
void DataCacheUtils::set_metrics_from_thrift(TDataCacheMetrics& t_metrics, const StarCacheMetrics& metrics) {
t_metrics.__set_status(DataCacheStatusUtils::to_thrift(static_cast<DataCacheStatus>(metrics.status)));
t_metrics.__set_disk_quota_bytes(metrics.disk_quota_bytes);
t_metrics.__set_disk_used_bytes(metrics.disk_used_bytes);
t_metrics.__set_mem_quota_bytes(metrics.mem_quota_bytes);
t_metrics.__set_mem_used_bytes(metrics.mem_used_bytes);
}
#endif
@@ -15,21 +15,18 @@
#pragma once

#include "cache/cache_metrics.h"
#include "cache/disk_cache/starcache_engine.h"
#include "cache/local_cache_engine.h"
#include "gen_cpp/DataCache_types.h"
#include "storage/options.h"

namespace starrocks {

class DataCacheMemMetrics;

class DataCacheUtils {
public:
static void set_metrics_to_thrift(TDataCacheMetrics& t_metrics, const DataCacheDiskMetrics& metrics);
static void set_metrics_to_thrift(TDataCacheMetrics& t_metrics, const DataCacheMemMetrics& metrics);
static void set_metrics_from_thrift(TDataCacheMetrics& t_metrics, const DataCacheMetrics& metrics);

#ifdef WITH_STARCACHE
static void set_disk_metrics_to_thrift(TDataCacheMetrics& t_metrics, const StarCacheMetrics& metrics);
static void set_metrics_from_thrift(TDataCacheMetrics& t_metrics, const StarCacheMetrics& metrics);
#endif

static Status parse_conf_datacache_mem_size(const std::string& conf_mem_size_str, int64_t mem_limit,
@@ -1,137 +0,0 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "cache/cache_metrics.h"
#include "cache/disk_cache/io_buffer.h"
#include "common/status.h"

namespace starrocks {

struct DirSpace {
std::string path;
size_t size;
};

struct DiskCacheOptions {
// basic
size_t mem_space_size = 0;
std::vector<DirSpace> dir_spaces;
std::string meta_path;

// advanced
size_t block_size = 0;
bool enable_checksum = false;
bool enable_direct_io = false;
bool enable_datacache_persistence = false;
size_t max_concurrent_inserts = 0;
size_t max_flying_memory_mb = 0;
double scheduler_threads_per_cpu = 0;
double skip_read_factor = 0;
uint32_t inline_item_count_limit = 0;
std::string eviction_policy;
};

struct DiskCacheWriteOptions {
int8_t priority = 0;
// If ttl_seconds=0 (default), no ttl restriction will be set. If an old one exists, remove it.
uint64_t ttl_seconds = 0;
// If overwrite=true, the cache value will be replaced if it already exists.
bool overwrite = false;
bool async = false;
// When allow_zero_copy=true, it means the caller can ensure the target buffer not be released before
// to write finish. So the cache library can use the buffer directly without copying it to another buffer.
bool allow_zero_copy = false;
std::function<void(int, const std::string&)> callback = nullptr;

// The base frequency for target cache.
// When using multiple segment lru, a higher frequency may cause the cache is written to warm segment directly.
// For the default cache options, that `lru_segment_freq_bits` is 0:
// * The default `frequency=0` indicates the cache will be written to cold segment.
// * A frequency value greater than 0 indicates writing this cache directly to the warm segment.
int8_t frequency = 0;

struct Stats {
int64_t write_mem_bytes = 0;
int64_t write_disk_bytes = 0;
} stats;
};

struct DiskCacheReadOptions {
bool use_adaptor = false;
std::string remote_host;
int32_t remote_port;

struct Stats {
int64_t read_mem_bytes = 0;
int64_t read_disk_bytes = 0;
} stats;
};

struct DataCacheDiskMetrics {
DataCacheStatus status;

size_t disk_quota_bytes;
size_t disk_used_bytes;
};

class LocalDiskCacheEngine {
public:
virtual ~LocalDiskCacheEngine() = default;

virtual bool is_initialized() const = 0;

// Write data to cache
virtual Status write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) = 0;

// Read data from cache, it returns the data size if successful; otherwise the error status
// will be returned.
virtual Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) = 0;

virtual bool exist(const std::string& key) const = 0;

// Remove data from cache.
virtual Status remove(const std::string& key) = 0;

// Update the datacache disk space information, such as disk quota or disk path.
virtual Status update_disk_spaces(const std::vector<DirSpace>& spaces) = 0;

// Update the datacache inline cache count limit
virtual Status update_inline_cache_count_limit(int32_t limit) = 0;

virtual const DataCacheDiskMetrics cache_metrics() const = 0;

virtual void record_read_remote(size_t size, int64_t latency_us) = 0;

virtual void record_read_cache(size_t size, int64_t latency_us) = 0;

virtual Status shutdown() = 0;

virtual bool has_disk_cache() const = 0;
virtual bool available() const = 0;
virtual void disk_spaces(std::vector<DirSpace>* spaces) const = 0;

// Get the lookup count, including cache hit count and cache miss count.
virtual size_t lookup_count() const = 0;

// Get the cache hit count.
virtual size_t hit_count() const = 0;

// Remove all cache entries that are not actively in use.
virtual Status prune() = 0;
};

} // namespace starrocks
@@ -224,10 +224,10 @@ dev_t DiskSpace::FileSystemWrapper::device_id(const std::string& path) {
return DataCacheUtils::disk_device_id(path);
}

DiskSpaceMonitor::DiskSpaceMonitor(LocalDiskCacheEngine* cache)
DiskSpaceMonitor::DiskSpaceMonitor(LocalCacheEngine* cache)
: _cache(cache), _fs(std::make_shared<DiskSpace::FileSystemWrapper>()) {}

DiskSpaceMonitor::DiskSpaceMonitor(LocalDiskCacheEngine* cache, std::shared_ptr<DiskSpace::FileSystemWrapper> fs)
DiskSpaceMonitor::DiskSpaceMonitor(LocalCacheEngine* cache, std::shared_ptr<DiskSpace::FileSystemWrapper> fs)
: _cache(cache), _fs(std::move(fs)) {}

DiskSpaceMonitor::~DiskSpaceMonitor() {

@@ -20,7 +20,7 @@
#include <unordered_map>

#include "cache/cache_options.h"
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/local_cache_engine.h"
#include "common/status.h"
#include "fs/fs.h"
#include "util/disk_info.h"

@@ -118,8 +118,8 @@ private:

class DiskSpaceMonitor {
public:
DiskSpaceMonitor(LocalDiskCacheEngine* cache);
DiskSpaceMonitor(LocalDiskCacheEngine* cache, std::shared_ptr<DiskSpace::FileSystemWrapper> fs);
DiskSpaceMonitor(LocalCacheEngine* cache);
DiskSpaceMonitor(LocalCacheEngine* cache, std::shared_ptr<DiskSpace::FileSystemWrapper> fs);
~DiskSpaceMonitor();

Status init(std::vector<DirSpace>* dir_spaces);

@@ -152,7 +152,7 @@ private:

size_t _total_cache_usage = 0;
size_t _total_cache_quota = 0;
LocalDiskCacheEngine* _cache = nullptr;
LocalCacheEngine* _cache = nullptr;
std::shared_ptr<DiskSpace::FileSystemWrapper> _fs = nullptr;
};
@@ -14,69 +14,46 @@

#pragma once

#include "cache/cache_metrics.h"
#include "cache/disk_cache/io_buffer.h"
#include "cache/block_cache/io_buffer.h"
#include "cache/cache_options.h"
#include "cache/object_cache/cache_types.h"
#include "common/status.h"

namespace starrocks {
class CacheKey;

struct MemCacheOptions {
size_t mem_space_size = 0;
};
enum class LocalCacheEngineType { STARCACHE, LRUCACHE };

struct MemCacheWriteOptions {
// The priority of the cache object, only support 0 and 1 now.
int8_t priority = 0;

// The probability to evict other items if the cache space is full, which can help avoid frequent cache replacement
// and improve cache hit rate sometimes.
// It is expressed as a percentage. If evict_probability is 10, it means the probability to evict other data is 10%.
int32_t evict_probability = 100;
};

struct MemCacheReadOptions {};

struct MemCacheHandle {};

using MemCacheHandlePtr = MemCacheHandle*;

// using CacheDeleter = std::function<void(const std::string&, void*)>;
//
// We only use the deleter function of the lru cache temporarily.
// Maybe a std::function object or a function pointer like `void (*)(std::string&, void*)` which
// independent on lru cache is more appropriate, but it is not easy to convert them to the lru
// cache deleter when using a lru cache module.
using MemCacheDeleter = void (*)(const CacheKey&, void*);

struct DataCacheMemMetrics {
size_t mem_quota_bytes = 0;
size_t mem_used_bytes = 0;
};

class LocalMemCacheEngine {
class LocalCacheEngine {
public:
virtual ~LocalMemCacheEngine() = default;
virtual ~LocalCacheEngine() = default;

virtual bool is_initialized() const = 0;

// Write data to cache
virtual Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) = 0;

// Read data from cache, it returns the data size if successful; otherwise the error status
// will be returned.
virtual Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
ReadCacheOptions* options) = 0;

// Insert object to cache
virtual Status insert(const std::string& key, void* value, size_t size, MemCacheDeleter deleter,
MemCacheHandlePtr* handle, const MemCacheWriteOptions& options) = 0;
virtual Status insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) = 0;

// Lookup object from cache, the `handle` wraps the object pointer.
// As long as the handle object is not destroyed and the user does not manually call the `handle->release()`
// function, the corresponding pointer will never be freed by the cache system.
virtual Status lookup(const std::string& key, MemCacheHandlePtr* handle,
MemCacheReadOptions* options = nullptr) = 0;
virtual Status lookup(const std::string& key, ObjectCacheHandlePtr* handle,
ObjectCacheReadOptions* options = nullptr) = 0;

// Release a handle returned by a previous insert() or lookup().
// The handle must have not been released yet.
virtual void release(MemCacheHandlePtr handle) = 0;
virtual void release(ObjectCacheHandlePtr handle) = 0;

// Return the value in the given handle returned by a previous insert() or lookup().
// The handle must have not been released yet.
virtual const void* value(MemCacheHandlePtr handle) = 0;
virtual const void* value(ObjectCacheHandlePtr handle) = 0;

virtual bool exist(const std::string& key) const = 0;

@@ -88,15 +65,29 @@ public:
virtual Status adjust_mem_quota(int64_t delta, size_t min_capacity) = 0;

// Update the datacache memory quota.
virtual Status update_mem_quota(size_t quota_bytes) = 0;
virtual Status update_mem_quota(size_t quota_bytes, bool flush_to_disk) = 0;

virtual const DataCacheMemMetrics cache_metrics() const = 0;
// Update the datacache disk space information, such as disk quota or disk path.
virtual Status update_disk_spaces(const std::vector<DirSpace>& spaces) = 0;

// Update the datacache inline cache count limit
virtual Status update_inline_cache_count_limit(int32_t limit) = 0;

virtual const DataCacheMetrics cache_metrics() const = 0;

virtual void record_read_remote(size_t size, int64_t latency_us) = 0;

virtual void record_read_cache(size_t size, int64_t latency_us) = 0;

virtual Status shutdown() = 0;

virtual LocalCacheEngineType engine_type() = 0;

virtual bool has_mem_cache() const = 0;
virtual bool has_disk_cache() const = 0;
virtual bool available() const = 0;
virtual bool mem_cache_available() const = 0;
virtual void disk_spaces(std::vector<DirSpace>* spaces) const = 0;

virtual size_t mem_quota() const = 0;
virtual size_t mem_usage() const = 0;

@@ -107,17 +98,11 @@ public:
// Get the cache hit count.
virtual size_t hit_count() const = 0;

// Get the insert count.
virtual size_t insert_count() const = 0;

// Get the insert evict count.
virtual size_t insert_evict_count() const = 0;

// Get the release evict count.
virtual size_t release_evict_count() const = 0;
// Get all cache metrics together.
virtual const ObjectCacheMetrics metrics() const = 0;

// Remove all cache entries that are not actively in use.
virtual Status prune() = 0;
};

} // namespace starrocks
} // namespace starrocks
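As a hedged illustration of the unified interface above (not part of the diff), an object-cache consumer would typically pair `lookup()` with `value()`/`release()` like this; the function name, the cached value type, and the `StatusOr` include path are assumptions.

```cpp
#include <vector>

#include "cache/local_cache_engine.h"
#include "common/statusor.h"  // assumed location of StatusOr

namespace starrocks {

// Hypothetical caller: fetch a cached page and report its size.
StatusOr<size_t> cached_page_size(LocalCacheEngine* engine, const std::string& key) {
    ObjectCacheHandlePtr handle = nullptr;
    ObjectCacheReadOptions opts;
    RETURN_IF_ERROR(engine->lookup(key, &handle, &opts));
    // Per the lookup() contract above, the pointer stays valid while the handle is held.
    const auto* page = static_cast<const std::vector<uint8_t>*>(engine->value(handle));
    size_t size = page->size();
    engine->release(handle);  // drop the reference once the value has been consumed
    return size;
}

} // namespace starrocks
```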
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "cache/mem_cache/lrucache_engine.h"
#include "cache/lrucache_engine.h"

#include <butil/fast_rand.h>

@@ -23,24 +23,33 @@ Status LRUCacheEngine::init(const MemCacheOptions& options) {
return Status::OK();
}

Status LRUCacheEngine::insert(const std::string& key, void* value, size_t size, MemCacheDeleter deleter,
MemCacheHandlePtr* handle, const MemCacheWriteOptions& options) {
Status LRUCacheEngine::write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) {
return Status::NotSupported("LRUCache engine don't support write block");
}

Status LRUCacheEngine::read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
ReadCacheOptions* options) {
return Status::NotSupported("LRUCache engine don't support read block");
}

Status LRUCacheEngine::insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) {
if (!_check_write(size, options)) {
return Status::InternalError("cache insertion is rejected");
}
auto* lru_handle = _cache->insert(key, value, size, deleter, static_cast<CachePriority>(options.priority));
if (handle) {
*handle = reinterpret_cast<MemCacheHandlePtr>(lru_handle);
*handle = reinterpret_cast<ObjectCacheHandlePtr>(lru_handle);
}
return Status::OK();
}

Status LRUCacheEngine::lookup(const std::string& key, MemCacheHandlePtr* handle, MemCacheReadOptions* options) {
Status LRUCacheEngine::lookup(const std::string& key, ObjectCacheHandlePtr* handle, ObjectCacheReadOptions* options) {
auto* lru_handle = _cache->lookup(CacheKey(key));
if (!lru_handle) {
return Status::NotFound("no such entry");
}
*handle = reinterpret_cast<MemCacheHandlePtr>(lru_handle);
*handle = reinterpret_cast<ObjectCacheHandlePtr>(lru_handle);
return Status::OK();
}

@@ -59,13 +68,26 @@ Status LRUCacheEngine::remove(const std::string& key) {
return Status::OK();
}

Status LRUCacheEngine::update_mem_quota(size_t quota_bytes) {
Status LRUCacheEngine::update_mem_quota(size_t quota_bytes, bool flush_to_disk) {
_cache->set_capacity(quota_bytes);
return Status::OK();
}

const DataCacheMemMetrics LRUCacheEngine::cache_metrics() const {
return DataCacheMemMetrics{.mem_quota_bytes = _cache->get_capacity(), .mem_used_bytes = _cache->get_memory_usage()};
Status LRUCacheEngine::update_disk_spaces(const std::vector<DirSpace>& spaces) {
return Status::NotSupported("LRUCache engine don't support update disk spaces");
}

Status LRUCacheEngine::update_inline_cache_count_limit(int32_t limit) {
return Status::NotSupported("LRUCache engine don't support update inline cache count limit");
}

const DataCacheMetrics LRUCacheEngine::cache_metrics() const {
return DataCacheMetrics{.status = DataCacheStatus::NORMAL,
.mem_quota_bytes = _cache->get_capacity(),
.mem_used_bytes = _cache->get_memory_usage(),
.disk_quota_bytes = 0,
.disk_used_bytes = 0,
.meta_used_bytes = 0};
}

Status LRUCacheEngine::shutdown() {

@@ -78,12 +100,12 @@ Status LRUCacheEngine::prune() {
return Status::OK();
}

void LRUCacheEngine::release(MemCacheHandlePtr handle) {
void LRUCacheEngine::release(ObjectCacheHandlePtr handle) {
auto lru_handle = reinterpret_cast<Cache::Handle*>(handle);
_cache->release(lru_handle);
}

const void* LRUCacheEngine::value(MemCacheHandlePtr handle) {
const void* LRUCacheEngine::value(ObjectCacheHandlePtr handle) {
auto lru_handle = reinterpret_cast<Cache::Handle*>(handle);
return _cache->value(lru_handle);
}

@@ -111,19 +133,18 @@ size_t LRUCacheEngine::hit_count() const {
return _cache->get_hit_count();
}

size_t LRUCacheEngine::insert_count() const {
return _cache->get_insert_count();
const ObjectCacheMetrics LRUCacheEngine::metrics() const {
ObjectCacheMetrics m;
m.capacity = _cache->get_capacity();
m.usage = _cache->get_memory_usage();
m.lookup_count = _cache->get_lookup_count();
m.hit_count = _cache->get_hit_count();
// Unsupported
m.object_item_count = 0;
return m;
}

size_t LRUCacheEngine::insert_evict_count() const {
return _cache->get_insert_evict_count();
}

size_t LRUCacheEngine::release_evict_count() const {
return _cache->get_release_evict_count();
}

bool LRUCacheEngine::_check_write(size_t charge, const MemCacheWriteOptions& options) const {
bool LRUCacheEngine::_check_write(size_t charge, const ObjectCacheWriteOptions& options) const {
if (options.evict_probability >= 100) {
return true;
}

@@ -143,4 +164,5 @@ bool LRUCacheEngine::_check_write(size_t charge, const MemCacheWriteOptions& opt
}
return false;
}

} // namespace starrocks
@@ -16,11 +16,11 @@

#include <atomic>

#include "cache/mem_cache/local_mem_cache_engine.h"
#include "cache/local_cache_engine.h"
#include "util/lru_cache.h"

namespace starrocks {
class LRUCacheEngine final : public LocalMemCacheEngine {
class LRUCacheEngine final : public LocalCacheEngine {
public:
LRUCacheEngine() = default;
~LRUCacheEngine() override = default;

@@ -28,25 +28,36 @@ public:
Status init(const MemCacheOptions& options);
bool is_initialized() const override { return _initialized.load(std::memory_order_relaxed); }

Status insert(const std::string& key, void* value, size_t size, MemCacheDeleter deleter, MemCacheHandlePtr* handle,
const MemCacheWriteOptions& options) override;
Status lookup(const std::string& key, MemCacheHandlePtr* handle, MemCacheReadOptions* options) override;
Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer, ReadCacheOptions* options) override;

Status insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) override;
Status lookup(const std::string& key, ObjectCacheHandlePtr* handle, ObjectCacheReadOptions* options) override;

bool exist(const std::string& key) const override;
Status remove(const std::string& key) override;

Status update_mem_quota(size_t quota_bytes) override;
Status update_mem_quota(size_t quota_bytes, bool flush_to_disk) override;
Status update_disk_spaces(const std::vector<DirSpace>& spaces) override;
Status update_inline_cache_count_limit(int32_t limit) override;

const DataCacheMemMetrics cache_metrics() const override;
const DataCacheMetrics cache_metrics() const override;
void record_read_remote(size_t size, int64_t latency_us) override {}
void record_read_cache(size_t size, int64_t latency_us) override {}

Status shutdown() override;
LocalCacheEngineType engine_type() override { return LocalCacheEngineType::LRUCACHE; }
bool has_mem_cache() const override { return _cache->get_capacity() > 0; }
bool has_disk_cache() const override { return false; }

bool available() const override { return is_initialized() && has_mem_cache(); }
bool mem_cache_available() const override { return is_initialized() && has_mem_cache(); }

void release(MemCacheHandlePtr handle) override;
const void* value(MemCacheHandlePtr handle) override;
void disk_spaces(std::vector<DirSpace>* spaces) const override {}

void release(ObjectCacheHandlePtr handle) override;
const void* value(ObjectCacheHandlePtr handle) override;

Status adjust_mem_quota(int64_t delta, size_t min_capacity) override;

@@ -57,18 +68,14 @@ public:

size_t hit_count() const override;

size_t insert_count() const override;

size_t insert_evict_count() const override;

size_t release_evict_count() const override;
const ObjectCacheMetrics metrics() const override;

Status prune() override;

private:
bool _check_write(size_t charge, const MemCacheWriteOptions& options) const;
bool _check_write(size_t charge, const ObjectCacheWriteOptions& options) const;

std::atomic<bool> _initialized = false;
std::unique_ptr<ShardedLRUCache> _cache;
};
} // namespace starrocks
} // namespace starrocks
@ -14,7 +14,7 @@
|
|||
|
||||
#include "cache/mem_space_monitor.h"
|
||||
|
||||
#include "cache/mem_cache/page_cache.h"
|
||||
#include "cache/object_cache/page_cache.h"
|
||||
#include "common/config.h"
|
||||
#include "runtime/exec_env.h"
|
||||
#include "runtime/mem_tracker.h"
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,75 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <functional>
#include <ostream>
#include <string>

// Not a good way to import lru cache header here, just for temporary compatibility with old deleters.
#include "util/lru_cache.h"

namespace starrocks {

enum class ObjectCacheModuleType { LRUCACHE, STARCACHE };

struct ObjectCacheWriteOptions {
// The priority of the cache object, only support 0 and 1 now.
int8_t priority = 0;
// If ttl_seconds=0 (default), no ttl restriction will be set. If an old one exists, remove it.
uint64_t ttl_seconds = 0;
// If overwrite=true, the cache value will be replaced if it already exists.
bool overwrite = false;
// The probability to evict other items if the cache space is full, which can help avoid frequent cache replacement
// and improve cache hit rate sometimes.
// It is expressed as a percentage. If evict_probability is 10, it means the probability to evict other data is 10%.
int32_t evict_probability = 100;
};

struct ObjectCacheReadOptions {};

struct ObjectCacheHandle {};

struct ObjectCacheMetrics {
size_t capacity = 0;
size_t usage = 0;
size_t lookup_count = 0;
size_t hit_count = 0;
size_t object_item_count = 0;
};

using ObjectCacheHandlePtr = ObjectCacheHandle*;

// using CacheDeleter = std::function<void(const std::string&, void*)>;
//
// We only use the deleter function of the lru cache temporarily.
// Maybe a std::function object or a function pointer like `void (*)(std::string&, void*)` which
// independent on lru cache is more appropriate, but it is not easy to convert them to the lru
// cache deleter when using a lru cache module.
using ObjectCacheDeleter = void (*)(const CacheKey&, void*);

inline std::ostream& operator<<(std::ostream& os, const ObjectCacheModuleType& module) {
switch (module) {
case ObjectCacheModuleType::LRUCACHE:
os << "lrucache";
break;
case ObjectCacheModuleType::STARCACHE:
os << "starcache";
break;
}
return os;
}

} // namespace starrocks
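The new object-cache types above are plain structs plus a C-style deleter. The following is a minimal usage sketch, not part of this diff: the `delete_cached_string` helper and the call site are illustrative, and the engine pointer stands for any `LocalCacheEngine` implementation such as `LRUCacheEngine` or `StarCacheEngine`.

```cpp
// Hedged sketch only: assumes the engine interface shown elsewhere in this diff.
#include <iostream>
#include <string>

#include "cache/local_cache_engine.h" // include path as used above in this diff
#include "common/status.h"

namespace starrocks {

// Matches the ObjectCacheDeleter signature: void (*)(const CacheKey&, void*).
static void delete_cached_string(const CacheKey& /*key*/, void* value) {
    delete static_cast<std::string*>(value);
}

void object_cache_usage_sketch(LocalCacheEngine* engine) {
    ObjectCacheWriteOptions wopts;
    wopts.priority = 1;           // keep this entry a little longer under pressure
    wopts.ttl_seconds = 300;      // expire after five minutes
    wopts.evict_probability = 50; // evict other entries only half of the time when full

    auto* payload = new std::string("cached payload");
    ObjectCacheHandlePtr handle = nullptr;
    Status st = engine->insert("demo_key", payload, payload->size(), delete_cached_string, &handle, wopts);
    if (!st.ok()) {
        delete payload; // the deleter only runs for entries the cache accepted
        return;
    }

    ObjectCacheReadOptions ropts;
    ObjectCacheHandlePtr found = nullptr;
    if (engine->lookup("demo_key", &found, &ropts).ok()) {
        const auto* cached = static_cast<const std::string*>(engine->value(found));
        std::cout << *cached << " served by " << ObjectCacheModuleType::STARCACHE << std::endl;
        engine->release(found);
    }
    engine->release(handle);
}

} // namespace starrocks
```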
@@ -32,7 +32,7 @@
// specific language governing permissions and limitations
// under the License.

#include "cache/mem_cache/page_cache.h"
#include "cache/object_cache/page_cache.h"

#include <malloc.h>

@@ -49,9 +49,6 @@ std::atomic<size_t> StoragePageCacheMetrics::released_page_handle_count{};
METRIC_DEFINE_UINT_GAUGE(page_cache_lookup_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_hit_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_insert_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_insert_evict_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_release_evict_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_capacity, MetricUnit::BYTES);
METRIC_DEFINE_UINT_GAUGE(page_cache_pinned_count, MetricUnit::BYTES);

@@ -64,22 +61,6 @@ void StoragePageCache::init_metrics() {
StarRocksMetrics::instance()->metrics()->register_hook(
"page_cache_hit_count", [this]() { page_cache_hit_count.set_value(get_hit_count()); });

StarRocksMetrics::instance()->metrics()->register_metric("page_cache_insert_count", &page_cache_insert_count);
StarRocksMetrics::instance()->metrics()->register_hook(
"page_cache_insert_count", [this]() { page_cache_insert_count.set_value(get_insert_count()); });

StarRocksMetrics::instance()->metrics()->register_metric("page_cache_insert_evict_count",
&page_cache_insert_evict_count);
StarRocksMetrics::instance()->metrics()->register_hook("page_cache_insert_evict_count", [this]() {
page_cache_insert_evict_count.set_value(get_insert_evict_count());
});

StarRocksMetrics::instance()->metrics()->register_metric("page_cache_release_evict_count",
&page_cache_release_evict_count);
StarRocksMetrics::instance()->metrics()->register_hook("page_cache_release_evict_count", [this]() {
page_cache_release_evict_count.set_value(get_release_evict_count());
});

StarRocksMetrics::instance()->metrics()->register_metric("page_cache_capacity", &page_cache_capacity);
StarRocksMetrics::instance()->metrics()->register_hook("page_cache_capacity",
[this]() { page_cache_capacity.set_value(get_capacity()); });

@@ -94,7 +75,7 @@ void StoragePageCache::prune() {
}

void StoragePageCache::set_capacity(size_t capacity) {
Status st = _cache->update_mem_quota(capacity);
Status st = _cache->update_mem_quota(capacity, false);
LOG_IF(INFO, !st.ok()) << "Fail to set cache capacity to " << capacity << ", reason: " << st.message();
}

@@ -110,18 +91,6 @@ uint64_t StoragePageCache::get_hit_count() const {
return _cache->hit_count();
}

uint64_t StoragePageCache::get_insert_count() const {
return _cache->insert_count();
}

uint64_t StoragePageCache::get_insert_evict_count() const {
return _cache->insert_evict_count();
}

uint64_t StoragePageCache::get_release_evict_count() const {
return _cache->release_evict_count();
}

bool StoragePageCache::adjust_capacity(int64_t delta, size_t min_capacity) {
Status st = _cache->adjust_mem_quota(delta, min_capacity);
if (!st.ok()) {

@@ -136,7 +105,7 @@ size_t StoragePageCache::get_pinned_count() const {
}

bool StoragePageCache::lookup(const std::string& key, PageCacheHandle* handle) {
MemCacheHandle* obj_handle = nullptr;
ObjectCacheHandle* obj_handle = nullptr;
Status st = _cache->lookup(key, &obj_handle);
if (!st.ok()) {
return false;

@@ -146,7 +115,7 @@ bool StoragePageCache::lookup(const std::string& key, PageCacheHandle* handle) {
return true;
}

Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* data, const MemCacheWriteOptions& opts,
Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* data, const ObjectCacheWriteOptions& opts,
PageCacheHandle* handle) {
#ifndef BE_TEST
int64_t mem_size = malloc_usable_size(data->data()) + sizeof(*data);

@@ -162,7 +131,7 @@ Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* da
delete cache_item;
};

MemCacheHandle* obj_handle = nullptr;
ObjectCacheHandle* obj_handle = nullptr;
// Use mem size managed by memory allocator as this record charge size.
// At the same time, we should record this record size for data fetching when lookup.
Status st = _cache->insert(key, (void*)data, mem_size, deleter, &obj_handle, opts);

@@ -173,9 +142,9 @@ Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* da
return st;
}

Status StoragePageCache::insert(const std::string& key, void* data, int64_t size, MemCacheDeleter deleter,
const MemCacheWriteOptions& opts, PageCacheHandle* handle) {
MemCacheHandle* obj_handle = nullptr;
Status StoragePageCache::insert(const std::string& key, void* data, int64_t size, ObjectCacheDeleter deleter,
const ObjectCacheWriteOptions& opts, PageCacheHandle* handle) {
ObjectCacheHandle* obj_handle = nullptr;
Status st = _cache->insert(key, data, size, deleter, &obj_handle, opts);
if (st.ok()) {
*handle = PageCacheHandle(_cache, obj_handle);

@@ -43,7 +43,7 @@ namespace starrocks {
class PageCacheHandle;
class MemTracker;
struct MemCacheWriteOptions;
struct ObjectCacheWriteOptions;

// Page cache min size is 256MB
static constexpr int64_t kcacheMinSize = 268435456;

@@ -66,9 +66,9 @@ public:
// Client should call create_global_cache before.
static StoragePageCache* instance() { return DataCache::GetInstance()->page_cache(); }

StoragePageCache(LocalMemCacheEngine* cache_engine) : _cache(cache_engine), _initialized(true) {}
StoragePageCache(LocalCacheEngine* cache_engine) : _cache(cache_engine), _initialized(true) {}

void init(LocalMemCacheEngine* cache_engine) {
void init(LocalCacheEngine* cache_engine) {
_cache = cache_engine;
_initialized.store(true, std::memory_order_relaxed);
}

@@ -87,11 +87,11 @@ public:
// This function is thread-safe, and when two clients insert two same key
// concurrently, this function can assure that only one page is cached.
// The in_memory page will have higher priority.
Status insert(const std::string& key, std::vector<uint8_t>* data, const MemCacheWriteOptions& opts,
Status insert(const std::string& key, std::vector<uint8_t>* data, const ObjectCacheWriteOptions& opts,
PageCacheHandle* handle);

Status insert(const std::string& key, void* data, int64_t size, MemCacheDeleter deleter,
const MemCacheWriteOptions& opts, PageCacheHandle* handle);
Status insert(const std::string& key, void* data, int64_t size, ObjectCacheDeleter deleter,
const ObjectCacheWriteOptions& opts, PageCacheHandle* handle);

size_t memory_usage() const { return _cache->mem_usage(); }

@@ -103,12 +103,6 @@ public:
uint64_t get_hit_count() const;

uint64_t get_insert_count() const;

uint64_t get_insert_evict_count() const;

uint64_t get_release_evict_count() const;

bool adjust_capacity(int64_t delta, size_t min_capacity = 0);

void prune();

@@ -121,7 +115,7 @@ public:
size_t get_pinned_count() const;

private:
LocalMemCacheEngine* _cache = nullptr;
LocalCacheEngine* _cache = nullptr;
std::atomic<bool> _initialized = false;
};

@@ -131,12 +125,7 @@ private:
class PageCacheHandle {
public:
PageCacheHandle() = default;
PageCacheHandle(LocalMemCacheEngine* cache, MemCacheHandle* handle) : _cache(cache), _handle(handle) {}

// Don't allow copy and assign
PageCacheHandle(const PageCacheHandle&) = delete;
const PageCacheHandle& operator=(const PageCacheHandle&) = delete;

PageCacheHandle(LocalCacheEngine* cache, ObjectCacheHandle* handle) : _cache(cache), _handle(handle) {}
~PageCacheHandle() {
if (_handle != nullptr) {
StoragePageCacheMetrics::released_page_handle_count++;

@@ -156,12 +145,16 @@ public:
return *this;
}

LocalMemCacheEngine* cache() const { return _cache; }
LocalCacheEngine* cache() const { return _cache; }
const void* data() const { return _cache->value(_handle); }

private:
LocalMemCacheEngine* _cache = nullptr;
MemCacheHandle* _handle = nullptr;
LocalCacheEngine* _cache = nullptr;
ObjectCacheHandle* _handle = nullptr;

// Don't allow copy and assign
PageCacheHandle(const PageCacheHandle&) = delete;
const PageCacheHandle& operator=(const PageCacheHandle&) = delete;
};

} // namespace starrocks
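Given the StoragePageCache interface above, a minimal read-through sketch would look like the following. This is illustrative only; the key format, page size, and helper name are assumptions, not part of this diff.

```cpp
// Hedged sketch: read a page through StoragePageCache, inserting it on a miss.
#include <cstdint>
#include <string>
#include <vector>

#include "cache/object_cache/page_cache.h" // include path as used in this diff
#include "common/status.h"

namespace starrocks {

Status cached_page_size(const std::string& cache_key, size_t* size_out) {
    auto* cache = StoragePageCache::instance();

    PageCacheHandle handle;
    if (!cache->lookup(cache_key, &handle)) {
        // Miss: load the page (loading elided), then hand ownership to the cache.
        auto* page = new std::vector<uint8_t>(4096, 0);
        ObjectCacheWriteOptions opts; // defaults: priority 0, no TTL, evict_probability 100
        RETURN_IF_ERROR(cache->insert(cache_key, page, opts, &handle));
    }

    // The handle pins the entry, so the pointer stays valid while we read it.
    const auto* page = static_cast<const std::vector<uint8_t>*>(handle.data());
    *size_out = page->size();
    return Status::OK();
}

} // namespace starrocks
```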
@@ -29,7 +29,7 @@ Status PeerCacheEngine::init(const RemoteCacheOptions& options) {
}

Status PeerCacheEngine::read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) {
ReadCacheOptions* options) {
if (options->use_adaptor && !_cache_adaptor->check_read_cache()) {
return Status::ResourceBusy("resource is busy");
}

@@ -26,10 +26,9 @@ public:
Status init(const RemoteCacheOptions& options) override;

Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer, ReadCacheOptions* options) override;

Status write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) override {
Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) override {
return Status::NotSupported("write data to peer cache is unsupported");
}

@@ -14,16 +14,12 @@
#pragma once

#include "cache/disk_cache/io_buffer.h"
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/block_cache/io_buffer.h"
#include "cache/cache_options.h"
#include "common/status.h"

namespace starrocks {

struct RemoteCacheOptions {
double skip_read_factor = 0;
};

class RemoteCacheEngine {
public:
virtual ~RemoteCacheEngine() = default;

@@ -32,12 +28,12 @@ public:
virtual Status init(const RemoteCacheOptions& options) = 0;

// Write data to remote cache
virtual Status write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) = 0;
virtual Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) = 0;

// Read data from remote cache, it returns the data size if successful; otherwise the error status
// will be returned.
virtual Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) = 0;
ReadCacheOptions* options) = 0;

// Remove data from cache.
virtual Status remove(const std::string& key) = 0;

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "cache/disk_cache/starcache_engine.h"
#include "cache/starcache_engine.h"

#include <filesystem>

@@ -55,6 +55,8 @@ Status StarCacheEngine::init(const DiskCacheOptions& options) {
}
opt.lru_segment_freq_bits = 0;

_enable_tiered_cache = options.enable_tiered_cache;
_enable_datacache_persistence = options.enable_datacache_persistence;
_cache = std::make_shared<starcache::StarCache>();
RETURN_IF_ERROR(to_status(_cache->init(opt)));

@@ -66,7 +68,7 @@ Status StarCacheEngine::init(const DiskCacheOptions& options) {
return Status::OK();
}

Status StarCacheEngine::write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) {
Status StarCacheEngine::write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) {
if (!options) {
return to_status(_cache->set(key, buffer.const_raw_buf(), nullptr));
}

@@ -78,8 +80,12 @@ Status StarCacheEngine::write(const std::string& key, const IOBuffer& buffer, Di
opts.async = options->async;
opts.keep_alive = options->allow_zero_copy;
opts.callback = options->callback;
opts.mode = starcache::WriteOptions::WriteMode::WRITE_THROUGH;
opts.evict_probability = 100;
if (!_enable_datacache_persistence && _enable_tiered_cache) {
opts.mode = starcache::WriteOptions::WriteMode::WRITE_BACK;
} else {
opts.mode = starcache::WriteOptions::WriteMode::WRITE_THROUGH;
}
opts.evict_probability = options->evict_probability;
opts.ignore_inline = true;
Status st;
{

@@ -98,13 +104,14 @@ Status StarCacheEngine::write(const std::string& key, const IOBuffer& buffer, Di
}

Status StarCacheEngine::read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) {
ReadCacheOptions* options) {
if (!options) {
return to_status(_cache->read(key, off, size, &buffer->raw_buf(), nullptr));
}
starcache::ReadOptions opts;
opts.use_adaptor = options->use_adaptor;
opts.mode = starcache::ReadOptions::ReadMode::READ_THROUGH;
opts.mode = _enable_tiered_cache ? starcache::ReadOptions::ReadMode::READ_BACK
: starcache::ReadOptions::ReadMode::READ_THROUGH;
auto st = to_status(_cache->read(key, off, size, &buffer->raw_buf(), &opts));
if (st.ok()) {
options->stats.read_mem_bytes = opts.stats.read_mem_bytes;

@@ -113,6 +120,52 @@ Status StarCacheEngine::read(const std::string& key, size_t off, size_t size, IO
return st;
}

Status StarCacheEngine::insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) {
starcache::ObjectHandle* obj_hdl = new starcache::ObjectHandle;
auto obj_deleter = [deleter, key, value] {
// For temporary compatibility with old deleters.
CacheKey cache_key(key);
deleter(cache_key, value);
};
starcache::WriteOptions opts;
opts.priority = options.priority;
opts.ttl_seconds = options.ttl_seconds;
opts.overwrite = options.overwrite;
opts.evict_probability = options.evict_probability;
Status st = to_status(_cache->set_object(key, value, size, obj_deleter, obj_hdl, &opts));
if (!st.ok()) {
delete obj_hdl;
} else if (handle) {
// Try release the old handle before fill it with a new one.
_try_release_obj_handle(*handle);
*handle = reinterpret_cast<ObjectCacheHandlePtr>(obj_hdl);
}
return st;
}

Status StarCacheEngine::lookup(const std::string& key, ObjectCacheHandlePtr* handle, ObjectCacheReadOptions* options) {
starcache::ObjectHandle* obj_hdl = new starcache::ObjectHandle;
// Skip checking options temporarily because there is no valid members in `ObjectCacheReadOptions` now.
Status st = to_status(_cache->get_object(key, obj_hdl, nullptr));
if (!st.ok()) {
delete obj_hdl;
} else if (handle) {
_try_release_obj_handle(*handle);
*handle = reinterpret_cast<ObjectCacheHandlePtr>(obj_hdl);
}
return st;
}

void StarCacheEngine::release(ObjectCacheHandlePtr handle) {
_try_release_obj_handle(handle);
}

const void* StarCacheEngine::value(ObjectCacheHandlePtr handle) {
auto obj_hdl = reinterpret_cast<starcache::ObjectHandle*>(handle);
return obj_hdl->ptr();
}

bool StarCacheEngine::exist(const std::string& key) const {
return _cache->exist(key);
}
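One thing the insert/lookup implementations above imply is a strict release discipline: every handle they hand back wraps a heap-allocated starcache::ObjectHandle that only release() frees. A small RAII guard makes that explicit; the guard below is hypothetical and not part of this diff.

```cpp
// Hedged sketch: RAII wrapper over the handle ownership rules shown above.
#include "cache/starcache_engine.h" // include path as used in this diff

namespace starrocks {

class ObjectHandleGuard {
public:
    ObjectHandleGuard(StarCacheEngine* engine, ObjectCacheHandlePtr handle) : _engine(engine), _handle(handle) {}
    ~ObjectHandleGuard() {
        if (_engine != nullptr && _handle != nullptr) {
            _engine->release(_handle); // releases and deletes the underlying ObjectHandle
        }
    }

    // A handle represents unique ownership, so copying is disallowed.
    ObjectHandleGuard(const ObjectHandleGuard&) = delete;
    ObjectHandleGuard& operator=(const ObjectHandleGuard&) = delete;

    const void* value() const { return _engine->value(_handle); }

private:
    StarCacheEngine* _engine;
    ObjectCacheHandlePtr _handle;
};

} // namespace starrocks
```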
@@ -121,6 +174,22 @@ Status StarCacheEngine::remove(const std::string& key) {
return to_status(_cache->remove(key));
}

Status StarCacheEngine::update_mem_quota(size_t quota_bytes, bool flush_to_disk) {
Status st = to_status(_cache->update_mem_quota(quota_bytes, flush_to_disk));
_refresh_quota();
return st;
}

Status StarCacheEngine::adjust_mem_quota(int64_t delta, size_t min_capacity) {
auto starcache_metrics = _cache->metrics();
size_t capacity = starcache_metrics.mem_quota_bytes;
int64_t new_capacity = capacity + delta;
if (new_capacity < (int64_t)min_capacity) {
return Status::InvalidArgument("target capacity is less than the minimum capacity");
}
return to_status(_cache->update_mem_quota(new_capacity, false));
}

Status StarCacheEngine::update_disk_spaces(const std::vector<DirSpace>& spaces) {
std::vector<starcache::DirSpace> disk_spaces;
disk_spaces.reserve(spaces.size());

@@ -140,11 +209,14 @@ const StarCacheMetrics StarCacheEngine::starcache_metrics(int level) const {
return _cache->metrics(level);
}

const DataCacheDiskMetrics StarCacheEngine::cache_metrics() const {
const DataCacheMetrics StarCacheEngine::cache_metrics() const {
auto starcache_metrics = _cache->metrics(0);
DataCacheDiskMetrics metrics = {.status = static_cast<DataCacheStatus>(starcache_metrics.status),
.disk_quota_bytes = starcache_metrics.disk_quota_bytes,
.disk_used_bytes = starcache_metrics.disk_used_bytes};
DataCacheMetrics metrics = {.status = static_cast<DataCacheStatus>(starcache_metrics.status),
.mem_quota_bytes = starcache_metrics.mem_quota_bytes,
.mem_used_bytes = starcache_metrics.mem_used_bytes,
.disk_quota_bytes = starcache_metrics.disk_quota_bytes,
.disk_used_bytes = starcache_metrics.disk_used_bytes,
.meta_used_bytes = starcache_metrics.meta_used_bytes};
return metrics;
}

@@ -167,9 +239,18 @@ Status StarCacheEngine::shutdown() {
void StarCacheEngine::_refresh_quota() {
auto metrics = starcache_metrics(0);
_mem_quota.store(metrics.mem_quota_bytes, std::memory_order_relaxed);
_disk_quota.store(metrics.disk_quota_bytes, std::memory_order_relaxed);
}

void StarCacheEngine::_try_release_obj_handle(ObjectCacheHandlePtr handle) {
if (handle) {
auto obj_hdl = reinterpret_cast<starcache::ObjectHandle*>(handle);
obj_hdl->release();
delete obj_hdl;
}
}

void StarCacheEngine::disk_spaces(std::vector<DirSpace>* spaces) const {
spaces->clear();
auto metrics = starcache_metrics(0);

@@ -178,6 +259,18 @@ void StarCacheEngine::disk_spaces(std::vector<DirSpace>* spaces) const {
}
}

size_t StarCacheEngine::mem_quota() const {
starcache::CacheMetrics metrics = _cache->metrics(0);
// TODO: optimizer later
return metrics.mem_quota_bytes;
}

size_t StarCacheEngine::mem_usage() const {
// TODO: add meta size?
starcache::CacheMetrics metrics = _cache->metrics(0);
return metrics.mem_used_bytes;
}

size_t StarCacheEngine::lookup_count() const {
starcache::CacheMetrics metrics = _cache->metrics(1);
return metrics.detail_l1->hit_count + metrics.detail_l1->miss_count;

@@ -188,6 +281,17 @@ size_t StarCacheEngine::hit_count() const {
return metrics.detail_l1->hit_count;
}

const ObjectCacheMetrics StarCacheEngine::metrics() const {
auto starcache_metrics = _cache->metrics(2);
ObjectCacheMetrics m;
m.capacity = starcache_metrics.mem_quota_bytes;
m.usage = starcache_metrics.mem_used_bytes;
m.lookup_count = starcache_metrics.detail_l1->hit_count + starcache_metrics.detail_l1->miss_count;
m.hit_count = starcache_metrics.detail_l1->hit_count;
m.object_item_count = starcache_metrics.detail_l2->object_item_count;
return m;
}

Status StarCacheEngine::prune() {
return to_status(_cache->update_mem_quota(0, false));
}
@@ -14,51 +14,48 @@
#pragma once

#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/local_cache_engine.h"
#include "common/status.h"

#ifdef WITH_STARCACHE
#include "starcache/star_cache.h"
#include "starcache/time_based_cache_adaptor.h"
#else
namespace starcache {
class StarCache;
class TimeBasedCacheAdaptor;
struct CacheMetrics;
} // namespace starcache
#endif

namespace starrocks {

#ifdef WITH_STARCACHE
using StarCacheMetrics = starcache::CacheMetrics;
#endif

class StarCacheEngine : public LocalDiskCacheEngine {
class StarCacheEngine : public LocalCacheEngine {
public:
StarCacheEngine() = default;
~StarCacheEngine() override = default;
virtual ~StarCacheEngine() override = default;

Status init(const DiskCacheOptions& options);
bool is_initialized() const override { return _initialized.load(std::memory_order_relaxed); }

Status write(const std::string& key, const IOBuffer& buffer, DiskCacheWriteOptions* options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer,
DiskCacheReadOptions* options) override;
Status write(const std::string& key, const IOBuffer& buffer, WriteCacheOptions* options) override;
Status read(const std::string& key, size_t off, size_t size, IOBuffer* buffer, ReadCacheOptions* options) override;

Status insert(const std::string& key, void* value, size_t size, ObjectCacheDeleter deleter,
ObjectCacheHandlePtr* handle, const ObjectCacheWriteOptions& options) override;

Status lookup(const std::string& key, ObjectCacheHandlePtr* handle, ObjectCacheReadOptions* options) override;

void release(ObjectCacheHandlePtr handle) override;

const void* value(ObjectCacheHandlePtr handle) override;

bool exist(const std::string& key) const override;

Status remove(const std::string& key) override;

Status adjust_mem_quota(int64_t delta, size_t min_capacity) override;

Status update_mem_quota(size_t quota_bytes, bool flush_to_disk) override;

Status update_disk_spaces(const std::vector<DirSpace>& spaces) override;

Status update_inline_cache_count_limit(int32_t limit) override;

#ifdef WITH_STARCACHE
const StarCacheMetrics starcache_metrics(int level) const;
#endif

const DataCacheDiskMetrics cache_metrics() const override;
const DataCacheMetrics cache_metrics() const override;

void record_read_remote(size_t size, int64_t latency_us) override;

@@ -66,25 +63,38 @@ public:
Status shutdown() override;

LocalCacheEngineType engine_type() override { return LocalCacheEngineType::STARCACHE; }

std::shared_ptr<starcache::StarCache> starcache_instance() { return _cache; }
bool has_mem_cache() const override { return _mem_quota.load(std::memory_order_relaxed) > 0; }
bool has_disk_cache() const override { return _disk_quota.load(std::memory_order_relaxed) > 0; }
bool available() const override { return is_initialized() && has_disk_cache(); }
bool available() const override { return is_initialized() && (has_mem_cache() || has_disk_cache()); }
bool mem_cache_available() const override { return is_initialized() && has_mem_cache(); }

void disk_spaces(std::vector<DirSpace>* spaces) const override;

size_t mem_quota() const override;
size_t mem_usage() const override;

size_t lookup_count() const override;

size_t hit_count() const override;

const ObjectCacheMetrics metrics() const override;

Status prune() override;

private:
void _refresh_quota();
void _try_release_obj_handle(ObjectCacheHandlePtr handle);

std::shared_ptr<starcache::StarCache> _cache;
std::unique_ptr<starcache::TimeBasedCacheAdaptor> _cache_adaptor;
bool _enable_tiered_cache = false;
bool _enable_datacache_persistence = false;
std::atomic<bool> _initialized = false;

std::atomic<size_t> _mem_quota = 0;
std::atomic<size_t> _disk_quota = 0;
};
} // namespace starrocks
@@ -529,11 +529,11 @@ void NullableColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool
}

void NullableColumn::check_or_die() const {
DCHECK_EQ(_null_column->size(), _data_column->size());
CHECK_EQ(_null_column->size(), _data_column->size());
// when _has_null=true, the column may have no null value, so don't check.
if (!_has_null) {
auto null_data = _null_column->immutable_data();
DCHECK(!SIMD::contain_nonzero(null_data, 0));
CHECK(!SIMD::contain_nonzero(null_data, 0));
}
_data_column->check_or_die();
_null_column->check_or_die();

@@ -412,7 +412,7 @@ CONF_Bool(enable_event_based_compaction_framework, "true");
CONF_Bool(enable_size_tiered_compaction_strategy, "true");
CONF_mBool(enable_pk_size_tiered_compaction_strategy, "true");
// Enable parallel execution within tablet for primary key tables.
CONF_mBool(enable_pk_parallel_execution, "true");
CONF_mBool(enable_pk_parallel_execution, "false");
// The minimum threshold of data size for enabling pk parallel execution.
// Default is 300MB.
CONF_mInt64(pk_parallel_execution_threshold_bytes, "314572800");

@@ -1299,8 +1299,19 @@ CONF_Bool(datacache_block_buffer_enable, "true");
// To control how many threads will be created for datacache synchronous tasks.
// For the default value, it means for every 8 cpu, one thread will be created.
CONF_Double(datacache_scheduler_threads_per_cpu, "0.125");
// To control whether to cache raw data both in memory and on disk.
// If true, the raw data will be written to the tiered cache composed of memory cache and disk cache,
// and the memory tier caches hotter data than the disk tier.
// If false, the raw data will be written to disk directly and read from disk without promotion.
// Object data, such as parquet footer objects, which can only be cached in memory, is not affected
// by this configuration.
CONF_Bool(datacache_tiered_cache_enable, "false");
// Whether to persist cached data
CONF_Bool(datacache_persistence_enable, "true");
// DataCache engines, alternatives: starcache, lrucache
// Set the default value empty to indicate whether it is manually configured by users.
// If not, we need to adjust the default engine based on build switches like "WITH_STARCACHE".
CONF_String_enum(datacache_engine, "", ",starcache,lrucache");
// The interval time (millisecond) for agent report datacache metrics to FE.
CONF_mInt32(report_datacache_metrics_interval_ms, "60000");
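The three datacache switches above are what drive the write/read mode selection added to StarCacheEngine earlier in this diff. The following is a simplified restatement of that mapping for clarity; the helper names are illustrative and do not exist in the codebase.

```cpp
// Hedged sketch: how the datacache switches map onto starcache cache modes.
#include "starcache/star_cache.h"

namespace starrocks {

inline starcache::WriteOptions::WriteMode pick_write_mode(bool persistence_enabled, bool tiered_cache_enabled) {
    // Without persistence, a tiered cache can absorb writes in the memory tier
    // first and flush them back to disk later; otherwise write straight through.
    if (!persistence_enabled && tiered_cache_enabled) {
        return starcache::WriteOptions::WriteMode::WRITE_BACK;
    }
    return starcache::WriteOptions::WriteMode::WRITE_THROUGH;
}

inline starcache::ReadOptions::ReadMode pick_read_mode(bool tiered_cache_enabled) {
    // READ_BACK promotes disk hits into the memory tier; READ_THROUGH does not.
    return tiered_cache_enabled ? starcache::ReadOptions::ReadMode::READ_BACK
                                : starcache::ReadOptions::ReadMode::READ_THROUGH;
}

} // namespace starrocks
```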
@@ -1354,6 +1365,7 @@ CONF_Alias(datacache_block_size, block_cache_block_size);
CONF_Alias(datacache_max_concurrent_inserts, block_cache_max_concurrent_inserts);
CONF_Alias(datacache_checksum_enable, block_cache_checksum_enable);
CONF_Alias(datacache_direct_io_enable, block_cache_direct_io_enable);
CONF_Alias(datacache_engine, block_cache_engine);

CONF_mInt64(l0_l1_merge_ratio, "10");
// max wal file size in l0

@@ -1604,7 +1616,7 @@ CONF_mBool(apply_del_vec_after_all_index_filter, "true");
// connector sink memory watermark
CONF_mDouble(connector_sink_mem_high_watermark_ratio, "0.3");
CONF_mDouble(connector_sink_mem_low_watermark_ratio, "0.1");
CONF_mDouble(connector_sink_mem_urgent_space_ratio, "0.05");
CONF_mDouble(connector_sink_mem_urgent_space_ratio, "0.1");
// Whether enable spill intermediate data for connector sink.
CONF_mBool(enable_connector_sink_spill, "true");

@@ -340,7 +340,7 @@ void TEST_clear_configs();
template <>
struct fmt::formatter<starrocks::config::MutableString> : formatter<std::string> {
auto format(const starrocks::config::MutableString& s, format_context& ctx) const {
auto format(const starrocks::config::MutableString& s, format_context& ctx) {
return formatter<std::string>::format(s.value(), ctx);
}
};
@@ -44,13 +44,6 @@
#ifdef USE_STAROS
#include "fslib/star_cache_handler.h"
#endif
#include <fmt/ranges.h>

#include <csignal>
// Need POSIX signal APIs like sigaction/siginfo_t.
// NOLINTNEXTLINE(modernize-deprecated-headers)
#include <signal.h>

#include "fs/encrypt_file.h"
#include "gutil/cpu.h"
#include "jemalloc/jemalloc.h"

@@ -171,12 +164,6 @@ struct JemallocStats {
};

static void retrieve_jemalloc_stats(JemallocStats* stats) {
// On macOS, jemalloc may define je_mallctl as mallctl via macro in jemalloc.h
#ifdef __APPLE__
#ifndef je_mallctl
#define je_mallctl mallctl
#endif
#endif
uint64_t epoch = 1;
size_t sz = sizeof(epoch);
je_mallctl("epoch", &epoch, &sz, &epoch, sz);

@@ -18,12 +18,6 @@
#include <glog/logging.h>
#include <glog/vlog_is_on.h>
#include <jemalloc/jemalloc.h>
#ifdef __APPLE__
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/thread_act.h>
#include <pthread.h>
#endif

#include <cerrno>
#include <cstdio>

@@ -33,7 +27,7 @@
#include <mutex>

#include "cache/datacache.h"
#include "cache/mem_cache/page_cache.h"
#include "cache/object_cache/page_cache.h"
#include "common/config.h"
#include "gutil/endian.h"
#include "gutil/stringprintf.h"

@@ -134,12 +128,7 @@ static void dontdump_unused_pages() {
static bool start_dump = false;
struct timeval tv;
gettimeofday(&tv, nullptr);
// On macOS, pthread_t is an opaque pointer; convert to a numeric id for fmt
#ifdef __APPLE__
uint64_t tid = static_cast<uint64_t>(pthread_mach_thread_np(pthread_self()));
#else
pthread_t tid = pthread_self();
#endif
const uint32_t MAX_BUFFER_SIZE = 1024;
char buffer[MAX_BUFFER_SIZE] = {};
// memory_buffer allocate 500 bytes from stack

@@ -147,13 +136,7 @@ static void dontdump_unused_pages() {
if (!start_dump) {
int res = snprintf(buffer, MAX_BUFFER_SIZE, "arena.%d.purge", MALLCTL_ARENAS_ALL);
buffer[res] = '\0';
int ret =
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
(buffer, nullptr, nullptr, nullptr, 0);
int ret = je_mallctl(buffer, nullptr, nullptr, nullptr, 0);

if (ret != 0) {
FMT_LOG("je_mallctl execute purge failed, errno:{}", ret);

@@ -163,13 +146,7 @@ static void dontdump_unused_pages() {
res = snprintf(buffer, MAX_BUFFER_SIZE, "arena.%d.dontdump", MALLCTL_ARENAS_ALL);
buffer[res] = '\0';
ret =
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
(buffer, nullptr, nullptr, nullptr, 0);
ret = je_mallctl(buffer, nullptr, nullptr, nullptr, 0);

if (ret != 0) {
FMT_LOG("je_mallctl execute dontdump failed, errno:{}", ret);

@@ -222,10 +199,8 @@ bool init_glog(const char* basename, bool install_signal_handler) {
FLAGS_logbuflevel = 0;
// Buffer log messages for at most this many seconds.
FLAGS_logbufsecs = 30;
// Set roll num. Not available with Homebrew glog on macOS.
#ifndef __APPLE__
// Set roll num.
FLAGS_log_filenum_quota = config::sys_log_roll_num;
#endif

// Set log level.
std::string loglevel = config::sys_log_level;

@@ -255,19 +230,13 @@ bool init_glog(const char* basename, bool install_signal_handler) {
std::string sizeflag = "SIZE-MB-";
bool ok = false;
if (rollmode.compare("TIME-DAY") == 0) {
#ifndef __APPLE__
FLAGS_log_split_method = "day";
#endif
ok = true;
} else if (rollmode.compare("TIME-HOUR") == 0) {
#ifndef __APPLE__
FLAGS_log_split_method = "hour";
#endif
ok = true;
} else if (rollmode.substr(0, sizeflag.length()).compare(sizeflag) == 0) {
#ifndef __APPLE__
FLAGS_log_split_method = "size";
#endif
std::string sizestr = rollmode.substr(sizeflag.size(), rollmode.size() - sizeflag.size());
if (sizestr.size() != 0) {
char* end = nullptr;

@@ -309,10 +278,7 @@ bool init_glog(const char* basename, bool install_signal_handler) {
if (config::dump_trace_info) {
google::InstallFailureWriter(failure_writer);
google::InstallFailureFunction((google::logging_fail_func_t)failure_function);
#ifndef MACOS_DISABLE_GLOG_STACKTRACE
// This symbol may be unavailable on macOS builds using system glog.
google::InstallFailureHandlerAfterOutputLog(failure_handler_after_output_log);
#endif
}

logging_initialized = true;
@@ -27,45 +27,21 @@
namespace starrocks {
// detail implements for allocator
static int set_jemalloc_profiling(bool enable) {
int ret =
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
("prof.active", nullptr, nullptr, &enable, 1);
ret |=
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
("prof.thread_active_init", nullptr, nullptr, &enable, 1);
int ret = je_mallctl("prof.active", nullptr, nullptr, &enable, 1);
ret |= je_mallctl("prof.thread_active_init", nullptr, nullptr, &enable, 1);
return ret;
}

static int has_enable_heap_profile() {
int value = 0;
size_t size = sizeof(value);

#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
("prof.active", &value, &size, nullptr, 0);
je_mallctl("prof.active", &value, &size, nullptr, 0);
return value;
}

bool dump_snapshot(const std::string& filename) {
const char* fname = filename.c_str();
return (
#ifdef __APPLE__
mallctl
#else
je_mallctl
#endif
("prof.dump", nullptr, nullptr, &fname, sizeof(const char*))) == 0;
return je_mallctl("prof.dump", nullptr, nullptr, &fname, sizeof(const char*)) == 0;
}

// declare exec from script

@@ -109,4 +85,4 @@ std::string HeapProf::to_dot_format(const std::string& heapdump_filename) {
return exec(fmt::format("{} --dot {} {}", jeprof, binary, heapdump_filename));
}

} // namespace starrocks
} // namespace starrocks
@@ -41,8 +41,7 @@ Status ConnectorChunkSink::init() {
}

Status ConnectorChunkSink::write_partition_chunk(const std::string& partition,
const std::vector<int8_t>& partition_field_null_list,
const ChunkPtr& chunk) {
const std::vector<int8_t>& partition_field_null_list, Chunk* chunk) {
// partition_field_null_list is used to distinguish scenarios like NULL vs. the string "null".
// They are under the same dir path, but should not be in the same data file.
// We should record them in different files so that each data file could have its own meta info.

@@ -65,13 +64,13 @@ Status ConnectorChunkSink::write_partition_chunk(const std::string& partition,
return Status::OK();
}

Status ConnectorChunkSink::add(const ChunkPtr& chunk) {
Status ConnectorChunkSink::add(Chunk* chunk) {
std::string partition = DEFAULT_PARTITION;
bool partitioned = !_partition_column_names.empty();
if (partitioned) {
ASSIGN_OR_RETURN(partition,
HiveUtils::make_partition_name(_partition_column_names, _partition_column_evaluators,
chunk.get(), _support_null_partition));
HiveUtils::make_partition_name(_partition_column_names, _partition_column_evaluators, chunk,
_support_null_partition));
}

RETURN_IF_ERROR(

@@ -80,13 +79,6 @@ Status ConnectorChunkSink::add(const ChunkPtr& chunk) {
}

Status ConnectorChunkSink::finish() {
// Flushing data to disk to make more memory space for subsequent merge operations.
for (auto& [partition_key, writer] : _partition_chunk_writers) {
RETURN_IF_ERROR(writer->flush());
}
for (auto& [partition_key, writer] : _partition_chunk_writers) {
RETURN_IF_ERROR(writer->wait_flush());
}
for (auto& [partition_key, writer] : _partition_chunk_writers) {
RETURN_IF_ERROR(writer->finish());
}

@@ -47,7 +47,7 @@ public:
Status init();

virtual Status add(const ChunkPtr& chunk);
virtual Status add(Chunk* chunk);

Status finish();

@@ -58,7 +58,7 @@ public:
virtual void callback_on_commit(const CommitResult& result) = 0;

Status write_partition_chunk(const std::string& partition, const vector<int8_t>& partition_field_null_list,
const ChunkPtr& chunk);
Chunk* chunk);

Status status();
@@ -50,16 +50,13 @@ int ConnectorSinkSpillExecutor::calc_max_thread_num() {
}

void ChunkSpillTask::run() {
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_mem_tracker);
auto res = _load_chunk_spiller->spill(*_chunk);
if (_cb) {
_cb(_chunk, res);
}
_chunk.reset();
}

void MergeBlockTask::run() {
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_mem_tracker);
auto st = _writer->merge_blocks();
if (_cb) {
_cb(st);

@@ -71,12 +71,9 @@ protected:
class ChunkSpillTask final : public Runnable {
public:
ChunkSpillTask(LoadChunkSpiller* load_chunk_spiller, ChunkPtr chunk, MemTracker* mem_tracker,
ChunkSpillTask(LoadChunkSpiller* load_chunk_spiller, ChunkPtr chunk,
std::function<void(ChunkPtr chunk, const StatusOr<size_t>&)> cb)
: _load_chunk_spiller(load_chunk_spiller),
_chunk(std::move(chunk)),
_mem_tracker(mem_tracker),
_cb(std::move(cb)) {}
: _load_chunk_spiller(load_chunk_spiller), _chunk(chunk), _cb(std::move(cb)) {}

~ChunkSpillTask() override = default;

@@ -85,20 +82,18 @@ public:
private:
LoadChunkSpiller* _load_chunk_spiller;
ChunkPtr _chunk;
MemTracker* _mem_tracker;
std::function<void(ChunkPtr, const StatusOr<size_t>&)> _cb;
};

class MergeBlockTask : public Runnable {
public:
MergeBlockTask(SpillPartitionChunkWriter* writer, MemTracker* mem_tracker, std::function<void(const Status&)> cb)
: _writer(writer), _mem_tracker(mem_tracker), _cb(std::move(cb)) {}
MergeBlockTask(SpillPartitionChunkWriter* writer, std::function<void(const Status&)> cb)
: _writer(writer), _cb(std::move(cb)) {}

void run() override;

private:
SpillPartitionChunkWriter* _writer;
MemTracker* _mem_tracker;
std::function<void(const Status&)> _cb;
};
@@ -56,9 +56,8 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> FileChunkSinkProvider::create_chun
std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
if (boost::iequals(ctx->format, formats::PARQUET)) {
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->column_names,
std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(std::move(column_evaluators)),
std::nullopt, ctx->executor, runtime_state);
fs, ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators), std::nullopt,
ctx->executor, runtime_state);
} else if (boost::iequals(ctx->format, formats::ORC)) {
file_writer_factory = std::make_shared<formats::ORCFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators), ctx->executor,

@@ -66,8 +66,7 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> HiveChunkSinkProvider::create_chun
ctx->options[formats::ParquetWriterOptions::USE_LEGACY_DECIMAL_ENCODING] = "true";
ctx->options[formats::ParquetWriterOptions::USE_INT96_TIMESTAMP_ENCODING] = "true";
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->data_column_names,
std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(std::move(data_column_evaluators)),
fs, ctx->compression_type, ctx->options, ctx->data_column_names, std::move(data_column_evaluators),
std::nullopt, ctx->executor, runtime_state);
} else if (boost::iequals(ctx->format, formats::ORC)) {
file_writer_factory = std::make_shared<formats::ORCFileWriterFactory>(

@@ -82,8 +82,7 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
auto ctx = std::dynamic_pointer_cast<IcebergChunkSinkContext>(context);
auto runtime_state = ctx->fragment_context->runtime_state();
std::shared_ptr<FileSystem> fs = FileSystem::CreateUniqueFromString(ctx->path, FSOptions(&ctx->cloud_conf)).value();
auto column_evaluators = std::make_shared<std::vector<std::unique_ptr<ColumnEvaluator>>>(
ColumnEvaluator::clone(ctx->column_evaluators));
auto column_evaluators = ColumnEvaluator::clone(ctx->column_evaluators);
auto location_provider = std::make_shared<connector::LocationProvider>(
ctx->path, print_id(ctx->fragment_context->query_id()), runtime_state->be_number(), driver_id,
boost::to_lower_copy(ctx->format));

@@ -94,8 +93,8 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
std::shared_ptr<formats::FileWriterFactory> file_writer_factory;
if (boost::iequals(ctx->format, formats::PARQUET)) {
file_writer_factory = std::make_shared<formats::ParquetFileWriterFactory>(
fs, ctx->compression_type, ctx->options, ctx->column_names, column_evaluators, ctx->parquet_field_ids,
ctx->executor, runtime_state);
fs, ctx->compression_type, ctx->options, ctx->column_names, std::move(column_evaluators),
ctx->parquet_field_ids, ctx->executor, runtime_state);
} else {
file_writer_factory = std::make_shared<formats::UnknownFileWriterFactory>(ctx->format);
}

@@ -108,7 +107,7 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
fs,
ctx->fragment_context,
runtime_state->desc_tbl().get_tuple_descriptor(ctx->tuple_desc_id),
column_evaluators,
&ctx->column_evaluators,
ctx->sort_ordering});
partition_chunk_writer_factory = std::make_unique<SpillPartitionChunkWriterFactory>(partition_chunk_writer_ctx);
} else {

@@ -124,14 +123,14 @@ StatusOr<std::unique_ptr<ConnectorChunkSink>> IcebergChunkSinkProvider::create_c
std::move(partition_chunk_writer_factory), runtime_state);
}

Status IcebergChunkSink::add(const ChunkPtr& chunk) {
Status IcebergChunkSink::add(Chunk* chunk) {
std::string partition = DEFAULT_PARTITION;
bool partitioned = !_partition_column_names.empty();
std::vector<int8_t> partition_field_null_list;
if (partitioned) {
ASSIGN_OR_RETURN(partition, HiveUtils::iceberg_make_partition_name(
_partition_column_names, _partition_column_evaluators,
dynamic_cast<IcebergChunkSink*>(this)->transform_expr(), chunk.get(),
dynamic_cast<IcebergChunkSink*>(this)->transform_expr(), chunk,
_support_null_partition, partition_field_null_list));
}

@@ -45,7 +45,7 @@ public:
const std::vector<std::string>& transform_expr() const { return _transform_exprs; }

Status add(const ChunkPtr& chunk) override;
Status add(Chunk* chunk) override;

private:
std::vector<std::string> _transform_exprs;
@@ -625,7 +625,7 @@ void LakeDataSource::init_counter(RuntimeState* state) {
ADD_CHILD_COUNTER(_runtime_profile, "ShortKeyRangeNumber", TUnit::UNIT, segment_init_name);
_column_iterator_init_timer = ADD_CHILD_TIMER(_runtime_profile, "ColumnIteratorInit", segment_init_name);
_bitmap_index_iterator_init_timer = ADD_CHILD_TIMER(_runtime_profile, "BitmapIndexIteratorInit", segment_init_name);
_zone_map_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ZoneMapIndexFilter", segment_init_name);
_zone_map_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ZoneMapIndexFiter", segment_init_name);
_rows_key_range_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ShortKeyFilter", segment_init_name);
_bf_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "BloomFilterFilter", segment_init_name);
@@ -72,12 +72,12 @@ Status BufferPartitionChunkWriter::init() {
return Status::OK();
}

Status BufferPartitionChunkWriter::write(const ChunkPtr& chunk) {
if (_file_writer && _file_writer->get_written_bytes() >= _max_file_size) {
Status BufferPartitionChunkWriter::write(Chunk* chunk) {
RETURN_IF_ERROR(create_file_writer_if_needed());
if (_file_writer->get_written_bytes() >= _max_file_size) {
commit_file();
}
RETURN_IF_ERROR(create_file_writer_if_needed());
return _file_writer->write(chunk.get());
return _file_writer->write(chunk);
}

Status BufferPartitionChunkWriter::flush() {

@@ -85,10 +85,6 @@ Status BufferPartitionChunkWriter::flush() {
return Status::OK();
}

Status BufferPartitionChunkWriter::wait_flush() {
return Status::OK();
}

Status BufferPartitionChunkWriter::finish() {
commit_file();
return Status::OK();

@@ -106,7 +102,6 @@ SpillPartitionChunkWriter::SpillPartitionChunkWriter(std::string partition,
_block_merge_token = StorageEngine::instance()->load_spill_block_merge_executor()->create_token();
_tuple_desc = ctx->tuple_desc;
_writer_id = generate_uuid();
_spill_mode = _sort_ordering != nullptr;
}

SpillPartitionChunkWriter::~SpillPartitionChunkWriter() {

@@ -125,16 +120,16 @@ Status SpillPartitionChunkWriter::init() {
RETURN_IF_ERROR(_load_spill_block_mgr->init());
_load_chunk_spiller = std::make_unique<LoadChunkSpiller>(_load_spill_block_mgr.get(),
_fragment_context->runtime_state()->runtime_profile());
if (_column_evaluators) {
RETURN_IF_ERROR(ColumnEvaluator::init(*_column_evaluators));
}
return Status::OK();
}

Status SpillPartitionChunkWriter::write(const ChunkPtr& chunk) {
Status SpillPartitionChunkWriter::write(Chunk* chunk) {
RETURN_IF_ERROR(create_file_writer_if_needed());
if (!_spill_mode) {
return _write_chunk(chunk.get());
}

_chunks.push_back(chunk);
_chunks.push_back(chunk->clone_unique());
_chunk_bytes_usage += chunk->bytes_usage();
if (!_base_chunk) {
_base_chunk = _chunks.back();

@@ -154,21 +149,11 @@ Status SpillPartitionChunkWriter::write(const ChunkPtr& chunk) {
Status SpillPartitionChunkWriter::flush() {
RETURN_IF(!_file_writer, Status::OK());
// Change to spill mode if memory is insufficient.
if (!_spill_mode) {
_spill_mode = true;
commit_file();
return Status::OK();
}
return _spill();
}

Status SpillPartitionChunkWriter::wait_flush() {
_chunk_spill_token->wait();
return Status::OK();
}

Status SpillPartitionChunkWriter::finish() {
_chunk_spill_token->wait();
// If no chunks have been spilled, flush data to remote file directly.
if (_load_chunk_spiller->empty()) {
VLOG(2) << "flush to remote directly when finish, query_id: " << print_id(_fragment_context->query_id())

@@ -184,8 +169,7 @@ Status SpillPartitionChunkWriter::finish() {
_handle_err(st);
commit_file();
};
auto merge_task = std::make_shared<MergeBlockTask>(this, _fragment_context->runtime_state()->instance_mem_tracker(),
std::move(cb));
auto merge_task = std::make_shared<MergeBlockTask>(this, cb);
return _block_merge_token->submit(merge_task);
}

@@ -198,7 +182,9 @@ bool SpillPartitionChunkWriter::is_finished() {
}

Status SpillPartitionChunkWriter::merge_blocks() {
RETURN_IF_ERROR(flush());
_chunk_spill_token->wait();

auto write_func = [this](Chunk* chunk) { return _flush_chunk(chunk, false); };
auto flush_func = [this]() {
// Commit file after each merge function to ensure the data written to one file is ordered,

@@ -253,13 +239,10 @@ Status SpillPartitionChunkWriter::_spill() {
}
_spilling_bytes_usage.fetch_sub(chunk->bytes_usage(), std::memory_order_relaxed);
};
auto spill_task = std::make_shared<ChunkSpillTask>(_load_chunk_spiller.get(), _result_chunk,
_fragment_context->runtime_state()->instance_mem_tracker(),
std::move(callback));
auto spill_task = std::make_shared<ChunkSpillTask>(_load_chunk_spiller.get(), _result_chunk, callback);
RETURN_IF_ERROR(_chunk_spill_token->submit(spill_task));
_spilling_bytes_usage.fetch_add(_result_chunk->bytes_usage(), std::memory_order_relaxed);
_chunk_bytes_usage = 0;
_result_chunk.reset();
return Status::OK();
}

@@ -347,6 +330,7 @@ Status SpillPartitionChunkWriter::_merge_chunks() {
}
}
}

chunk.reset();
}
@@ -50,7 +50,7 @@ struct SpillPartitionChunkWriterContext : public PartitionChunkWriterContext {
std::shared_ptr<FileSystem> fs;
pipeline::FragmentContext* fragment_context = nullptr;
TupleDescriptor* tuple_desc = nullptr;
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> column_evaluators;
std::vector<std::unique_ptr<ColumnEvaluator>>* column_evaluators;
std::shared_ptr<SortOrdering> sort_ordering;
};

@@ -63,12 +63,10 @@ public:
virtual Status init() = 0;

virtual Status write(const ChunkPtr& chunk) = 0;
virtual Status write(Chunk* chunk) = 0;

virtual Status flush() = 0;

virtual Status wait_flush() = 0;

virtual Status finish() = 0;

virtual bool is_finished() = 0;

@@ -120,12 +118,10 @@ public:
Status init() override;

Status write(const ChunkPtr& chunk) override;
Status write(Chunk* chunk) override;

Status flush() override;

Status wait_flush() override;

Status finish() override;

bool is_finished() override { return true; }

@@ -144,12 +140,10 @@ public:
Status init() override;

Status write(const ChunkPtr& chunk) override;
Status write(Chunk* chunk) override;

Status flush() override;

Status wait_flush() override;

Status finish() override;

bool is_finished() override;

@@ -162,12 +156,7 @@ public:
_file_writer->get_written_bytes();
}

int64_t get_flushable_bytes() override {
if (!_spill_mode) {
return _file_writer ? _file_writer->get_written_bytes() : 0;
}
return _chunk_bytes_usage;
}
int64_t get_flushable_bytes() override { return _chunk_bytes_usage; }

Status merge_blocks();

@@ -196,12 +185,13 @@ private:
std::shared_ptr<FileSystem> _fs = nullptr;
pipeline::FragmentContext* _fragment_context = nullptr;
TupleDescriptor* _tuple_desc = nullptr;
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> _column_evaluators;
std::vector<std::unique_ptr<ColumnEvaluator>>* _column_evaluators;
std::shared_ptr<SortOrdering> _sort_ordering;
std::unique_ptr<ThreadPoolToken> _chunk_spill_token;
std::unique_ptr<ThreadPoolToken> _block_merge_token;
std::unique_ptr<LoadSpillBlockManager> _load_spill_block_mgr;
std::shared_ptr<LoadChunkSpiller> _load_chunk_spiller;
//std::function<StatusOr<ColumnPtr>(Chunk*, size_t)> _column_eval_func;
TUniqueId _writer_id;

std::list<ChunkPtr> _chunks;

@@ -211,7 +201,6 @@ private:
ChunkPtr _base_chunk;
SchemaPtr _schema;
std::unordered_map<int, int> _col_index_map; // result chunk index -> chunk index
bool _spill_mode = false;

static const int64_t kWaitMilliseconds;
};
@ -51,9 +51,8 @@ bool SinkOperatorMemoryManager::kill_victim() {
// The flush will decrease the writer flushable memory bytes, so it usually
// will not be choosed in a short time.
const auto filename = victim->out_stream()->filename();
size_t flush_bytes = victim->get_flushable_bytes();
const auto result = victim->flush();
LOG(INFO) << "kill victim: " << filename << ", result: " << result << ", flushable_bytes: " << flush_bytes;
LOG(INFO) << "kill victim: " << filename << ", result: " << result;
return true;
}

@ -119,7 +118,7 @@ bool SinkMemoryManager::_apply_on_mem_tracker(SinkOperatorMemoryManager* child_m

auto available_memory = [&]() { return mem_tracker->limit() - mem_tracker->consumption(); };
auto low_watermark = static_cast<int64_t>(mem_tracker->limit() * _low_watermark_ratio);
int64_t flush_watermark = mem_tracker->limit() * _urgent_space_ratio;
int64_t flush_watermark = _query_tracker->limit() * _urgent_space_ratio;
while (available_memory() <= low_watermark) {
child_manager->update_writer_occupied_memory();
int64_t total_occupied_memory = _total_writer_occupied_memory();
@ -556,13 +556,7 @@ Status HashJoiner::_create_runtime_in_filters(RuntimeState* state) {
SCOPED_TIMER(build_metrics().build_runtime_filter_timer);
size_t ht_row_count = get_ht_row_count();

// Use FE session variable if set, otherwise fall back to BE config
size_t max_conditions = config::max_pushdown_conditions_per_column;
if (state->query_options().__isset.max_pushdown_conditions_per_column) {
max_conditions = state->query_options().max_pushdown_conditions_per_column;
}

if (ht_row_count > max_conditions) {
if (ht_row_count > config::max_pushdown_conditions_per_column) {
return Status::OK();
}
@ -14,7 +14,7 @@

#include "exec/hdfs_scanner/hdfs_scanner.h"

#include "cache/disk_cache/block_cache_hit_rate_counter.hpp"
#include "cache/block_cache/block_cache_hit_rate_counter.hpp"
#include "column/column_helper.h"
#include "column/type_traits.h"
#include "connector/deletion_vector/deletion_vector.h"

@ -17,7 +17,6 @@
#include <atomic>
#include <boost/algorithm/string.hpp>

#include "cache/cache_options.h"
#include "connector/deletion_vector/deletion_bitmap.h"
#include "exec/olap_scan_prepare.h"
#include "exec/pipeline/scan/morsel.h"
@ -80,19 +80,12 @@ Status ChunkSource::buffer_next_batch_chunks_blocking(RuntimeState* state, size_
if (_status.is_end_of_file()) {
chunk->owner_info().set_owner_id(owner_id, true);
_chunk_buffer.put(_scan_operator_seq, std::move(chunk), std::move(_chunk_token));
break;
} else if (_status.is_time_out()) {
chunk->owner_info().set_owner_id(owner_id, false);
_chunk_buffer.put(_scan_operator_seq, std::move(chunk), std::move(_chunk_token));
_status = Status::OK();
break;
} else if (_status.is_eagain()) {
// EAGAIN is normal case, but sleep a while to avoid busy loop
SleepFor(MonoDelta::FromNanoseconds(workgroup::WorkGroup::YIELD_PREEMPT_MAX_TIME_SPENT));
_status = Status::OK();
} else {
break;
}
break;
}

// schema won't be used by the computing layer, here we just reset it.

@ -17,6 +17,7 @@
#include "exec/connector_scan_node.h"
#include "exec/pipeline/pipeline_driver.h"
#include "exec/pipeline/scan/balanced_chunk_buffer.h"
#include "runtime/exec_env.h"
#include "runtime/runtime_state.h"

namespace starrocks::pipeline {

@ -762,7 +763,7 @@ Status ConnectorChunkSource::_read_chunk(RuntimeState* state, ChunkPtr* chunk) {
RETURN_IF_ERROR(_open_data_source(state, &mem_alloc_failed));
if (mem_alloc_failed) {
_mem_alloc_failed_count += 1;
return Status::EAgain("");
return Status::TimedOut("");
}
if (state->is_cancelled()) {
return Status::Cancelled("canceled state");

@ -178,7 +178,7 @@ void OlapChunkSource::_init_counter(RuntimeState* state) {
ADD_CHILD_COUNTER(_runtime_profile, "RemainingRowsAfterShortKeyFilter", TUnit::UNIT, segment_init_name);
_column_iterator_init_timer = ADD_CHILD_TIMER(_runtime_profile, "ColumnIteratorInit", segment_init_name);
_bitmap_index_iterator_init_timer = ADD_CHILD_TIMER(_runtime_profile, "BitmapIndexIteratorInit", segment_init_name);
_zone_map_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ZoneMapIndexFilter", segment_init_name);
_zone_map_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ZoneMapIndexFiter", segment_init_name);
_rows_key_range_filter_timer = ADD_CHILD_TIMER(_runtime_profile, "ShortKeyFilter", segment_init_name);
_rows_key_range_counter =
ADD_CHILD_COUNTER(_runtime_profile, "ShortKeyRangeNumber", TUnit::UNIT, segment_init_name);
@ -0,0 +1,110 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "exec/pipeline/scan/olap_meta_scan_operator.h"

#include <utility>

#include "exec/olap_meta_scanner.h"
#include "exec/pipeline/scan/olap_meta_chunk_source.h"
#include "exec/pipeline/scan/olap_meta_scan_context.h"

namespace starrocks::pipeline {

OlapMetaScanOperatorFactory::OlapMetaScanOperatorFactory(int32_t id, ScanNode* meta_scan_node, size_t dop,
std::shared_ptr<OlapMetaScanContextFactory> ctx_factory)
: ScanOperatorFactory(id, meta_scan_node), _ctx_factory(std::move(ctx_factory)) {}

Status OlapMetaScanOperatorFactory::do_prepare(RuntimeState* state) {
return Status::OK();
}

void OlapMetaScanOperatorFactory::do_close(RuntimeState* state) {}

OperatorPtr OlapMetaScanOperatorFactory::do_create(int32_t dop, int32_t driver_sequence) {
return std::make_shared<OlapMetaScanOperator>(this, _id, driver_sequence, dop, _scan_node,
_ctx_factory->get_or_create(driver_sequence));
}

OlapMetaScanOperator::OlapMetaScanOperator(OperatorFactory* factory, int32_t id, int32_t driver_sequence, int32_t dop,
ScanNode* meta_scan_node, OlapMetaScanContextPtr ctx)
: ScanOperator(factory, id, driver_sequence, dop, meta_scan_node), _ctx(std::move(ctx)) {}

OlapMetaScanOperator::~OlapMetaScanOperator() = default;

bool OlapMetaScanOperator::has_output() const {
if (!_ctx->is_prepare_finished()) {
return false;
}
return ScanOperator::has_output();
}

bool OlapMetaScanOperator::is_finished() const {
if (!_ctx->is_prepare_finished()) {
return false;
}
return ScanOperator::is_finished();
}

Status OlapMetaScanOperator::do_prepare(RuntimeState* state) {
return Status::OK();
}

void OlapMetaScanOperator::do_close(RuntimeState* state) {}

ChunkSourcePtr OlapMetaScanOperator::create_chunk_source(MorselPtr morsel, int32_t chunk_source_index) {
return std::make_shared<OlapMetaChunkSource>(this, _runtime_profile.get(), std::move(morsel), _ctx);
}

ChunkPtr OlapMetaScanOperator::get_chunk_from_buffer() {
ChunkPtr chunk = nullptr;
if (_ctx->get_chunk_buffer().try_get(_driver_sequence, &chunk)) {
return chunk;
}
return nullptr;
}

size_t OlapMetaScanOperator::num_buffered_chunks() const {
return _ctx->get_chunk_buffer().size(_driver_sequence);
}

size_t OlapMetaScanOperator::buffer_size() const {
return _ctx->get_chunk_buffer().limiter()->size();
}

size_t OlapMetaScanOperator::buffer_capacity() const {
return _ctx->get_chunk_buffer().limiter()->capacity();
}

size_t OlapMetaScanOperator::buffer_memory_usage() const {
return _ctx->get_chunk_buffer().memory_usage();
}

size_t OlapMetaScanOperator::default_buffer_capacity() const {
return _ctx->get_chunk_buffer().limiter()->default_capacity();
}

ChunkBufferTokenPtr OlapMetaScanOperator::pin_chunk(int num_chunks) {
return _ctx->get_chunk_buffer().limiter()->pin(num_chunks);
}

bool OlapMetaScanOperator::is_buffer_full() const {
return _ctx->get_chunk_buffer().limiter()->is_full();
}

void OlapMetaScanOperator::set_buffer_finished() {
_ctx->get_chunk_buffer().set_finished(_driver_sequence);
}

} // namespace starrocks::pipeline
@ -0,0 +1,73 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "exec/olap_meta_scan_node.h"
#include "exec/pipeline/pipeline_builder.h"
#include "exec/pipeline/scan/balanced_chunk_buffer.h"
#include "exec/pipeline/scan/olap_meta_scan_context.h"
#include "exec/pipeline/scan/scan_operator.h"
#include "gen_cpp/Types_types.h"

namespace starrocks::pipeline {

class OlapMetaScanOperatorFactory final : public ScanOperatorFactory {
public:
OlapMetaScanOperatorFactory(int32_t id, ScanNode* meta_scan_node, size_t dop,
std::shared_ptr<OlapMetaScanContextFactory> ctx_factory);

~OlapMetaScanOperatorFactory() override = default;

bool with_morsels() const override { return true; }

Status do_prepare(RuntimeState* state) override;
void do_close(RuntimeState* state) override;
OperatorPtr do_create(int32_t dop, int32_t driver_sequence) override;

private:
std::shared_ptr<OlapMetaScanContextFactory> _ctx_factory;
};

class OlapMetaScanOperator final : public ScanOperator {
public:
OlapMetaScanOperator(OperatorFactory* factory, int32_t id, int32_t driver_sequence, int32_t dop,
ScanNode* scan_node, OlapMetaScanContextPtr scan_ctx);

~OlapMetaScanOperator() override;

bool has_output() const override;
bool is_finished() const override;

Status do_prepare(RuntimeState* state) override;
void do_close(RuntimeState* state) override;
ChunkSourcePtr create_chunk_source(MorselPtr morsel, int32_t chunk_source_index) override;

private:
void attach_chunk_source(int32_t source_index) override {}
void detach_chunk_source(int32_t source_index) override {}
bool has_shared_chunk_source() const override { return false; }
ChunkPtr get_chunk_from_buffer() override;
size_t num_buffered_chunks() const override;
size_t buffer_size() const override;
size_t buffer_capacity() const override;
size_t buffer_memory_usage() const override;
size_t default_buffer_capacity() const override;
ChunkBufferTokenPtr pin_chunk(int num_chunks) override;
bool is_buffer_full() const override;
void set_buffer_finished() override;

OlapMetaScanContextPtr _ctx;
};
} // namespace starrocks::pipeline
@ -109,7 +109,7 @@ StatusOr<ChunkPtr> ConnectorSinkOperator::pull_chunk(RuntimeState* state) {
}

Status ConnectorSinkOperator::push_chunk(RuntimeState* state, const ChunkPtr& chunk) {
RETURN_IF_ERROR(_connector_chunk_sink->add(chunk));
RETURN_IF_ERROR(_connector_chunk_sink->add(chunk.get()));
return Status::OK();
}

@ -18,14 +18,6 @@
#include <memory>
#include <utility>

// On macOS, system headers may define a macro named current_task(),
// which conflicts with the method name below. Undefine to avoid collisions.
#ifdef __APPLE__
#ifdef current_task
#undef current_task
#endif
#endif

#include "column/vectorized_fwd.h"
#include "common/statusor.h"
#include "exec/spill/spiller.h"
@ -94,19 +94,17 @@ Status ScanNode::prepare(RuntimeState* state) {
}

// Distribute morsels from a single queue to multiple queues
static StatusOr<std::map<int, pipeline::MorselQueuePtr>> uniform_distribute_morsels(
pipeline::MorselQueuePtr morsel_queue, int dop) {
std::map<int, pipeline::MorselQueuePtr> queue_per_driver;
static std::map<int, pipeline::MorselQueuePtr> uniform_distribute_morsels(pipeline::MorselQueuePtr morsel_queue,
int dop) {
std::map<int, pipeline::Morsels> morsels_per_driver;
int driver_seq = 0;
while (!morsel_queue->empty()) {
auto maybe_morsel_status_or = morsel_queue->try_get();
if (UNLIKELY(!maybe_morsel_status_or.ok())) {
return maybe_morsel_status_or.status();
}
morsels_per_driver[driver_seq].push_back(std::move(maybe_morsel_status_or.value()));
auto maybe_morsel = morsel_queue->try_get();
DCHECK(maybe_morsel.ok());
morsels_per_driver[driver_seq].push_back(std::move(maybe_morsel.value()));
driver_seq = (driver_seq + 1) % dop;
}
std::map<int, pipeline::MorselQueuePtr> queue_per_driver;

auto morsel_queue_type = morsel_queue->type();
DCHECK(morsel_queue_type == pipeline::MorselQueue::Type::FIXED ||

@ -146,7 +144,7 @@ StatusOr<pipeline::MorselQueueFactoryPtr> ScanNode::convert_scan_range_to_morsel
// If not so much morsels, try to assign morsel uniformly among operators to avoid data skew
if (!always_shared_scan() && scan_dop > 1 && is_fixed_or_dynamic_morsel_queue &&
morsel_queue->num_original_morsels() <= io_parallelism) {
ASSIGN_OR_RETURN(auto morsel_queue_map, uniform_distribute_morsels(std::move(morsel_queue), scan_dop));
auto morsel_queue_map = uniform_distribute_morsels(std::move(morsel_queue), scan_dop);
return std::make_unique<pipeline::IndividualMorselQueueFactory>(std::move(morsel_queue_map),
/*could_local_shuffle*/ true);
} else {
@ -21,7 +21,7 @@
#include "runtime/exec_env.h"

#ifdef WITH_STARCACHE
#include "cache/disk_cache/starcache_engine.h"
#include "cache/starcache_engine.h"
#endif

namespace starrocks {

@ -68,14 +68,9 @@ Status SchemaBeDataCacheMetricsScanner::get_next(ChunkPtr* chunk, bool* eos) {
row.emplace_back(_be_id);

// TODO: Support LRUCacheEngine
auto* mem_cache = DataCache::GetInstance()->local_mem_cache();
DataCacheMemMetrics mem_metrics;
if (mem_cache != nullptr && mem_cache->is_initialized()) {
mem_metrics = mem_cache->cache_metrics();
}
auto* disk_cache = DataCache::GetInstance()->local_disk_cache();
if (disk_cache != nullptr && disk_cache->is_initialized()) {
auto* starcache = reinterpret_cast<StarCacheEngine*>(disk_cache);
auto* cache = DataCache::GetInstance()->local_disk_cache();
if (cache != nullptr && cache->is_initialized() && cache->engine_type() == LocalCacheEngineType::STARCACHE) {
auto* starcache = reinterpret_cast<StarCacheEngine*>(cache);
// retrieve different priority's used bytes from level = 2 metrics
metrics = starcache->starcache_metrics(2);

@ -84,8 +79,8 @@ Status SchemaBeDataCacheMetricsScanner::get_next(ChunkPtr* chunk, bool* eos) {
row.emplace_back(Slice(status));
row.emplace_back(metrics.disk_quota_bytes);
row.emplace_back(metrics.disk_used_bytes);
row.emplace_back(mem_metrics.mem_quota_bytes);
row.emplace_back(mem_metrics.mem_used_bytes);
row.emplace_back(metrics.mem_quota_bytes);
row.emplace_back(metrics.mem_used_bytes);
row.emplace_back(metrics.meta_used_bytes);

const auto& dir_spaces = metrics.disk_dir_spaces;
@ -14,11 +14,7 @@

#pragma once

#ifdef __APPLE__
#include <sys/mount.h>
#else
#include <sys/statfs.h>
#endif

#include <atomic>
#include <memory>

@ -105,11 +101,7 @@ private:
struct statfs stat1, stat2;
statfs(path1.c_str(), &stat1);
statfs(path2.c_str(), &stat2);
#ifdef __APPLE__
return stat1.f_fsid.val[0] == stat2.f_fsid.val[0] && stat1.f_fsid.val[1] == stat2.f_fsid.val[1];
#else
return stat1.f_fsid.__val[0] == stat2.f_fsid.__val[0] && stat1.f_fsid.__val[1] == stat2.f_fsid.__val[1];
#endif
}

std::vector<DirPtr> _dirs;

@ -125,4 +117,4 @@ private:
Status::RuntimeError(fmt::format("acquire size error: dir {} try acquire:{} usage:{} capacity:{}", dir->dir(), \
acquire_size, dir->get_current_size(), dir->get_max_size()))

} // namespace starrocks::spill
} // namespace starrocks::spill
@ -325,6 +325,8 @@ void PartitionedSpillerWriter::_add_partition(SpilledPartitionPtr&& partition_pt
}

void PartitionedSpillerWriter::_remove_partition(const SpilledPartition* partition) {
auto affinity_group = partition->block_group->get_affinity_group();
DCHECK(affinity_group != kDefaultBlockAffinityGroup);
_id_to_partitions.erase(partition->partition_id);
size_t level = partition->level;
auto& partitions = _level_to_partitions[level];

@ -332,12 +334,6 @@ void PartitionedSpillerWriter::_remove_partition(const SpilledPartition* partiti
auto iter = std::find_if(partitions.begin(), partitions.end(),
[partition](auto& val) { return val->partition_id == partition->partition_id; });
_total_partition_num -= (iter != partitions.end());
if (partition->block_group != nullptr) {
auto affinity_group = partition->block_group->get_affinity_group();
DCHECK(affinity_group != kDefaultBlockAffinityGroup);
WARN_IF_ERROR(_spiller->block_manager()->release_affinity_group(affinity_group),
fmt::format("release affinity group {} error", affinity_group));
}
partitions.erase(iter);
if (partitions.empty()) {
_level_to_partitions.erase(level);

@ -345,6 +341,8 @@ void PartitionedSpillerWriter::_remove_partition(const SpilledPartition* partiti
_min_level = level + 1;
}
}
WARN_IF_ERROR(_spiller->block_manager()->release_affinity_group(affinity_group),
fmt::format("release affinity group {} error", affinity_group));
}

Status PartitionedSpillerWriter::_choose_partitions_to_flush(bool is_final_flush,
@ -46,9 +46,9 @@
#include "column/column_helper.h"
#include "column/map_column.h"
#include "column/nullable_column.h"
#include "common/config.h"
#include "common/statusor.h"
#include "common/tracer.h"
#include "config.h"
#include "exec/pipeline/query_context.h"
#include "exec/tablet_sink_colocate_sender.h"
#include "exprs/expr.h"

@ -19,10 +19,10 @@
#include "column/chunk.h"
#include "column/column_viewer.h"
#include "column/nullable_column.h"
#include "common/config.h"
#include "common/statusor.h"
#include "common/tracer.h"
#include "common/utils.h"
#include "config.h"
#include "exec/tablet_sink.h"
#include "exprs/expr_context.h"
#include "gutil/strings/fastmem.h"
@ -27,7 +27,6 @@ set(EXPR_FILES
agg/factory/aggregate_resolver_minmaxany.cpp
agg/factory/aggregate_resolver_others.cpp
agg/factory/aggregate_resolver_sumcount.cpp
agg/factory/aggregate_resolver_distinct.cpp
agg/factory/aggregate_resolver_stream.cpp
agg/factory/aggregate_resolver_utility.cpp
agg/factory/aggregate_resolver_variance.cpp
@ -16,12 +16,12 @@

#include <type_traits>

#include "column/array_column.h"
#include "column/type_traits.h"
#include "gutil/strings/fastmem.h"
#include "types/logical_type.h"

namespace starrocks {

// Type traits from aggregate functions
template <LogicalType lt, typename = guard::Guard>
struct AggDataTypeTraits {};

@ -41,7 +41,6 @@ struct AggDataTypeTraits<lt, FixedLengthLTGuard<lt>> {
static RefType get_ref(const ValueType& value) { return value; }

static void update_max(ValueType& current, const RefType& input) { current = std::max<ValueType>(current, input); }

static void update_min(ValueType& current, const RefType& input) { current = std::min<ValueType>(current, input); }

static bool is_equal(const RefType& lhs, const RefType& rhs) { return lhs == rhs; }

@ -57,9 +56,7 @@ struct AggDataTypeTraits<lt, ObjectFamilyLTGuard<lt>> {
using RefType = RunTimeCppType<lt>;

static void assign_value(ValueType& value, RefType ref) { value = *ref; }

static void assign_value(ColumnType* column, size_t row, const RefType& ref) { *column->get_object(row) = *ref; }

static void assign_value(ColumnType* column, size_t row, const ValueType& ref) { *column->get_object(row) = ref; }

static void append_value(ColumnType* column, const ValueType& value) { column->append(&value); }

@ -68,47 +65,12 @@ struct AggDataTypeTraits<lt, ObjectFamilyLTGuard<lt>> {
static const RefType get_row_ref(const ColumnType& column, size_t row) { return column.get_object(row); }

static void update_max(ValueType& current, const RefType& input) { current = std::max<ValueType>(current, *input); }

static void update_min(ValueType& current, const RefType& input) { current = std::min<ValueType>(current, *input); }

static bool is_equal(const RefType& lhs, const RefType& rhs) { return *lhs == *rhs; }
static bool equals(const ValueType& lhs, const RefType& rhs) { return lhs == *rhs; }
};

// For pointer ref types
template <LogicalType lt>
struct AggDataTypeTraits<lt, ArrayGuard<lt>> {
using CppType = RunTimeCppType<lt>;
using ColumnType = RunTimeColumnType<lt>;
using ValueType = typename ColumnType::MutablePtr;

struct RefType {
const ColumnType* column;
const size_t row;

RefType(const ColumnType* c, size_t r) : column(c), row(r) {}
};

static void assign_value(ValueType& value, const RefType& ref) {
value = ArrayColumn::static_pointer_cast(ref.column->clone_empty());
value->append_datum(ref.column->get(ref.row).template get<CppType>());
}

static void append_value(ColumnType* column, const ValueType& value) {
column->append_datum(value->get(0).template get<CppType>());
}

static RefType get_row_ref(const ColumnType& column, size_t row) { return RefType(&column, row); }

static bool is_equal(const ValueType& lhs, const ValueType& rhs) {
return lhs->get(0).template get<CppType>() == rhs->get(0).template get<CppType>();
}

static bool equals(const ValueType& lhs, const ValueType& rhs) {
return lhs->get(0).template get<CppType>() == rhs->get(0).template get<CppType>();
}
};

template <LogicalType lt>
struct AggDataTypeTraits<lt, StringLTGuard<lt>> {
using ColumnType = RunTimeColumnType<lt>;

@ -134,7 +96,6 @@ struct AggDataTypeTraits<lt, StringLTGuard<lt>> {
memcpy(current.data(), input.data, input.size);
}
}

static void update_min(ValueType& current, const RefType& input) {
if (Slice(current.data(), current.size()).compare(input) > 0) {
current.resize(input.size);

@ -149,4 +110,5 @@ template <LogicalType lt>
using AggDataValueType = typename AggDataTypeTraits<lt>::ValueType;
template <LogicalType lt>
using AggDataRefType = typename AggDataTypeTraits<lt>::RefType;
} // namespace starrocks

} // namespace starrocks
@ -75,7 +75,7 @@ public:
TYPE_BIGINT, TYPE_LARGEINT, TYPE_FLOAT, TYPE_DOUBLE,
TYPE_VARCHAR, TYPE_CHAR, TYPE_DATE, TYPE_DATETIME,
TYPE_DECIMALV2, TYPE_DECIMAL32, TYPE_DECIMAL64, TYPE_DECIMAL128,
TYPE_DECIMAL256, TYPE_HLL, TYPE_OBJECT, TYPE_ARRAY};
TYPE_DECIMAL256, TYPE_HLL, TYPE_OBJECT};
return kTypes;
}
@ -1,48 +0,0 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "column/type_traits.h"
#include "exprs/agg/distinct.h"
#include "exprs/agg/factory/aggregate_factory.hpp"
#include "exprs/agg/factory/aggregate_resolver.hpp"
#include "types/logical_type.h"

namespace starrocks {

struct DistinctDispatcher {
template <LogicalType lt>
void operator()(AggregateFuncResolver* resolver) {
if constexpr (lt_is_aggregate<lt>) {
using DistinctState = DistinctAggregateState<lt, SumResultLT<lt>>;
using DistinctState2 = DistinctAggregateStateV2<lt, SumResultLT<lt>>;
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DistinctState>(
"multi_distinct_count", false, AggregateFactory::MakeCountDistinctAggregateFunction<lt>());
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DistinctState2>(
"multi_distinct_count2", false, AggregateFactory::MakeCountDistinctAggregateFunctionV2<lt>());

resolver->add_aggregate_mapping<lt, SumResultLT<lt>, DistinctState>(
"multi_distinct_sum", false, AggregateFactory::MakeSumDistinctAggregateFunction<lt>());
resolver->add_aggregate_mapping<lt, SumResultLT<lt>, DistinctState2>(
"multi_distinct_sum2", false, AggregateFactory::MakeSumDistinctAggregateFunctionV2<lt>());
}
}
};

void AggregateFuncResolver::register_distinct() {
for (auto type : aggregate_types()) {
type_dispatch_all(type, DistinctDispatcher(), this);
}
}

} // namespace starrocks
@ -13,6 +13,7 @@
// limitations under the License.

#include "column/type_traits.h"
#include "exprs/agg/distinct.h"
#include "exprs/agg/factory/aggregate_factory.hpp"
#include "exprs/agg/factory/aggregate_resolver.hpp"
#include "exprs/agg/sum.h"

@ -51,6 +52,25 @@ struct StorageSumDispatcher {
}
};

struct DistinctDispatcher {
template <LogicalType lt>
void operator()(AggregateFuncResolver* resolver) {
if constexpr (lt_is_aggregate<lt>) {
using DistinctState = DistinctAggregateState<lt, SumResultLT<lt>>;
using DistinctState2 = DistinctAggregateStateV2<lt, SumResultLT<lt>>;
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DistinctState>(
"multi_distinct_count", false, AggregateFactory::MakeCountDistinctAggregateFunction<lt>());
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DistinctState2>(
"multi_distinct_count2", false, AggregateFactory::MakeCountDistinctAggregateFunctionV2<lt>());

resolver->add_aggregate_mapping<lt, SumResultLT<lt>, DistinctState>(
"multi_distinct_sum", false, AggregateFactory::MakeSumDistinctAggregateFunction<lt>());
resolver->add_aggregate_mapping<lt, SumResultLT<lt>, DistinctState2>(
"multi_distinct_sum2", false, AggregateFactory::MakeSumDistinctAggregateFunctionV2<lt>());
}
}
};

void AggregateFuncResolver::register_sumcount() {
for (auto type : aggregate_types()) {
type_dispatch_all(type, SumDispatcher(), this);

@ -73,4 +93,10 @@ void AggregateFuncResolver::register_sumcount() {
AggregateFactory::MakeCountNullableAggregateFunction<true>());
}

void AggregateFuncResolver::register_distinct() {
for (auto type : aggregate_types()) {
type_dispatch_all(type, DistinctDispatcher(), this);
}
}

} // namespace starrocks
@ -24,7 +24,7 @@ namespace starrocks {
struct WindowDispatcher {
template <LogicalType lt>
void operator()(AggregateFuncResolver* resolver) {
if constexpr (lt_is_aggregate<lt> || is_object_type(lt) || lt_is_array<lt>) {
if constexpr (lt_is_aggregate<lt> || is_object_type(lt)) {
resolver->add_aggregate_mapping_notnull<lt, lt>(
"first_value", true, AggregateFactory::MakeFirstValueWindowFunction<lt, false>());
// use first_value_in for first_value with ingnore nulls.
@ -13,7 +13,6 @@
// limitations under the License.

#pragma once
#include "column/array_column.h"
#include "column/column_helper.h"
#include "column/nullable_column.h"
#include "column/vectorized_fwd.h"

@ -102,10 +101,6 @@ struct ValueWindowStrategy<LT, JsonGuard<LT>> {
/// The dst Object column hasn't been resized.
static constexpr bool use_append = true;
};
template <LogicalType LT>
struct ValueWindowStrategy<LT, ArrayGuard<LT>> {
static constexpr bool use_append = true;
};

template <LogicalType LT, typename State, typename T = RunTimeCppType<LT>>
class ValueWindowFunction : public WindowFunction<State> {

@ -129,7 +124,7 @@ public:

Column* data_column = nullable_column->mutable_data_column();
auto* column = down_cast<InputColumnType*>(data_column);
auto& value = AggregateFunctionStateHelper<State>::data(state).value;
auto value = AggregateFunctionStateHelper<State>::data(state).value;
for (size_t i = start; i < end; ++i) {
AggDataTypeTraits<LT>::append_value(column, value);
}

@ -567,14 +562,8 @@ class LeadLagWindowFunction final : public ValueWindowFunction<LT, LeadLagState<
if (default_column->is_nullable()) {
this->data(state).default_is_null = true;
} else {
if constexpr (lt_is_array<LT>) {
const auto* column = down_cast<const ArrayColumn*>(ColumnHelper::get_data_column(arg2));
AggDataTypeTraits<LT>::assign_value(this->data(state).default_value,
AggDataTypeTraits<LT>::get_row_ref(*column, 0));
} else {
auto value = ColumnHelper::get_const_value<LT>(arg2);
AggDataTypeTraits<LT>::assign_value(this->data(state).default_value, value);
}
auto value = ColumnHelper::get_const_value<LT>(arg2);
AggDataTypeTraits<LT>::assign_value(this->data(state).default_value, value);
}

if constexpr (ignoreNulls) {

@ -680,13 +669,7 @@ class LeadLagWindowFunction final : public ValueWindowFunction<LT, LeadLagState<
if (this->data(state).default_is_null) {
this->data(state).is_null = true;
} else {
if constexpr (lt_is_array<LT>) {
AggDataTypeTraits<LT>::assign_value(
this->data(state).value,
AggDataTypeTraits<LT>::get_row_ref(*this->data(state).default_value, 0));
} else {
this->data(state).value = this->data(state).default_value;
}
this->data(state).value = this->data(state).default_value;
}
} else {
const Column* data_column = ColumnHelper::get_data_column(columns[0]);

@ -703,13 +686,7 @@ class LeadLagWindowFunction final : public ValueWindowFunction<LT, LeadLagState<
this->data(state).is_null = true;
} else {
this->data(state).is_null = false;
if constexpr (lt_is_array<LT>) {
AggDataTypeTraits<LT>::assign_value(
this->data(state).value,
AggDataTypeTraits<LT>::get_row_ref(*this->data(state).default_value, 0));
} else {
this->data(state).value = this->data(state).default_value;
}
this->data(state).value = this->data(state).default_value;
}
return;
}
@ -44,7 +44,7 @@ namespace starrocks {
\
virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new CLASS_NAME(*this)); }

[[maybe_unused]] static std::optional<LogicalType> eliminate_trivial_cast_for_decimal_mul(const Expr* e) {
static std::optional<LogicalType> eliminate_trivial_cast_for_decimal_mul(const Expr* e) {
DIAGNOSTIC_PUSH
#if defined(__GNUC__) && !defined(__clang__)
DIAGNOSTIC_IGNORE("-Wmaybe-uninitialized")
@ -176,8 +176,7 @@ StatusOr<ColumnPtr> CastStringToArray::evaluate_checked(ExprContext* context, Ch
// return null if not valid array
if (!is_valid_array(str, stack)) {
if (_throw_exception_if_err) {
return Status::InternalError(
fmt::format("invalid array input: {}", std::string_view(str.get_data(), str.get_size())));
return Status::InternalError(fmt::format("invalid array input: {}", str));
} else {
has_null = true;
null_column->append(1);
@ -140,15 +140,6 @@ struct DecimalNonDecimalCast<overflow_mode, DecimalType, NonDecimalType, Decimal
using NonDecimalColumnType = RunTimeColumnType<NonDecimalType>;

static inline ColumnPtr decimal_from(const ColumnPtr& column, int precision, int scale) {
if (scale == 0) {
return _decimal_from<true>(column, precision, scale);
} else {
return _decimal_from<false>(column, precision, scale);
}
}

template <bool ZeroScale>
static inline ColumnPtr _decimal_from(const ColumnPtr& column, int precision, int scale) {
const auto num_rows = column->size();
typename DecimalColumnType::MutablePtr result = DecimalColumnType::create(precision, scale, num_rows);
const auto data = &ColumnHelper::cast_to_raw<NonDecimalType>(column.get())->immutable_data().front();

@ -172,16 +163,9 @@ struct DecimalNonDecimalCast<overflow_mode, DecimalType, NonDecimalType, Decimal
DecimalV3Cast::from_integer<SignedBooleanType, DecimalCppType, check_overflow<overflow_mode>>(
(SignedBooleanType)(data[i]), scale_factor, &result_data[i]);
} else if constexpr (lt_is_integer<NonDecimalType>) {
if constexpr (ZeroScale) {
// Fast path for integer-to-decimal conversion with scale 0.
overflow =
DecimalV3Cast::to_decimal_trivial<NonDecimalCppType, DecimalCppType,
check_overflow<overflow_mode>>(data[i], &result_data[i]);
} else {
overflow = DecimalV3Cast::from_integer<NonDecimalCppType, DecimalCppType,
check_overflow<overflow_mode>>(data[i], scale_factor,
&result_data[i]);
}
overflow =
DecimalV3Cast::from_integer<NonDecimalCppType, DecimalCppType, check_overflow<overflow_mode>>(
data[i], scale_factor, &result_data[i]);
} else if constexpr (lt_is_float<NonDecimalType>) {
overflow = DecimalV3Cast::from_float<NonDecimalCppType, DecimalCppType>(data[i], scale_factor,
&result_data[i]);

@ -234,7 +218,6 @@ struct DecimalNonDecimalCast<overflow_mode, DecimalType, NonDecimalType, Decimal
}
}
}

if constexpr (check_overflow<overflow_mode>) {
ColumnBuilder<DecimalType> builder(std::move(result), std::move(null_column), has_null);
return builder.build(column->is_constant());
@ -20,14 +20,10 @@
#include "column/map_column.h"
#include "column/struct_column.h"
#include "column/type_traits.h"
#ifndef MACOS_DISABLE_JAVA
#include "exprs/agg/java_udaf_function.h"
#endif
#include "runtime/runtime_state.h"
#include "types/logical_type_infra.h"
#ifndef MACOS_DISABLE_JAVA
#include "udf/java/java_udf.h"
#endif
#include "util/bloom_filter.h"

namespace starrocks {

@ -42,7 +38,7 @@ FunctionContext* FunctionContext::create_context(RuntimeState* state, MemPool* p
ctx->_mem_pool = pool;
ctx->_return_type = return_type;
ctx->_arg_types = arg_types;
#if !defined(MACOS_DISABLE_JAVA) && !defined(BUILD_FORMAT_LIB)
#if !defined(BUILD_FORMAT_LIB)
ctx->_jvm_udaf_ctxs = std::make_unique<JavaUDAFContext>();
#endif
return ctx;

@ -58,7 +54,7 @@ FunctionContext* FunctionContext::create_context(RuntimeState* state, MemPool* p
ctx->_mem_pool = pool;
ctx->_return_type = return_type;
ctx->_arg_types = arg_types;
#if !defined(MACOS_DISABLE_JAVA) && !defined(BUILD_FORMAT_LIB)
#if !defined(BUILD_FORMAT_LIB)
ctx->_jvm_udaf_ctxs = std::make_unique<JavaUDAFContext>();
#endif
ctx->_is_distinct = is_distinct;

@ -141,12 +137,10 @@ void* FunctionContext::get_function_state(FunctionStateScope scope) const {
}

void FunctionContext::release_mems() {
#ifndef MACOS_DISABLE_JAVA
if (_jvm_udaf_ctxs != nullptr && _jvm_udaf_ctxs->states) {
auto env = JVMFunctionHelper::getInstance().getEnv();
_jvm_udaf_ctxs->states->clear(this, env);
}
#endif
}

void FunctionContext::set_error(const char* error_msg, const bool is_udf) {

@ -35,12 +35,6 @@ class RuntimeState;
class Column;
class Slice;
struct JavaUDAFContext;
#if defined(MACOS_DISABLE_JAVA)
// On macOS build, Java is disabled. Provide an empty definition so that
// std::unique_ptr<JavaUDAFContext> has a complete type and can be destroyed
// without pulling in JNI headers.
struct JavaUDAFContext {};
#endif
struct NgramBloomFilterState;

class FunctionContext {
@ -3468,76 +3468,6 @@ StatusOr<ColumnPtr> StringFunctions::regexp_extract(FunctionContext* context, co
return regexp_extract_general(context, options, columns);
}

// Helper function to extract whole match (group 0) using RE2::Match
// This is shared by both overloaded extract_regex_matches functions
template <typename IndexType>
static void extract_whole_matches(const re2::StringPiece& str_sp, const re2::RE2& regex, BinaryColumn* str_col,
IndexType& index, int max_matches) {
re2::StringPiece input = str_sp;
std::vector<re2::StringPiece> matches(max_matches);
size_t pos = 0;

while (pos <= input.size()) {
re2::StringPiece remaining = input.substr(pos);
if (regex.Match(remaining, 0, remaining.size(), RE2::UNANCHORED, &matches[0], max_matches)) {
// matches[0] contains the whole match (group 0)
str_col->append(Slice(matches[0].data(), matches[0].size()));
index += 1;
// Move past this match
pos = matches[0].data() - input.data() + matches[0].size();
if (matches[0].size() == 0) {
pos++; // Avoid infinite loop on zero-length matches
}
} else {
break;
}
}
}

// Helper function to extract regex matches and append to column
// This reduces code duplication across regexp_extract_all_* functions
static void extract_regex_matches(const Slice& str_value, const re2::RE2& regex, int group, BinaryColumn* str_col,
uint32_t& index, int max_matches) {
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());

if (group == 0) {
// Extract the whole match (group 0)
extract_whole_matches(str_sp, regex, str_col, index, max_matches);
} else {
// Extract specific capture group
re2::StringPiece find[group];
const RE2::Arg* args[group];
RE2::Arg argv[group];

for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
while (re2::RE2::FindAndConsumeN(&str_sp, regex, args, group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
}
}

// Overloaded version for pre-allocated arrays (used by regexp_extract_all_const)
static void extract_regex_matches(const Slice& str_value, const re2::RE2& regex, int group, BinaryColumn* str_col,
uint64_t& index, const std::unique_ptr<re2::StringPiece[]>& find,
const std::unique_ptr<const RE2::Arg*[]>& args, int max_matches) {
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());

if (group == 0) {
// Extract the whole match (group 0) - reuse common logic
extract_whole_matches(str_sp, regex, str_col, index, max_matches);
} else {
// Extract specific capture group using pre-allocated arrays
while (re2::RE2::FindAndConsumeN(&str_sp, regex, args.get(), group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
}
}

static ColumnPtr regexp_extract_all_general(FunctionContext* context, re2::RE2::Options* options,
const Columns& columns) {
auto content_viewer = ColumnViewer<TYPE_VARCHAR>(columns[0]);

@ -3553,7 +3483,7 @@ static ColumnPtr regexp_extract_all_general(FunctionContext* context, re2::RE2::
uint32_t index = 0;

for (int row = 0; row < size; ++row) {
if (content_viewer.is_null(row) || ptn_viewer.is_null(row) || group_viewer.is_null(row)) {
if (content_viewer.is_null(row) || ptn_viewer.is_null(row)) {
offset_col->append(index);
nl_col->append(1);
continue;

@ -3570,7 +3500,7 @@ static ColumnPtr regexp_extract_all_general(FunctionContext* context, re2::RE2::

nl_col->append(0);
auto group = group_viewer.value(row);
if (group < 0) {
if (group <= 0) {
offset_col->append(index);
continue;
}

@ -3581,7 +3511,21 @@ static ColumnPtr regexp_extract_all_general(FunctionContext* context, re2::RE2::
continue;
}

extract_regex_matches(content_viewer.value(row), local_re, group, str_col.get(), index, max_matches);
auto str_value = content_viewer.value(row);
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());

re2::StringPiece find[group];
const RE2::Arg* args[group];
RE2::Arg argv[group];

for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
while (re2::RE2::FindAndConsumeN(&str_sp, local_re, args, group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
offset_col->append(index);
}

@ -3603,7 +3547,7 @@ static ColumnPtr regexp_extract_all_const_pattern(re2::RE2* const_re, const Colu
uint32_t index = 0;

for (int row = 0; row < size; ++row) {
if (content_viewer.is_null(row) || group_viewer.is_null(row)) {
if (content_viewer.is_null(row)) {
offset_col->append(index);
nl_col->append(1);
continue;

@ -3611,7 +3555,7 @@ static ColumnPtr regexp_extract_all_const_pattern(re2::RE2* const_re, const Colu

nl_col->append(0);
auto group = group_viewer.value(row);
if (group < 0) {
if (group <= 0) {
offset_col->append(index);
continue;
}

@ -3622,7 +3566,21 @@ static ColumnPtr regexp_extract_all_const_pattern(re2::RE2* const_re, const Colu
continue;
}

extract_regex_matches(content_viewer.value(row), *const_re, group, str_col.get(), index, max_matches);
auto str_value = content_viewer.value(row);
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());

re2::StringPiece find[group];
const RE2::Arg* args[group];
RE2::Arg argv[group];

for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
while (re2::RE2::FindAndConsumeN(&str_sp, *const_re, args, group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));
index += 1;
}
offset_col->append(index);
}

@ -3654,7 +3612,7 @@ static ColumnPtr regexp_extract_all_const(re2::RE2* const_re, const Columns& col

uint64_t index = 0;
int max_matches = 1 + const_re->NumberOfCapturingGroups();
if (group < 0 || group >= max_matches) {
if (group <= 0 || group >= max_matches) {
offset_col->append_value_multiple_times(&index, size);
auto array = ArrayColumn::create(NullableColumn::create(std::move(str_col), NullColumn::create(0, 0)),
std::move(offset_col));

@ -3665,27 +3623,26 @@ static ColumnPtr regexp_extract_all_const(re2::RE2* const_re, const Columns& col
return NullableColumn::create(std::move(array), std::move(nl_col));
}

// Prepare arguments for FindAndConsumeN (only needed when group > 0)
std::unique_ptr<re2::StringPiece[]> find;
std::unique_ptr<const RE2::Arg*[]> args;
std::unique_ptr<RE2::Arg[]> argv;
re2::StringPiece find[group];
const RE2::Arg* args[group];
RE2::Arg argv[group];

if (group > 0) {
find = std::make_unique<re2::StringPiece[]>(group);
args = std::make_unique<const RE2::Arg*[]>(group);
argv = std::make_unique<RE2::Arg[]>(group);

for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}
for (size_t i = 0; i < group; i++) {
argv[i] = &find[i];
args[i] = &argv[i];
}

// focuses only on iteration and offset management
for (int row = 0; row < size; ++row) {
if (!content_viewer.is_null(row)) {
extract_regex_matches(content_viewer.value(row), *const_re, group, str_col.get(), index, find, args,
max_matches);
if (content_viewer.is_null(row)) {
offset_col->append(index);
continue;
}

auto str_value = content_viewer.value(row);
re2::StringPiece str_sp(str_value.get_data(), str_value.get_size());
while (re2::RE2::FindAndConsumeN(&str_sp, *const_re, args, group)) {
str_col->append(Slice(find[group - 1].data(), find[group - 1].size()));

index += 1;
}
offset_col->append(index);
}
|||
|
|
@ -3897,39 +3897,6 @@ StatusOr<ColumnPtr> TimeFunctions::time_format(FunctionContext* context, const s
|
|||
return builder.build(ColumnHelper::is_all_const(columns));
|
||||
}
|
||||
|
||||
constexpr static const int64_t MAX_TIME = 3023999L;
|
||||
|
||||
static int64_t from_seconds_with_limit(int64_t time) {
|
||||
if (time > MAX_TIME) {
|
||||
return MAX_TIME;
|
||||
}
|
||||
if (time < -MAX_TIME) {
|
||||
return -MAX_TIME;
|
||||
}
|
||||
return time;
|
||||
}
|
||||
|
||||
StatusOr<ColumnPtr> TimeFunctions::sec_to_time(FunctionContext* context, const starrocks::Columns& columns) {
|
||||
const auto& bigint_column = columns[0];
|
||||
|
||||
RETURN_IF_COLUMNS_ONLY_NULL(columns);
|
||||
|
||||
auto bigint_viewer = ColumnViewer<TYPE_BIGINT>(bigint_column);
|
||||
const size_t size = bigint_column->size();
|
||||
auto builder = ColumnBuilder<TYPE_TIME>(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (bigint_viewer.is_null(i)) {
|
||||
builder.append_null();
|
||||
continue;
|
||||
}
|
||||
auto time = static_cast<double>(from_seconds_with_limit(bigint_viewer.value(i)));
|
||||
builder.append(time);
|
||||
}
|
||||
|
||||
return builder.build(ColumnHelper::is_all_const(columns));
|
||||
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
||||
#include "gen_cpp/opcode/TimeFunctions.inc"
|
||||
|
|
|
|||
|
|
@ -766,14 +766,6 @@ public:
*/
DEFINE_VECTORIZED_FN(time_to_sec);

/**
* return time
* @param: [int]
* @paramType columns: [BinaryColumn]
* @return Int64Column
*/
DEFINE_VECTORIZED_FN(sec_to_time);

/**
* Returns the date of the first specified DOW (day of week) that occurs after the input date.
* @param: [timestamp, dow]
|||
|
|
@ -147,7 +147,7 @@ public:
|
|||
auto& src_null_data = src_nullable_column->null_column()->get_data();
|
||||
auto& dst_null_data = dst_nullable_column->null_column()->get_data();
|
||||
|
||||
size_t size = dst_null_data.size();
|
||||
size_t size = src_column->size();
|
||||
memcpy(dst_null_data.data(), src_null_data.data(), size);
|
||||
convert_int_to_int<SourceType, DestType>(src_data.data(), dst_data.data(), size);
|
||||
dst_nullable_column->set_has_null(src_nullable_column->has_null());
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@
#include <string>
#include <vector>

#include "cache/disk_cache/block_cache.h"
#include "cache/block_cache/block_cache.h"
#include "column/vectorized_fwd.h"
#include "common/status.h"
#include "common/statusor.h"
@ -470,7 +470,7 @@ StatusOr<FileMetaDataPtr> FileMetaDataParser::get_file_metadata() {
RETURN_IF_ERROR(_parse_footer(&file_metadata, &file_metadata_size));
if (file_metadata_size > 0) {
auto deleter = [](const starrocks::CacheKey& key, void* value) { delete (FileMetaDataPtr*)value; };
MemCacheWriteOptions options;
ObjectCacheWriteOptions options;
options.evict_probability = _datacache_options->datacache_evict_probability;
auto capture = std::make_unique<FileMetaDataPtr>(file_metadata);
Status st = _cache->insert(metacache_key, (void*)(capture.get()), file_metadata_size, deleter, options,
@ -41,7 +41,7 @@ static constexpr size_t kDefaultPageHeaderSize = 16 * 1024;
// 16MB is borrowed from Arrow
static constexpr size_t kMaxPageHeaderSize = 16 * 1024 * 1024;

PageReader::PageReader(io::SeekableInputStream* stream, size_t start_offset, size_t length, size_t num_values,
PageReader::PageReader(io::SeekableInputStream* stream, uint64_t start_offset, uint64_t length, uint64_t num_values,
const ColumnReaderOptions& opts, const tparquet::CompressionCodec::type codec)
: _stream(stream),
_finish_offset(start_offset + length),

@ -92,7 +92,7 @@ Status PageReader::_deal_page_with_cache() {
return Status::OK();
}
RETURN_IF_ERROR(_read_and_decompress_internal(true));
MemCacheWriteOptions opts{.evict_probability = _opts.datacache_options->datacache_evict_probability};
ObjectCacheWriteOptions opts{.evict_probability = _opts.datacache_options->datacache_evict_probability};
auto st = _cache->insert(page_cache_key, _cache_buf, opts, &cache_handle);
if (st.ok()) {
_page_handle = PageHandle(std::move(cache_handle));
@ -266,8 +266,7 @@ StatusOr<::parquet::Compression::type> ParquetFileWriter::_convert_compression_t
|
|||
|
||||
// Check if arrow supports indicated compression type
|
||||
if (!::parquet::IsCodecSupported(converted_type)) {
|
||||
return Status::NotSupported(
|
||||
fmt::format("not supported compression codec {}", static_cast<int>(converted_type)));
|
||||
return Status::NotSupported(fmt::format("not supported compression codec {}", converted_type));
|
||||
}
|
||||
|
||||
return converted_type;
|
||||
|
|
@ -456,12 +455,13 @@ Status ParquetFileWriter::init() {
|
|||
|
||||
ParquetFileWriter::~ParquetFileWriter() = default;
|
||||
|
||||
ParquetFileWriterFactory::ParquetFileWriterFactory(
|
||||
std::shared_ptr<FileSystem> fs, TCompressionType::type compression_type,
|
||||
std::map<std::string, std::string> options, std::vector<std::string> column_names,
|
||||
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> column_evaluators,
|
||||
std::optional<std::vector<formats::FileColumnId>> field_ids, PriorityThreadPool* executors,
|
||||
RuntimeState* runtime_state)
|
||||
ParquetFileWriterFactory::ParquetFileWriterFactory(std::shared_ptr<FileSystem> fs,
|
||||
TCompressionType::type compression_type,
|
||||
std::map<std::string, std::string> options,
|
||||
std::vector<std::string> column_names,
|
||||
std::vector<std::unique_ptr<ColumnEvaluator>>&& column_evaluators,
|
||||
std::optional<std::vector<formats::FileColumnId>> field_ids,
|
||||
PriorityThreadPool* executors, RuntimeState* runtime_state)
|
||||
: _fs(std::move(fs)),
|
||||
_compression_type(compression_type),
|
||||
_field_ids(std::move(field_ids)),
|
||||
|
|
@ -472,7 +472,7 @@ ParquetFileWriterFactory::ParquetFileWriterFactory(
_runtime_state(runtime_state) {}

Status ParquetFileWriterFactory::init() {
RETURN_IF_ERROR(ColumnEvaluator::init(*_column_evaluators));
RETURN_IF_ERROR(ColumnEvaluator::init(_column_evaluators));
_parsed_options = std::make_shared<ParquetWriterOptions>();
_parsed_options->column_ids = _field_ids;
if (_options.contains(ParquetWriterOptions::USE_LEGACY_DECIMAL_ENCODING)) {

@ -506,8 +506,8 @@ StatusOr<WriterAndStream> ParquetFileWriterFactory::create(const std::string& pa
auto rollback_action = [fs = _fs, path = path]() {
WARN_IF_ERROR(ignore_not_found(fs->delete_file(path)), "fail to delete file");
};
auto column_evaluators = ColumnEvaluator::clone(*_column_evaluators);
auto types = ColumnEvaluator::types(*_column_evaluators);
auto column_evaluators = ColumnEvaluator::clone(_column_evaluators);
auto types = ColumnEvaluator::types(_column_evaluators);
auto async_output_stream =
std::make_unique<io::AsyncFlushOutputStream>(std::move(file), _executors, _runtime_state);
auto parquet_output_stream = std::make_shared<parquet::AsyncParquetOutputStream>(async_output_stream.get());

@ -162,7 +162,7 @@ class ParquetFileWriterFactory : public FileWriterFactory {
public:
ParquetFileWriterFactory(std::shared_ptr<FileSystem> fs, TCompressionType::type compression_type,
std::map<std::string, std::string> options, std::vector<std::string> column_names,
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> column_evaluators,
std::vector<std::unique_ptr<ColumnEvaluator>>&& column_evaluators,
std::optional<std::vector<formats::FileColumnId>> field_ids, PriorityThreadPool* executors,
RuntimeState* runtime_state);

@ -178,7 +178,7 @@ private:
std::shared_ptr<ParquetWriterOptions> _parsed_options;

std::vector<std::string> _column_names;
std::shared_ptr<std::vector<std::unique_ptr<ColumnEvaluator>>> _column_evaluators;
std::vector<std::unique_ptr<ColumnEvaluator>> _column_evaluators;
PriorityThreadPool* _executors = nullptr;
RuntimeState* _runtime_state = nullptr;
};

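The signature hunks above replace a shared vector of evaluators with a vector the factory owns by value and receives by move. A hedged usage sketch under that reading; make_column_evaluators() is a hypothetical helper, and the surrounding variables (fs, options, column_names, field_ids, executors, runtime_state) are assumed to be in scope:

    std::vector<std::unique_ptr<ColumnEvaluator>> evaluators = make_column_evaluators(); // hypothetical
    ParquetFileWriterFactory factory(fs, TCompressionType::SNAPPY, options, column_names,
                                     std::move(evaluators), field_ids, executors, runtime_state);
    Status st = factory.init(); // initializes the factory-owned evaluators
    // Per the create() hunk, each writer then gets its own copies:
    //   auto per_writer = ColumnEvaluator::clone(_column_evaluators);
    // so writers never share mutable evaluator state.
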
@ -92,6 +92,7 @@ HTTPSessionPtr makeHTTPSessionImpl(const std::string& host, Poco::UInt16 port, b
}

EndpointHTTPSessionPool::Base::ObjectPtr EndpointHTTPSessionPool::allocObject() {
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_mem_tracker);
auto session = makeHTTPSessionImpl(_host, _port, _is_https, true);
return session;
}

@ -68,7 +68,9 @@ class EndpointHTTPSessionPool : public PoolBase<Poco::Net::HTTPClientSession> {
public:
using Base = PoolBase<Poco::Net::HTTPClientSession>;
EndpointHTTPSessionPool(std::string host, uint16_t port, bool is_https)
: Base(ENDPOINT_POOL_SIZE), _host(std::move(host)), _port(port), _is_https(is_https) {}
: Base(ENDPOINT_POOL_SIZE), _host(std::move(host)), _port(port), _is_https(is_https) {
_mem_tracker = GlobalEnv::GetInstance()->poco_connection_pool_mem_tracker();
}

private:
ObjectPtr allocObject() override;

@ -78,6 +80,7 @@ private:
const std::string _host;
const uint16_t _port;
const bool _is_https;
MemTracker* _mem_tracker = nullptr;
};

class HTTPSessionPools {

@ -28,11 +28,6 @@
#include <mach/clock.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
// macOS system headers may define a function-like macro named current_task(),
// which conflicts with StarRocks member function names. Undefine it here.
#ifdef current_task
#undef current_task
#endif

#include "gutil/once.h"
#endif // #if defined(__APPLE__)

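A short illustration of the conflict the comment in this hunk describes; the class and method below are hypothetical, chosen only to show why the #undef guard matters:

    #include <mach/mach.h> // may define a function-like macro current_task()

    #ifdef current_task
    #undef current_task    // without this, the declaration below is rewritten by the preprocessor
    #endif

    class ThreadContext {  // hypothetical
    public:
        void* current_task(); // with the macro still defined, this would expand and fail to compile
    };
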
@ -21,16 +21,15 @@

#include <string>

#include "cache/disk_cache/block_cache_hit_rate_counter.hpp"
#include "cache/disk_cache/local_disk_cache_engine.h"
#include "cache/mem_cache/local_mem_cache_engine.h"
#include "cache/block_cache/block_cache_hit_rate_counter.hpp"
#include "cache/local_cache_engine.h"
#include "http/http_channel.h"
#include "http/http_headers.h"
#include "http/http_request.h"
#include "http/http_status.h"

#ifdef WITH_STARCACHE
#include "cache/disk_cache/starcache_engine.h"
#include "cache/starcache_engine.h"
#endif

namespace starrocks {

@ -57,8 +56,10 @@ void DataCacheAction::handle(HttpRequest* req) {
if (!_check_request(req)) {
return;
}
if (!_disk_cache || !_disk_cache->is_initialized()) {
if (!_local_cache || !_local_cache->is_initialized()) {
_handle_error(req, strings::Substitute("Cache system is not ready"));
} else if (_local_cache->engine_type() != LocalCacheEngineType::STARCACHE) {
_handle_error(req, strings::Substitute("No more metrics for current cache engine type"));
} else if (req->param(ACTION_KEY) == ACTION_STAT) {
_handle_stat(req);
} else {

@ -80,29 +81,23 @@ void DataCacheAction::_handle(HttpRequest* req, const std::function<void(rapidjs
void DataCacheAction::_handle_stat(HttpRequest* req) {
_handle(req, [=](rapidjson::Document& root) {
#ifdef WITH_STARCACHE
DataCacheMemMetrics mem_metrics;
if (_mem_cache != nullptr) {
mem_metrics = _mem_cache->cache_metrics();
}

auto& allocator = root.GetAllocator();
auto* starcache = reinterpret_cast<StarCacheEngine*>(_disk_cache);
auto* starcache = reinterpret_cast<StarCacheEngine*>(_local_cache);
auto&& metrics = starcache->starcache_metrics(2);
std::string status = DataCacheStatusUtils::to_string(static_cast<DataCacheStatus>(metrics.status));

rapidjson::Value status_value;
status_value.SetString(status.c_str(), status.length(), allocator);
root.AddMember("status", status_value, allocator);
root.AddMember("mem_quota_bytes", rapidjson::Value(mem_metrics.mem_quota_bytes), allocator);
root.AddMember("mem_used_bytes", rapidjson::Value(mem_metrics.mem_used_bytes), allocator);
root.AddMember("mem_quota_bytes", rapidjson::Value(metrics.mem_quota_bytes), allocator);
root.AddMember("mem_used_bytes", rapidjson::Value(metrics.mem_used_bytes), allocator);
root.AddMember("disk_quota_bytes", rapidjson::Value(metrics.disk_quota_bytes), allocator);
root.AddMember("disk_used_bytes", rapidjson::Value(metrics.disk_used_bytes), allocator);

auto mem_used_rate = 0.0;
if (mem_metrics.mem_quota_bytes > 0) {
if (metrics.mem_quota_bytes > 0) {
mem_used_rate =
std::round(double(mem_metrics.mem_used_bytes) / double(mem_metrics.mem_quota_bytes) * 100.0) /
100.0;
std::round(double(metrics.mem_used_bytes) / double(metrics.mem_quota_bytes) * 100.0) / 100.0;
}
auto disk_used_rate = 0.0;
if (metrics.disk_quota_bytes > 0) {

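A worked example of the used-rate rounding in the hunk above, with hypothetical values:

    // mem_used_bytes = 3 GiB, mem_quota_bytes = 8 GiB
    double used = 3.0 * (1ULL << 30);
    double quota = 8.0 * (1ULL << 30);
    double mem_used_rate = std::round(used / quota * 100.0) / 100.0; // 0.375 -> 37.5 -> 38 -> 0.38

so the reported rate is the utilization truncated to two decimal places.
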
@ -28,14 +28,11 @@

namespace starrocks {

class LocalDiskCacheEngine;
class LocalMemCacheEngine;

class LocalCacheEngine;
// TODO: support mem metrics
class DataCacheAction : public HttpHandler {
public:
explicit DataCacheAction(LocalDiskCacheEngine* disk_cache, LocalMemCacheEngine* mem_cache)
: _disk_cache(disk_cache), _mem_cache(mem_cache) {}
explicit DataCacheAction(LocalCacheEngine* local_cache) : _local_cache(local_cache) {}
~DataCacheAction() override = default;

void handle(HttpRequest* req) override;

@ -47,8 +44,7 @@ private:
void _handle_app_stat(HttpRequest* req);
void _handle_error(HttpRequest* req, const std::string& error_msg);

LocalDiskCacheEngine* _disk_cache;
LocalMemCacheEngine* _mem_cache;
LocalCacheEngine* _local_cache;
};

} // namespace starrocks

@ -44,7 +44,7 @@
#include "agent/agent_common.h"
#include "agent/agent_server.h"
#include "cache/datacache.h"
#include "cache/mem_cache/page_cache.h"
#include "cache/object_cache/page_cache.h"
#include "common/configbase.h"
#include "common/status.h"
#include "exec/workgroup/scan_executor.h"

@ -115,7 +115,7 @@ Status UpdateConfigAction::update_config(const std::string& name, const std::str
return Status::OK();
});
_config_callback.emplace("datacache_mem_size", [&]() -> Status {
LocalMemCacheEngine* cache = DataCache::GetInstance()->local_mem_cache();
LocalCacheEngine* cache = DataCache::GetInstance()->local_mem_cache();
if (cache == nullptr || !cache->is_initialized()) {
return Status::InternalError("Local cache is not initialized");
}

@ -127,10 +127,10 @@ Status UpdateConfigAction::update_config(const std::string& name, const std::str
LOG(WARNING) << "Failed to update datacache mem size";
return st;
}
return cache->update_mem_quota(mem_size);
return cache->update_mem_quota(mem_size, true);
});
_config_callback.emplace("datacache_disk_size", [&]() -> Status {
LocalDiskCacheEngine* cache = DataCache::GetInstance()->local_disk_cache();
LocalCacheEngine* cache = DataCache::GetInstance()->local_disk_cache();
if (cache == nullptr || !cache->is_initialized()) {
return Status::InternalError("Local cache is not initialized");
}

@ -149,7 +149,7 @@ Status UpdateConfigAction::update_config(const std::string& name, const std::str
return cache->update_disk_spaces(spaces);
});
_config_callback.emplace("datacache_inline_item_count_limit", [&]() -> Status {
LocalDiskCacheEngine* cache = DataCache::GetInstance()->local_disk_cache();
LocalCacheEngine* cache = DataCache::GetInstance()->local_disk_cache();
if (cache == nullptr || !cache->is_initialized()) {
return Status::InternalError("Local cache is not initialized");
}

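For context, a hedged sketch of how these per-config callbacks are typically dispatched once a config value changes; the body of update_config is not shown in this diff, so the persist step and lookup below are assumptions:

    Status UpdateConfigAction::update_config(const std::string& name, const std::string& value) {
        RETURN_IF_ERROR(config::set_config(name, value)); // assumed: apply the new value first
        auto iter = _config_callback.find(name);
        if (iter != _config_callback.end()) {
            return iter->second(); // e.g. resize the datacache mem/disk quotas registered above
        }
        return Status::OK();
    }
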
@ -125,7 +125,7 @@ Status CacheInputStream::_read_from_cache(const int64_t offset, const int64_t si
Status res;
int64_t read_local_cache_ns = 0;
BlockBuffer block;
DiskCacheReadOptions options;
ReadCacheOptions options;
size_t read_size = 0;
{
options.use_adaptor = _enable_cache_io_adaptor;

@ -153,13 +153,14 @@ Status CacheInputStream::_read_from_cache(const int64_t offset, const int64_t si
read_size = block_size;

if (res.ok() && _enable_populate_cache) {
DiskCacheWriteOptions write_options;
write_options.async = _enable_async_populate_mode;
write_options.priority = _priority;
write_options.ttl_seconds = _ttl_seconds;
write_options.frequency = _frequency;
write_options.allow_zero_copy = true;
_write_cache(block_offset, block.buffer, &write_options);
WriteCacheOptions options;
options.async = _enable_async_populate_mode;
options.evict_probability = _datacache_evict_probability;
options.priority = _priority;
options.ttl_seconds = _ttl_seconds;
options.frequency = _frequency;
options.allow_zero_copy = true;
_write_cache(block_offset, block.buffer, &options);
}
}

@ -202,7 +203,7 @@ Status CacheInputStream::_read_from_cache(const int64_t offset, const int64_t si
return res;
}

Status CacheInputStream::_read_peer_cache(off_t offset, size_t size, IOBuffer* iobuf, DiskCacheReadOptions* options) {
Status CacheInputStream::_read_peer_cache(off_t offset, size_t size, IOBuffer* iobuf, ReadCacheOptions* options) {
options->remote_host = _peer_host;
options->remote_port = _peer_port;
return _cache->read_buffer_from_remote_cache(_cache_key, offset, size, iobuf, options);

@ -444,8 +445,9 @@ void CacheInputStream::_populate_to_cache(const char* p, int64_t offset, int64_t
int64_t end = std::min((offset + count + _block_size - 1) / _block_size * _block_size, _size);
p -= (offset - begin);
auto f = [sb, this](const char* buf, size_t off, size_t size) {
DiskCacheWriteOptions options;
WriteCacheOptions options;
options.async = _enable_async_populate_mode;
options.evict_probability = _datacache_evict_probability;
options.priority = _priority;
options.ttl_seconds = _ttl_seconds;
options.frequency = _frequency;

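A worked example of the block alignment above, assuming begin is offset rounded down to a block boundary as in the surrounding code (the values are hypothetical):

    int64_t _block_size = 1 << 20;  // 1 MiB cache blocks
    int64_t _size = 8LL << 20;      // 8 MiB file
    int64_t offset = 1572864;       // 1.5 MiB, unaligned read start
    int64_t count = 716800;         // ~0.68 MiB
    int64_t begin = offset / _block_size * _block_size;                                            // 1048576 (1 MiB)
    int64_t end = std::min((offset + count + _block_size - 1) / _block_size * _block_size, _size); // 3145728 (3 MiB)
    // p -= (offset - begin) then rewinds the data pointer so it lines up with the aligned begin,
    // letting the populate callback walk whole cache blocks even though the read was unaligned.
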
@ -472,7 +474,7 @@ void CacheInputStream::_populate_to_cache(const char* p, int64_t offset, int64_t
return;
}

void CacheInputStream::_write_cache(int64_t offset, const IOBuffer& iobuf, DiskCacheWriteOptions* options) {
void CacheInputStream::_write_cache(int64_t offset, const IOBuffer& iobuf, WriteCacheOptions* options) {
DCHECK(offset % _block_size == 0);
if (_already_populated_blocks.contains(offset / _block_size)) {
// Already populate in CacheInputStream's lifecycle, ignore this time

@ -17,8 +17,8 @@
#include <memory>
#include <string>

#include "cache/disk_cache/block_cache.h"
#include "cache/disk_cache/io_buffer.h"
#include "cache/block_cache/block_cache.h"
#include "cache/block_cache/io_buffer.h"
#include "io/shared_buffered_input_stream.h"

namespace starrocks::io {

@ -76,6 +76,8 @@ public:

void set_enable_cache_io_adaptor(bool v) { _enable_cache_io_adaptor = v; }

void set_datacache_evict_probability(int32_t v) { _datacache_evict_probability = v; }

void set_priority(const int8_t priority) { _priority = priority; }

void set_frequency(const int8_t frequency) { _frequency = frequency; }

@ -106,9 +108,9 @@ protected:
virtual Status _read_blocks_from_remote(const int64_t offset, const int64_t size, char* out);
Status _read_from_cache(const int64_t offset, const int64_t size, const int64_t block_offset,
const int64_t block_size, char* out);
Status _read_peer_cache(off_t offset, size_t size, IOBuffer* iobuf, DiskCacheReadOptions* options);
Status _read_peer_cache(off_t offset, size_t size, IOBuffer* iobuf, ReadCacheOptions* options);
void _populate_to_cache(const char* src, int64_t offset, int64_t count, const SharedBufferPtr& sb);
void _write_cache(int64_t offset, const IOBuffer& iobuf, DiskCacheWriteOptions* options);
void _write_cache(int64_t offset, const IOBuffer& iobuf, WriteCacheOptions* options);

void _deduplicate_shared_buffer(const SharedBufferPtr& sb);
bool _can_ignore_populate_error(const Status& status) const;

@ -126,6 +128,7 @@ protected:
bool _enable_async_populate_mode = false;
bool _enable_block_buffer = false;
bool _enable_cache_io_adaptor = false;
int32_t _datacache_evict_probability = 100;

std::string _peer_host;
int32_t _peer_port = 0;

@ -149,10 +149,6 @@ public:
operator int32_t() const { return static_cast<int32_t>(operator int64_t()); }

operator size_t() const { return static_cast<size_t>(operator int64_t()); }
#if defined(__APPLE__)
// Disambiguate C-style casts to uint64_t on libc++
operator uint64_t() const { return static_cast<uint64_t>(operator int64_t()); }
#endif

operator float() const { return (float)operator double(); }

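A brief illustration of the ambiguity the guarded operator in this hunk works around; the wrapper type below is hypothetical and only mimics the conversion set shown above:

    // On libc++/macOS, size_t is unsigned long while uint64_t is unsigned long long.
    struct Big { // hypothetical stand-in for the real class
        operator int64_t() const { return 1; }
        operator size_t() const { return 1; }
    };
    // uint64_t v = (uint64_t)Big{}; // ambiguous there: both conversions need a further integral
    //                               // conversion, so neither is preferred; adding operator uint64_t()
    //                               // under #if defined(__APPLE__) selects a single conversion.
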
@ -44,13 +44,6 @@ template <>
struct unsigned_type<int128_t> {
using type = uint128_t;
};
// On macOS with libc++, std::make_unsigned cannot be specialized for user types.
// Provide an explicit mapping for int256_t to itself, which is sufficient for
// decimal operations that only require absolute value and arithmetic.
template <>
struct unsigned_type<int256_t> {
using type = int256_t;
};

template <typename T, bool check_overflow>
class DecimalV3Arithmetics {

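A hedged sketch of how the unsigned_type trait in this hunk is typically consumed; the helper name is hypothetical, and mapping int256_t to itself is enough only because callers need magnitudes and plain arithmetic, as the removed comment notes:

    template <typename T>
    typename unsigned_type<T>::type abs_magnitude(T v) { // hypothetical helper
        using U = typename unsigned_type<T>::type;
        return v >= 0 ? static_cast<U>(v) : static_cast<U>(-v); // for int256_t, U is int256_t itself
    }
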
@ -302,8 +295,7 @@ public:

if constexpr (rule == ROUND_HALF_UP || rule == ROUND_HALF_EVEN) {
//TODO(by satanson): ROUND_HALF_UP is different from ROUND_HALF_EVEN
auto abs_remainder = (remainder >= 0) ? remainder : -remainder;
need_round = abs_remainder >= (divisor >> 1);
need_round = std::abs(remainder) >= (divisor >> 1);
} else if constexpr (rule == ROUND_FLOOR) {
need_round = remainder > 0 && quotient > 0;
} else if constexpr (rule == ROUND_CEILING) {

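A worked example of the half-up test above, with hypothetical values:

    // Rescaling 1.50 (stored as 150) down by divisor 100:
    int64_t value = 150, divisor = 100;
    int64_t quotient = value / divisor;                      // 1
    int64_t remainder = value % divisor;                     // 50
    bool need_round = std::abs(remainder) >= (divisor >> 1); // |50| >= 50 -> true, result rounds to 2
    // For 1.25 (stored as 125), the remainder is 25 < 50, so the result stays 1.
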
@ -241,6 +241,7 @@ Status GlobalEnv::_init_mem_tracker() {
_consistency_mem_tracker =
regist_tracker(MemTrackerType::CONSISTENCY, consistency_mem_limit, process_mem_tracker());
_datacache_mem_tracker = regist_tracker(MemTrackerType::DATACACHE, -1, process_mem_tracker());
_poco_connection_pool_mem_tracker = regist_tracker(MemTrackerType::POCO_CONNECTION_POOL, -1, process_mem_tracker());
_replication_mem_tracker = regist_tracker(MemTrackerType::REPLICATION, -1, process_mem_tracker());

MemChunkAllocator::init_metrics();

@ -160,6 +160,7 @@ public:
MemTracker* consistency_mem_tracker() { return _consistency_mem_tracker.get(); }
MemTracker* replication_mem_tracker() { return _replication_mem_tracker.get(); }
MemTracker* datacache_mem_tracker() { return _datacache_mem_tracker.get(); }
MemTracker* poco_connection_pool_mem_tracker() { return _poco_connection_pool_mem_tracker.get(); }
MemTracker* jemalloc_metadata_traker() { return _jemalloc_metadata_tracker.get(); }
std::shared_ptr<MemTracker> get_mem_tracker_by_type(MemTrackerType type);
std::vector<std::shared_ptr<MemTracker>> mem_trackers() const;

@ -234,6 +235,9 @@ private:
// The memory used for datacache
std::shared_ptr<MemTracker> _datacache_mem_tracker;

// The memory used for poco connection pool
std::shared_ptr<MemTracker> _poco_connection_pool_mem_tracker;

std::map<MemTrackerType, std::shared_ptr<MemTracker>> _mem_tracker_map;
};

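Taken together, the tracker-related hunks follow the usual pattern for adding a memory tracker; a condensed sketch assembled from those hunks, where the scoped setter is assumed (per its name) to charge allocations made in its scope to the given tracker:

    // 1. Register the tracker under the process tracker (GlobalEnv::_init_mem_tracker):
    _poco_connection_pool_mem_tracker = regist_tracker(MemTrackerType::POCO_CONNECTION_POOL, -1, process_mem_tracker());
    // 2. Expose an accessor on GlobalEnv:
    MemTracker* poco_connection_pool_mem_tracker() { return _poco_connection_pool_mem_tracker.get(); }
    // 3. Attribute allocations at the call site, here the HTTP session pool:
    EndpointHTTPSessionPool::Base::ObjectPtr EndpointHTTPSessionPool::allocObject() {
        SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_mem_tracker); // Poco session allocations land on this tracker
        return makeHTTPSessionImpl(_host, _port, _is_https, true);
    }
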
Some files were not shown because too many files have changed in this diff.