[Enhancement] support column zero copy read from page cache (#62331)

Signed-off-by: stdpain <drfeng08@gmail.com>
This commit is contained in:
stdpain 2025-09-08 14:17:12 +08:00 committed by GitHub
parent 726fa67c3f
commit 6c0693fbf6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
215 changed files with 1916 additions and 1313 deletions

View File

@ -767,16 +767,15 @@ endif()
# Debug information is stored as dwarf2 to be as compatible as possible
# -Werror: compile warnings should be errors when using the toolchain compiler.
# Only enable for debug builds because this is what we test in pre-commit tests.
set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -ggdb -O0 -gdwarf-4 -DDEBUG")
set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -ggdb -O0 -gdwarf-5 -DDEBUG")
# For CMAKE_BUILD_TYPE=Release
# -O3: Enable all compiler optimizations
# -DNDEBUG: Turn off dchecks/asserts/debug only code.
# -gdwarf-4: Debug information is stored as DWARF version 4 to be as compatible as possible
set(CXX_FLAGS_RELEASE "${CXX_GCC_FLAGS} -O3 -gdwarf-4 -DNDEBUG")
SET(CXX_FLAGS_ASAN "${CXX_GCC_FLAGS} -ggdb3 -O0 -gdwarf-4 -fsanitize=address -DADDRESS_SANITIZER")
SET(CXX_FLAGS_LSAN "${CXX_GCC_FLAGS} -ggdb3 -O0 -gdwarf-4 -fsanitize=leak -DLEAK_SANITIZER")
SET(CXX_FLAGS_ASAN "${CXX_GCC_FLAGS} -ggdb3 -O0 -gdwarf-5 -fsanitize=address -DADDRESS_SANITIZER")
SET(CXX_FLAGS_LSAN "${CXX_GCC_FLAGS} -ggdb3 -O0 -gdwarf-5 -fsanitize=leak -DLEAK_SANITIZER")
# Set the flags to the undefined behavior sanitizer, also known as "ubsan"
# Turn on sanitizer and debug symbols to get stack traces:

View File

@ -21,6 +21,7 @@
#include "fmt/format.h"
#include "gen_cpp/AgentService_types.h"
#include "gutil/strings/join.h"
#include "runtime/exec_env.h"
#include "storage/data_dir.h"
#include "storage/replication_txn_manager.h"
#include "storage/storage_engine.h"

View File

@ -22,6 +22,7 @@
#include "common/status.h"
#include "gutil/strings/split.h"
#include "gutil/strings/strip.h"
#include "runtime/exec_env.h"
#include "storage/options.h"
#include "util/parse_util.h"

View File

@ -23,7 +23,7 @@ namespace starrocks {
class LRUCacheEngine final : public LocalCacheEngine {
public:
LRUCacheEngine() = default;
virtual ~LRUCacheEngine() override = default;
~LRUCacheEngine() override = default;
Status init(const MemCacheOptions& options);
bool is_initialized() const override { return _initialized.load(std::memory_order_relaxed); }

View File

@ -44,9 +44,13 @@
namespace starrocks {
// Process-wide counters behind the pinned-handle gauge: handles handed out by the page cache
// and handles that have since been destroyed.
std::atomic<size_t> StoragePageCacheMetrics::returned_page_handle_count{};
std::atomic<size_t> StoragePageCacheMetrics::released_page_handle_count{};
// Gauges exported by StoragePageCache::init_metrics().
METRIC_DEFINE_UINT_GAUGE(page_cache_lookup_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_hit_count, MetricUnit::OPERATIONS);
METRIC_DEFINE_UINT_GAUGE(page_cache_capacity, MetricUnit::BYTES);
// NOTE(review): this gauge is fed from get_pinned_count(), which is a number of handles, not a
// byte size; MetricUnit::BYTES looks copied from page_cache_capacity -- confirm the intended unit.
METRIC_DEFINE_UINT_GAUGE(page_cache_pinned_count, MetricUnit::BYTES);
void StoragePageCache::init_metrics() {
StarRocksMetrics::instance()->metrics()->register_metric("page_cache_lookup_count", &page_cache_lookup_count);
@ -60,6 +64,10 @@ void StoragePageCache::init_metrics() {
StarRocksMetrics::instance()->metrics()->register_metric("page_cache_capacity", &page_cache_capacity);
StarRocksMetrics::instance()->metrics()->register_hook("page_cache_capacity",
[this]() { page_cache_capacity.set_value(get_capacity()); });
StarRocksMetrics::instance()->metrics()->register_metric("page_cache_pinned_count", &page_cache_pinned_count);
StarRocksMetrics::instance()->metrics()->register_hook(
"page_cache_pinned_count", [this]() { page_cache_pinned_count.set_value(get_pinned_count()); });
}
void StoragePageCache::prune() {
@ -92,12 +100,17 @@ bool StoragePageCache::adjust_capacity(int64_t delta, size_t min_capacity) {
return true;
}
// Returns the number of page cache handles currently outstanding: handles counted in
// returned_page_handle_count minus handles counted in released_page_handle_count.
// The value is a best-effort snapshot intended for metrics; it is not an exact point-in-time count.
size_t StoragePageCache::get_pinned_count() const {
    // Load `released` before `returned`. The two counters are independent atomics, so the
    // original `returned - released` expression could read them in either order; reading
    // `released` last lets a concurrent lookup+release slip in between the two loads and make
    // the unsigned subtraction wrap around to a huge value.
    const size_t released = StoragePageCacheMetrics::released_page_handle_count.load();
    const size_t returned = StoragePageCacheMetrics::returned_page_handle_count.load();
    // Saturate at zero as a last line of defence, so the gauge never reports a wrapped value
    // even if some handle is released without having been counted as returned.
    return returned >= released ? returned - released : 0;
}
bool StoragePageCache::lookup(const std::string& key, PageCacheHandle* handle) {
ObjectCacheHandle* obj_handle = nullptr;
Status st = _cache->lookup(key, &obj_handle);
if (!st.ok()) {
return false;
}
StoragePageCacheMetrics::returned_page_handle_count++;
*handle = PageCacheHandle(_cache, obj_handle);
return true;
}
@ -124,6 +137,7 @@ Status StoragePageCache::insert(const std::string& key, std::vector<uint8_t>* da
Status st = _cache->insert(key, (void*)data, mem_size, deleter, &obj_handle, opts);
if (st.ok()) {
*handle = PageCacheHandle(_cache, obj_handle);
StoragePageCacheMetrics::returned_page_handle_count++;
}
return st;
}
@ -134,6 +148,7 @@ Status StoragePageCache::insert(const std::string& key, void* data, int64_t size
Status st = _cache->insert(key, data, size, deleter, &obj_handle, opts);
if (st.ok()) {
*handle = PageCacheHandle(_cache, obj_handle);
StoragePageCacheMetrics::returned_page_handle_count++;
}
return st;
}

View File

@ -34,25 +34,25 @@
#pragma once
#include <memory>
#include <string>
#include <utility>
#include "cache/datacache.h"
#include "gutil/macros.h" // for DISALLOW_COPY
#include "runtime/current_thread.h"
#include "runtime/exec_env.h"
#include "util/defer_op.h"
namespace starrocks {
class PageCacheHandle;
class MemTracker;
class ObjectCacheWriteOptions;
struct ObjectCacheWriteOptions;
// Page cache min size is 256MB
static constexpr int64_t kcacheMinSize = 268435456;
// Process-wide counters used to derive how many page cache handles are currently outstanding.
struct StoragePageCacheMetrics {
// Incremented each time StoragePageCache hands a handle to a caller (lookup hit or successful insert).
static std::atomic<size_t> returned_page_handle_count;
// Incremented each time a PageCacheHandle holding a non-null handle is destroyed.
static std::atomic<size_t> released_page_handle_count;
};
// Wrapper around the cache engine, used to cache pages of column data in a Segment.
// TODO(zc): We should add some metric to see cache hit/miss rate.
class StoragePageCache {
@ -110,6 +110,10 @@ public:
bool is_initialized() const { return _initialized.load(std::memory_order_relaxed); }
bool available() const { return is_initialized() && _cache->mem_cache_available(); }
// Returns the number of page cache handles that are currently outstanding, i.e. handed out
// by lookup()/insert() and not yet destroyed. Several handles to the same page are counted
// separately. Used for metrics only.
size_t get_pinned_count() const;
private:
LocalCacheEngine* _cache = nullptr;
std::atomic<bool> _initialized = false;
@ -124,6 +128,7 @@ public:
PageCacheHandle(LocalCacheEngine* cache, ObjectCacheHandle* handle) : _cache(cache), _handle(handle) {}
// Releases the wrapped cache entry, if any, and records the release in the handle metrics.
~PageCacheHandle() {
    // A handle that wraps nothing has nothing to count or release.
    if (_handle == nullptr) {
        return;
    }
    ++StoragePageCacheMetrics::released_page_handle_count;
    _cache->release(_handle);
}

View File

@ -35,7 +35,7 @@ size_t AdaptiveNullableColumn::null_count() const {
if (!_has_null) {
return 0;
}
return SIMD::count_nonzero(_null_column->get_data());
return SIMD::count_nonzero(_null_column->immutable_data());
}
}
}
@ -53,7 +53,7 @@ size_t AdaptiveNullableColumn::null_count(size_t offset, size_t count) const {
if (!_has_null) {
return 0;
}
return SIMD::count_nonzero(_null_column->get_data());
return SIMD::count_nonzero(_null_column->immutable_data());
}
}
}
@ -271,7 +271,7 @@ void AdaptiveNullableColumn::serialize_batch(uint8_t* dst, Buffer<uint32_t>& sli
uint32_t max_one_row_size) const {
materialized_nullable();
_data_column->serialize_batch_with_null_masks(dst, slice_sizes, chunk_size, max_one_row_size,
_null_column->get_data().data(), _has_null);
_null_column->immutable_data().data(), _has_null);
}
const uint8_t* AdaptiveNullableColumn::deserialize_and_append(const uint8_t* pos) {

View File

@ -86,7 +86,7 @@ public:
AdaptiveNullableColumn(const AdaptiveNullableColumn& rhs) { CHECK(false) << "unimplemented"; }
AdaptiveNullableColumn(AdaptiveNullableColumn&& rhs) { CHECK(false) << "unimplemented"; }
AdaptiveNullableColumn(AdaptiveNullableColumn&& rhs) noexcept { CHECK(false) << "unimplemented"; }
AdaptiveNullableColumn& operator=(const AdaptiveNullableColumn& rhs) {
AdaptiveNullableColumn tmp(rhs);
@ -94,7 +94,7 @@ public:
return *this;
}
AdaptiveNullableColumn& operator=(AdaptiveNullableColumn&& rhs) {
AdaptiveNullableColumn& operator=(AdaptiveNullableColumn&& rhs) noexcept {
AdaptiveNullableColumn tmp(std::move(rhs));
this->swap_column(tmp);
return *this;
@ -160,7 +160,7 @@ public:
return false;
}
case State::kMaterialized: {
return _has_null && _null_column->get_data()[index];
return _has_null && _null_column->immutable_data()[index];
}
default: {
__builtin_unreachable();
@ -346,7 +346,7 @@ public:
uint32_t serialize_size(size_t idx) const override {
materialized_nullable();
if (_null_column->get_data()[idx]) {
if (_null_column->immutable_data()[idx]) {
return sizeof(uint8_t);
}
return sizeof(uint8_t) + _data_column->serialize_size(idx);
@ -458,10 +458,10 @@ public:
// however, this may not be what the user wants, because once an adaptive nullable column is materialized,
// its performance degrades to that of a plain nullable column. For this reason, we add
// DCHECK(false) here and disable the behaviour.
const NullData& immutable_null_column_data() const {
const ImmutableNullData immutable_null_column_data() const {
DCHECK(false);
materialized_nullable();
return _null_column->get_data();
return _null_column->immutable_data();
}
Column* mutable_data_column() {

View File

@ -29,7 +29,8 @@
namespace starrocks {
void ArrayColumn::check_or_die() const {
CHECK_EQ(_offsets->get_data().back(), _elements->size());
const auto offsets = _offsets->immutable_data();
CHECK_EQ(offsets.back(), _elements->size());
DCHECK(_elements->is_nullable());
_offsets->check_or_die();
_elements->check_or_die();
@ -60,15 +61,14 @@ uint8_t* ArrayColumn::mutable_raw_data() {
}
size_t ArrayColumn::byte_size(size_t from, size_t size) const {
const auto offsets = _offsets->immutable_data();
DCHECK_LE(from + size, this->size()) << "Range error";
return _elements->byte_size(_offsets->get_data()[from],
_offsets->get_data()[from + size] - _offsets->get_data()[from]) +
_offsets->byte_size(from, size);
return _elements->byte_size(offsets[from], offsets[from + size] - offsets[from]) + _offsets->byte_size(from, size);
}
size_t ArrayColumn::byte_size(size_t idx) const {
return _elements->byte_size(_offsets->get_data()[idx], _offsets->get_data()[idx + 1]) +
sizeof(_offsets->get_data()[idx]);
const auto offsets = _offsets->immutable_data();
return _elements->byte_size(offsets[idx], offsets[idx + 1]) + sizeof(offsets[idx]);
}
void ArrayColumn::reserve(size_t n) {
@ -109,14 +109,14 @@ void ArrayColumn::append_array_element(const Column& elem, size_t null_elem) {
void ArrayColumn::append(const Column& src, size_t offset, size_t count) {
const auto& array_column = down_cast<const ArrayColumn&>(src);
const OffsetColumn& src_offsets = array_column.offsets();
size_t src_offset = src_offsets.get_data()[offset];
size_t src_count = src_offsets.get_data()[offset + count] - src_offset;
const auto src_offsets = array_column.offsets().immutable_data();
size_t src_offset = src_offsets[offset];
size_t src_count = src_offsets[offset + count] - src_offset;
_elements->append(array_column.elements(), src_offset, src_count);
for (size_t i = offset; i < offset + count; i++) {
uint32_t l = src_offsets.get_data()[i + 1] - src_offsets.get_data()[i];
uint32_t l = src_offsets[i + 1] - src_offsets[i];
_offsets->append(_offsets->get_data().back() + l);
}
}
@ -180,12 +180,12 @@ void ArrayColumn::fill_default(const Filter& filter) {
void ArrayColumn::update_rows(const Column& src, const uint32_t* indexes) {
const auto& array_column = down_cast<const ArrayColumn&>(src);
const OffsetColumn& src_offsets = array_column.offsets();
const auto src_offsets = array_column.offsets().immutable_data();
auto& offsets = _offsets->get_data();
size_t replace_num = src.size();
bool need_resize = false;
for (size_t i = 0; i < replace_num; ++i) {
if (_offsets->get_data()[indexes[i] + 1] - _offsets->get_data()[indexes[i]] !=
src_offsets.get_data()[i + 1] - src_offsets.get_data()[i]) {
if (offsets[indexes[i] + 1] - offsets[indexes[i]] != src_offsets[i + 1] - src_offsets[i]) {
need_resize = true;
break;
}
@ -194,8 +194,8 @@ void ArrayColumn::update_rows(const Column& src, const uint32_t* indexes) {
if (!need_resize) {
Buffer<uint32_t> element_idxes;
for (size_t i = 0; i < replace_num; ++i) {
size_t element_count = src_offsets.get_data()[i + 1] - src_offsets.get_data()[i];
size_t element_offset = _offsets->get_data()[indexes[i]];
size_t element_count = src_offsets[i + 1] - src_offsets[i];
size_t element_offset = offsets[indexes[i]];
for (size_t j = 0; j < element_count; j++) {
element_idxes.emplace_back(element_offset + j);
}
@ -210,7 +210,7 @@ void ArrayColumn::update_rows(const Column& src, const uint32_t* indexes) {
new_array_column->append(src, i, 1);
idx_begin = indexes[i] + 1;
}
int64_t remain_count = _offsets->size() - idx_begin - 1;
int64_t remain_count = offsets.size() - idx_begin - 1;
if (remain_count > 0) {
new_array_column->append(*this, idx_begin, remain_count);
}
@ -233,8 +233,10 @@ void ArrayColumn::remove_first_n_values(size_t count) {
}
uint32_t ArrayColumn::serialize(size_t idx, uint8_t* pos) const {
uint32_t offset = _offsets->get_data()[idx];
uint32_t array_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets = _offsets->immutable_data();
uint32_t offset = offsets[idx];
uint32_t array_size = offsets[idx + 1] - offset;
strings::memcpy_inlined(pos, &array_size, sizeof(array_size));
size_t ser_size = sizeof(array_size);
@ -273,8 +275,9 @@ uint32_t ArrayColumn::max_one_element_serialize_size() const {
}
uint32_t ArrayColumn::serialize_size(size_t idx) const {
uint32_t offset = _offsets->get_data()[idx];
uint32_t array_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets = _offsets->immutable_data();
uint32_t offset = offsets[idx];
uint32_t array_size = offsets[idx + 1] - offset;
uint32_t ser_size = sizeof(array_size);
for (size_t i = 0; i < array_size; ++i) {
@ -386,13 +389,14 @@ size_t ArrayColumn::filter_range(const Filter& filter, size_t from, size_t to) {
int ArrayColumn::compare_at(size_t left, size_t right, const Column& right_column, int nan_direction_hint) const {
const auto& rhs = down_cast<const ArrayColumn&>(right_column);
const auto offsets = _offsets->immutable_data();
size_t lhs_offset = _offsets->get_data()[left];
size_t lhs_size = _offsets->get_data()[left + 1] - lhs_offset;
size_t lhs_offset = offsets[left];
size_t lhs_size = offsets[left + 1] - lhs_offset;
const OffsetColumn& rhs_offsets = rhs.offsets();
size_t rhs_offset = rhs_offsets.get_data()[right];
size_t rhs_size = rhs_offsets.get_data()[right + 1] - rhs_offset;
const auto rhs_offsets = rhs.offsets().immutable_data();
size_t rhs_offset = rhs_offsets[right];
size_t rhs_size = rhs_offsets[right + 1] - rhs_offset;
size_t min_size = std::min(lhs_size, rhs_size);
for (size_t i = 0; i < min_size; ++i) {
int res = _elements->compare_at(lhs_offset + i, rhs_offset + i, rhs.elements(), nan_direction_hint);
@ -407,11 +411,14 @@ int ArrayColumn::compare_at(size_t left, size_t right, const Column& right_colum
int ArrayColumn::equals(size_t left, const Column& rhs, size_t right, bool safe_eq) const {
const auto& rhs_array = down_cast<const ArrayColumn&>(rhs);
size_t lhs_offset = _offsets->get_data()[left];
size_t lhs_end = _offsets->get_data()[left + 1];
const auto offsets = _offsets->immutable_data();
const auto rhs_offsets = rhs_array.offsets().immutable_data();
size_t rhs_offset = rhs_array._offsets->get_data()[right];
size_t rhs_end = rhs_array._offsets->get_data()[right + 1];
size_t lhs_offset = offsets[left];
size_t lhs_end = offsets[left + 1];
size_t rhs_offset = rhs_offsets[right];
size_t rhs_end = rhs_offsets[right + 1];
if (lhs_end - lhs_offset != rhs_end - rhs_offset) {
return EQUALS_FALSE;
@ -451,9 +458,11 @@ void ArrayColumn::compare_column(const Column& rhs_column, std::vector<int8_t>*
void ArrayColumn::fnv_hash_at(uint32_t* hash, uint32_t idx) const {
DCHECK_LT(idx + 1, _offsets->size()) << "idx + 1 should be less than offsets size";
uint32_t offset = _offsets->get_data()[idx];
const auto offsets = _offsets->immutable_data();
uint32_t offset = offsets[idx];
// Should use size_t not uint32_t for compatible
size_t array_size = _offsets->get_data()[idx + 1] - offset;
size_t array_size = offsets[idx + 1] - offset;
*hash = HashUtil::fnv_hash(&array_size, sizeof(array_size), *hash);
for (size_t i = 0; i < array_size; ++i) {
@ -464,9 +473,11 @@ void ArrayColumn::fnv_hash_at(uint32_t* hash, uint32_t idx) const {
void ArrayColumn::crc32_hash_at(uint32_t* hash, uint32_t idx) const {
DCHECK_LT(idx + 1, _offsets->size()) << "idx + 1 should be less than offsets size";
uint32_t offset = _offsets->get_data()[idx];
const auto offsets = _offsets->immutable_data();
uint32_t offset = offsets[idx];
// Should use size_t not uint32_t for compatible
size_t array_size = _offsets->get_data()[idx + 1] - offset;
size_t array_size = offsets[idx + 1] - offset;
*hash = HashUtil::zlib_crc_hash(&array_size, static_cast<uint32_t>(sizeof(array_size)), *hash);
for (size_t i = 0; i < array_size; ++i) {
@ -492,22 +503,24 @@ void ArrayColumn::crc32_hash(uint32_t* hash, uint32_t from, uint32_t to) const {
}
int64_t ArrayColumn::xor_checksum(uint32_t from, uint32_t to) const {
const auto offsets = _offsets->immutable_data();
// The XOR of ArrayColumn
// XOR the offsets column and elements column
int64_t xor_checksum = 0;
for (size_t idx = from; idx < to; ++idx) {
int64_t array_size = _offsets->get_data()[idx + 1] - _offsets->get_data()[idx];
int64_t array_size = offsets[idx + 1] - offsets[idx];
xor_checksum ^= array_size;
}
uint32_t element_from = _offsets->get_data()[from];
uint32_t element_to = _offsets->get_data()[to];
uint32_t element_from = offsets[from];
uint32_t element_to = offsets[to];
return (xor_checksum ^ _elements->xor_checksum(element_from, element_to));
}
void ArrayColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_binary_protocol) const {
DCHECK_LT(idx, size());
const size_t offset = _offsets->get_data()[idx];
const size_t array_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets = _offsets->immutable_data();
const size_t offset = offsets[idx];
const size_t array_size = offsets[idx + 1] - offset;
buf->begin_push_array();
auto* elements = _elements.get();
@ -523,8 +536,9 @@ void ArrayColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_
Datum ArrayColumn::get(size_t idx) const {
DCHECK_LT(idx + 1, _offsets->size()) << "idx + 1 should be less than offsets size";
size_t offset = _offsets->get_data()[idx];
size_t array_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets = _offsets->immutable_data();
size_t offset = offsets[idx];
size_t array_size = offsets[idx + 1] - offset;
DatumArray res(array_size);
for (size_t i = 0; i < array_size; ++i) {
@ -535,8 +549,9 @@ Datum ArrayColumn::get(size_t idx) const {
std::pair<size_t, size_t> ArrayColumn::get_element_offset_size(size_t idx) const {
DCHECK_LT(idx + 1, _offsets->size());
size_t offset = _offsets->get_data()[idx];
size_t size = _offsets->get_data()[idx + 1] - _offsets->get_data()[idx];
const auto offsets = _offsets->immutable_data();
size_t offset = offsets[idx];
size_t size = offsets[idx + 1] - offsets[idx];
return {offset, size};
}
@ -548,7 +563,8 @@ size_t ArrayColumn::get_element_null_count(size_t idx) const {
size_t ArrayColumn::get_element_size(size_t idx) const {
DCHECK_LT(idx + 1, _offsets->size());
return _offsets->get_data()[idx + 1] - _offsets->get_data()[idx];
const auto offsets = _offsets->immutable_data();
return offsets[idx + 1] - offsets[idx];
}
bool ArrayColumn::set_null(size_t idx) {
@ -557,8 +573,9 @@ bool ArrayColumn::set_null(size_t idx) {
size_t ArrayColumn::reference_memory_usage(size_t from, size_t size) const {
DCHECK_LE(from + size, this->size()) << "Range error";
size_t start_offset = _offsets->get_data()[from];
size_t elements_num = _offsets->get_data()[from + size] - start_offset;
const auto offsets = _offsets->immutable_data();
size_t start_offset = offsets[from];
size_t elements_num = offsets[from + size] - start_offset;
return _elements->reference_memory_usage(start_offset, elements_num) + _offsets->reference_memory_usage(from, size);
}
@ -576,8 +593,9 @@ void ArrayColumn::reset_column() {
std::string ArrayColumn::debug_item(size_t idx) const {
DCHECK_LT(idx, size());
uint32_t offset = _offsets->get_data()[idx];
uint32_t array_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets = _offsets->immutable_data();
uint32_t offset = offsets[idx];
uint32_t array_size = offsets[idx + 1] - offset;
std::stringstream ss;
ss << "[";
@ -629,10 +647,11 @@ size_t ArrayColumn::get_total_elements_num(const NullColumnPtr& null_column) con
DCHECK_LE(_offsets->size() - 1, null_column->size());
size_t elements_num = 0;
size_t num_rows = _offsets->size() - 1;
const auto& null_data = null_column->get_data();
const auto offsets = _offsets->immutable_data();
const auto null_data = null_column->immutable_data();
for (size_t i = 0; i < num_rows; i++) {
if (!null_data[i]) {
elements_num += _offsets->get_data()[i + 1] - _offsets->get_data()[i];
elements_num += offsets[i + 1] - offsets[i];
}
}
return elements_num;
@ -643,7 +662,7 @@ bool ArrayColumn::compare_lengths_from_offsets(const OffsetColumn& v1, const Off
const NullColumnPtr& null_column) {
[[maybe_unused]] const uint8_t* null_data = nullptr;
if constexpr (!IgnoreNull) {
null_data = null_column->get_data().data();
null_data = null_column->immutable_data().data();
}
size_t num_rows = v1.size() - 1;
@ -652,8 +671,8 @@ bool ArrayColumn::compare_lengths_from_offsets(const OffsetColumn& v1, const Off
num_rows = 1;
}
bool result = true;
const auto& offsets_v1 = v1.get_data();
const auto& offsets_v2 = v2.get_data();
const auto offsets_v1 = v1.immutable_data();
const auto offsets_v2 = v2.immutable_data();
for (size_t i = 0; i < num_rows && result; i++) {
[[maybe_unused]] uint32_t len1 =

View File

@ -136,10 +136,10 @@ size_t ArrayViewColumn::filter_range(const Filter& filter, size_t from, size_t t
int ArrayViewColumn::compare_at(size_t left, size_t right, const Column& right_column, int nan_direction_hint) const {
// @TODO support compare with ArrayColumn
const auto& rhs = down_cast<const ArrayViewColumn&>(right_column);
size_t lhs_offset = _offsets->get_data()[left];
size_t lhs_length = _lengths->get_data()[left];
size_t rhs_offset = rhs._offsets->get_data()[right];
size_t rhs_length = rhs._lengths->get_data()[right];
size_t lhs_offset = _offsets->immutable_data()[left];
size_t lhs_length = _lengths->immutable_data()[left];
size_t rhs_offset = rhs._offsets->immutable_data()[right];
size_t rhs_length = rhs._lengths->immutable_data()[right];
size_t min_size = std::min(lhs_length, rhs_length);
for (size_t i = 0; i < min_size; i++) {
@ -165,10 +165,10 @@ void ArrayViewColumn::compare_column(const Column& rhs, std::vector<int8_t>* out
int ArrayViewColumn::equals(size_t left, const Column& right_column, size_t right, bool safe_eq) const {
const auto& rhs = down_cast<const ArrayViewColumn&>(right_column);
size_t lhs_offset = _offsets->get_data()[left];
size_t lhs_length = _lengths->get_data()[left];
size_t rhs_offset = rhs._offsets->get_data()[right];
size_t rhs_length = rhs._lengths->get_data()[right];
size_t lhs_offset = _offsets->immutable_data()[left];
size_t lhs_length = _lengths->immutable_data()[left];
size_t rhs_offset = rhs._offsets->immutable_data()[right];
size_t rhs_length = rhs._lengths->immutable_data()[right];
if (lhs_length != rhs_length) {
return EQUALS_FALSE;
@ -188,8 +188,8 @@ int ArrayViewColumn::equals(size_t left, const Column& right_column, size_t righ
Datum ArrayViewColumn::get(size_t idx) const {
DCHECK_LT(idx, _offsets->size()) << "idx should be less than offsets size";
size_t offset = _offsets->get_data()[idx];
size_t length = _lengths->get_data()[idx];
size_t offset = _offsets->immutable_data()[idx];
size_t length = _lengths->immutable_data()[idx];
DatumArray res(length);
for (size_t i = 0; i < length; ++i) {
@ -229,15 +229,15 @@ void ArrayViewColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool
}
size_t ArrayViewColumn::get_element_null_count(size_t idx) const {
size_t offset = _offsets->get_data()[idx];
size_t length = _lengths->get_data()[idx];
size_t offset = _offsets->immutable_data()[idx];
size_t length = _lengths->immutable_data()[idx];
auto nullable_column = down_cast<const NullableColumn*>(_elements.get());
return nullable_column->null_count(offset, length);
}
size_t ArrayViewColumn::get_element_size(size_t idx) const {
DCHECK_LT(idx, _lengths->size());
return _lengths->get_data()[idx];
return _lengths->immutable_data()[idx];
}
void ArrayViewColumn::check_or_die() const {
@ -246,16 +246,16 @@ void ArrayViewColumn::check_or_die() const {
DCHECK(_lengths);
DCHECK_EQ(_offsets->size(), _lengths->size());
for (size_t i = 0; i < _offsets->size(); i++) {
uint32_t offset = _offsets->get_data()[i];
uint32_t length = _lengths->get_data()[i];
uint32_t offset = _offsets->immutable_data()[i];
uint32_t length = _lengths->immutable_data()[i];
DCHECK_LE(offset + length, _elements->size());
}
}
std::string ArrayViewColumn::debug_item(size_t idx) const {
DCHECK_LT(idx, size());
uint32_t offset = _offsets->get_data()[idx];
uint32_t length = _lengths->get_data()[idx];
uint32_t offset = _offsets->immutable_data()[idx];
uint32_t length = _lengths->immutable_data()[idx];
std::stringstream ss;
ss << "[";
@ -300,8 +300,8 @@ ColumnPtr ArrayViewColumn::to_array_column() const {
uint32_t last_offset = 0;
size_t num_rows = _offsets->size();
for (size_t i = 0; i < num_rows; i++) {
uint32_t offset = _offsets->get_data()[i];
uint32_t length = _lengths->get_data()[i];
uint32_t offset = _offsets->immutable_data()[i];
uint32_t length = _lengths->immutable_data()[i];
// append element
array_elements->append(*_elements, offset, length);
array_offsets->append(last_offset + length);
@ -334,9 +334,9 @@ ColumnPtr ArrayViewColumn::from_array_column(const ColumnPtr& column) {
if (column->is_nullable()) {
auto nullable_column = down_cast<const NullableColumn*>(column.get());
DCHECK(nullable_column != nullptr);
const auto& null_data = nullable_column->null_column()->get_data();
const auto null_data = nullable_column->immutable_null_column_data();
auto array_column = down_cast<const ArrayColumn*>(nullable_column->data_column().get());
const auto& array_offsets = array_column->offsets().get_data();
const auto array_offsets = array_column->offsets().immutable_data();
view_elements = array_column->elements_column();
@ -355,7 +355,7 @@ ColumnPtr ArrayViewColumn::from_array_column(const ColumnPtr& column) {
auto array_column = down_cast<const ArrayColumn*>(column.get());
view_elements = array_column->elements_column();
const auto& array_offsets = array_column->offsets().get_data();
const auto array_offsets = array_column->offsets().immutable_data();
for (size_t i = 0; i < column->size(); i++) {
uint32_t offset = array_offsets[i];

View File

@ -49,6 +49,7 @@ public:
using Container = Buffer<Slice>;
using ProxyContainer = BinaryDataProxyContainer;
using ImmContainer = BinaryDataProxyContainer;
// TODO(kks): when we create our own vector, we could let vector[-1] = 0,
// and then we don't need explicitly emplace_back zero value

View File

@ -15,6 +15,7 @@
#pragma once
#include <cstdint>
#include <span>
#include <vector>
#include "runtime/memory/column_allocator.h"
@ -24,6 +25,7 @@ namespace starrocks {
// Bytes is a special vector<uint8_t> in which the internal memory is always allocated with an additional 16 bytes,
// to make life easier with 128 bit instructions.
typedef starrocks::raw::RawVectorPad16<uint8_t, ColumnAllocator<uint8_t>> Bytes;
using Bytes = starrocks::raw::RawVectorPad16<uint8_t, ColumnAllocator<uint8_t>>;
using ImmBytes = const std::span<const uint8_t>;
} // namespace starrocks

View File

@ -67,7 +67,7 @@ StatusOr<ColumnPtr> Column::upgrade_helper_func(Ptr* col) {
}
}
bool Column::empty_null_in_complex_column(const Filter& null_data, const Buffer<uint32_t>& offsets) {
bool Column::empty_null_in_complex_column(const ImmBuffer<uint8_t>& null_data, const ImmBuffer<uint32_t>& offsets) {
DCHECK_EQ(null_data.size(), this->size());
if (!is_array() && !is_map()) {
throw std::runtime_error("empty_null_in_complex_column() only works for array and map column.");

View File

@ -15,16 +15,17 @@
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <type_traits>
#include "column/column_visitor.h"
#include "column/column_visitor_mutable.h"
#include "column/container_resource.h"
#include "column/vectorized_fwd.h"
#include "common/cow.h"
#include "common/statusor.h"
#include "gutil/casts.h"
#include "runtime/memory/column_allocator.h"
#include "storage/delete_condition.h" // for DelCondSatisfied
#include "util/slice.h"
@ -275,6 +276,7 @@ public:
// - the count of copied integers on success.
// - -1 if this is not a numeric column.
[[nodiscard]] virtual size_t append_numbers(const void* buff, size_t length) = 0;
// Overload taking a ContainerResource: by default it forwards res.data() and res.length() to
// append_numbers(const void*, size_t) and returns that call's result.
// Virtual, so subclasses can replace this default.
// NOTE(review): unlike the pointer overload above, this one is not [[nodiscard]] -- confirm
// whether that difference is intentional.
virtual size_t append_numbers(const ContainerResource& res) { return append_numbers(res.data(), res.length()); }
// Append |*value| |count| times, this is only used when load default value.
virtual void append_value_multiple_times(const void* value, size_t count) = 0;
@ -348,7 +350,7 @@ public:
inline size_t filter(const Filter& filter, size_t count) { return filter_range(filter, 0, count); }
// get rid of the case where the map/array is null but the map/array'elements are not empty.
bool empty_null_in_complex_column(const Filter& null_data, const Buffer<uint32_t>& offsets);
bool empty_null_in_complex_column(const ImmBuffer<uint8_t>& null_data, const ImmBuffer<uint32_t>& offsets);
// FIXME: Many derived implementation assume |to| equals to size().
virtual size_t filter_range(const Filter& filter, size_t from, size_t to) = 0;

View File

@ -58,7 +58,8 @@ void ColumnHelper::merge_two_filters(const ColumnPtr& column, Filter* __restrict
// NOTE(zc): Must use uint8_t* to enable auto-vectorized.
const auto nulls = nullable_column->null_column_data().data();
const auto datas = (down_cast<const UInt8Column*>(nullable_column->data_column().get()))->get_data().data();
const auto datas =
(down_cast<const UInt8Column*>(nullable_column->data_column().get()))->immutable_data().data();
auto num_rows = nullable_column->size();
// we treat null(1) as false(0)
for (size_t j = 0; j < num_rows; ++j) {
@ -66,7 +67,7 @@ void ColumnHelper::merge_two_filters(const ColumnPtr& column, Filter* __restrict
}
} else {
size_t num_rows = column->size();
const auto datas = as_raw_const_column<UInt8Column>(column)->get_data().data();
const auto datas = as_raw_const_column<UInt8Column>(column)->immutable_data().data();
for (size_t j = 0; j < num_rows; ++j) {
(*filter)[j] &= datas[j];
}
@ -499,7 +500,7 @@ ColumnPtr ColumnHelper::convert_time_column_from_double_to_str(const ColumnPtr&
new_data_column->reserve(size);
for (int row = 0; row < size; ++row) {
auto time = data_column->get_data()[row];
auto time = data_column->immutable_data()[row];
std::string time_str = time_str_from_double(time);
new_data_column->append(time_str);
}

View File

@ -132,8 +132,8 @@ public:
if (offset0->size() != offset1->size()) {
return false;
}
const auto& data1 = offset0->get_data();
const auto& data2 = offset1->get_data();
const auto data1 = offset0->immutable_data();
const auto data2 = offset1->immutable_data();
return std::equal(data1.begin(), data1.end(), data2.begin());
}
@ -546,7 +546,7 @@ public:
static size_t compute_bytes_size(ColumnsConstIterator const& begin, ColumnsConstIterator const& end);
template <typename T, bool avx512f>
static size_t t_filter_range(const Filter& filter, T* data, size_t from, size_t to) {
static size_t t_filter_range(const Filter& filter, T* dst_data, const T* src_data, size_t from, size_t to) {
auto start_offset = from;
auto result_offset = from;
@ -567,21 +567,21 @@ public:
// all no hit, pass
} else if (mask == 0xffffffff) {
// all hit, copy all
memmove(data + result_offset, data + start_offset, kBatchNums * data_type_size);
memmove(dst_data + result_offset, src_data + start_offset, kBatchNums * data_type_size);
result_offset += kBatchNums;
} else {
// clang-format off
#define AVX512_COPY(SHIFT, MASK, WIDTH) \
{ \
auto m = (mask >> SHIFT) & MASK; \
if (m) { \
__m512i dst; \
__m512i src = _mm512_loadu_epi## WIDTH(data + start_offset + SHIFT); \
dst = _mm512_mask_compress_epi## WIDTH(dst, m, src); \
_mm512_storeu_epi## WIDTH(data + result_offset, dst); \
result_offset += __builtin_popcount(m); \
} \
#define AVX512_COPY(SHIFT, MASK, WIDTH) \
{ \
auto m = (mask >> SHIFT) & MASK; \
if (m) { \
__m512i dst; \
__m512i src = _mm512_loadu_epi##WIDTH(src_data + start_offset + SHIFT); \
dst = _mm512_mask_compress_epi##WIDTH(dst, m, src); \
_mm512_storeu_epi##WIDTH(dst_data + result_offset, dst); \
result_offset += __builtin_popcount(m); \
} \
}
// In theory we should put k1 in clobbers.
@ -590,8 +590,8 @@ public:
{ \
auto m = (mask >> SHIFT) & MASK; \
if (m) { \
T* src = data + start_offset + SHIFT; \
T* dst = data + result_offset; \
const T* src = src_data + start_offset + SHIFT; \
T* dst = dst_data + result_offset; \
__asm__ volatile("vmovdqu" #WIDTH \
" (%[s]), %%zmm1\n" \
"kmovw %[mask], %%k1\n" \
@ -611,7 +611,7 @@ public:
} else {
phmap::priv::BitMask<uint32_t, 32> bitmask(mask);
for (auto idx : bitmask) {
*(data + result_offset++) = *(data + start_offset + idx);
*(dst_data + result_offset++) = *(src_data + start_offset + idx);
}
}
}
@ -630,14 +630,14 @@ public:
if (nibble_mask == 0) {
// skip
} else if (nibble_mask == 0xffff'ffff'ffff'ffffull) {
memmove(data + result_offset, data + start_offset, kBatchNums * data_type_size);
memmove(dst_data + result_offset, src_data + start_offset, kBatchNums * data_type_size);
result_offset += kBatchNums;
} else {
// Make each nibble only keep the highest bit 1, that is 0b1111 -> 0b1000.
nibble_mask &= 0x8888'8888'8888'8888ull;
for (; nibble_mask > 0; nibble_mask &= nibble_mask - 1) {
uint32_t index = __builtin_ctzll(nibble_mask) >> 2;
*(data + result_offset++) = *(data + start_offset + index);
*(dst_data + result_offset++) = *(src_data + start_offset + index);
}
}
@ -648,7 +648,7 @@ public:
// clang-format on
for (auto i = start_offset; i < to; ++i) {
if (filter[i]) {
*(data + result_offset) = *(data + i);
*(dst_data + result_offset) = *(src_data + i);
result_offset++;
}
}
@ -659,9 +659,18 @@ public:
// In-place variant of filter_range: forwards to t_filter_range and returns its result.
// The AVX-512 instantiation is selected at runtime via base::CPU::instance()->has_avx512f().
// Rendered diff: in each branch the first `return` is the pre-change call (single `data`
// pointer) and the second is the post-change call, which passes `data` as both the
// destination and the source.
template <typename T>
static size_t filter_range(const Filter& filter, T* data, size_t from, size_t to) {
if (base::CPU::instance()->has_avx512f()) {
return t_filter_range<T, true>(filter, data, from, to);
return t_filter_range<T, true>(filter, data, data, from, to);
} else {
return t_filter_range<T, false>(filter, data, from, to);
return t_filter_range<T, false>(filter, data, data, from, to);
}
}
// Out-of-place variant of filter_range: reads from `src_data`, writes to `dst_data`.
// Picks the AVX-512 or the generic instantiation of t_filter_range according to the
// CPU capability reported at runtime and returns that call's result unchanged.
template <typename T>
static size_t filter_range(const Filter& filter, T* dst_data, const T* src_data, size_t from, size_t to) {
    const bool use_avx512 = base::CPU::instance()->has_avx512f();
    return use_avx512 ? t_filter_range<T, true>(filter, dst_data, src_data, from, to)
                      : t_filter_range<T, false>(filter, dst_data, src_data, from, to);
}
@ -674,7 +683,7 @@ public:
static auto call_nullable_func(const Column* column, FastPath&& fast_path, SlowPath&& slow_path) {
if (column->is_nullable()) {
const auto* nullable_column = down_cast<const NullableColumn*>(column);
const auto& null_data = nullable_column->immutable_null_column_data();
const auto null_data = nullable_column->immutable_null_column_data();
const Column* data_column = nullable_column->data_column().get();
if (column->has_null()) {
return std::forward<SlowPath>(slow_path)(null_data, data_column);
@ -713,27 +722,41 @@ struct ChunkSliceTemplate {
// Maps a LogicalType to an accessor for the element container of the matching column type.
// ColumnType comes from RunTimeTypeTraits<ltype>; both overloads cast the column with
// ColumnHelper::as_raw_column<ColumnType>.
// Rendered diff: each overload appears twice. The first form returns `const auto&` from
// get_data(); the second returns `const auto` by value from immutable_data().
template <LogicalType ltype>
struct GetContainer {
using ColumnType = typename RunTimeTypeTraits<ltype>::ColumnType;
static const auto& get_data(const Column* column) {
return ColumnHelper::as_raw_column<ColumnType>(column)->get_data();
static const auto get_data(const Column* column) {
return ColumnHelper::as_raw_column<ColumnType>(column)->immutable_data();
}
// Same accessor for a ColumnPtr; the raw pointer is taken with column.get().
static const auto& get_data(const ColumnPtr& column) {
return ColumnHelper::as_raw_column<ColumnType>(column.get())->get_data();
static const auto get_data(const ColumnPtr& column) {
return ColumnHelper::as_raw_column<ColumnType>(column.get())->immutable_data();
}
};
// Generates a GetContainer specialization that casts the column to BinaryColumn and
// returns get_proxy_data(). It is expanded through APPLY_FOR_ALL_STRING_TYPE and then
// undefined.
// Rendered diff: each accessor signature appears twice (`const auto&` before the change,
// `const auto` after it).
// NOTE(review): with the by-value `const auto` form the result of get_proxy_data() is
// copied unless it is a lightweight view - confirm against BinaryColumn.
#define GET_CONTAINER(ltype) \
template <> \
struct GetContainer<ltype> { \
static const auto& get_data(const Column* column) { \
static const auto get_data(const Column* column) { \
return ColumnHelper::as_raw_column<BinaryColumn>(column)->get_proxy_data(); \
} \
static const auto& get_data(const ColumnPtr& column) { \
static const auto get_data(const ColumnPtr& column) { \
return ColumnHelper::as_raw_column<BinaryColumn>(column)->get_proxy_data(); \
} \
};
APPLY_FOR_ALL_STRING_TYPE(GET_CONTAINER)
#undef GET_CONTAINER
// Second GET_CONTAINER definition: a GetContainer specialization whose accessors return
// get_data() of the RunTimeTypeTraits column type by value.
// The macro is defined and undefined here without being expanded; its only invocation
// (TYPE_JSON) is commented out, so it currently generates no code.
#define GET_CONTAINER(ltype) \
template <> \
struct GetContainer<ltype> { \
using ColumnType = typename RunTimeTypeTraits<ltype>::ColumnType; \
static const auto get_data(const Column* column) { \
return ColumnHelper::as_raw_column<ColumnType>(column)->get_data(); \
} \
static const auto get_data(const ColumnPtr& column) { \
return ColumnHelper::as_raw_column<ColumnType>(column)->get_data(); \
} \
};
// GET_CONTAINER(TYPE_JSON)
#undef GET_CONTAINER
// Convenience aliases: ChunkSliceTemplate instantiated for each chunk pointer type.
using ChunkSlice = ChunkSliceTemplate<ChunkUniquePtr>;
using ChunkSharedSlice = ChunkSliceTemplate<ChunkPtr>;
using SegmentedChunkSlice = ChunkSliceTemplate<SegmentedChunkPtr>;

View File

@ -0,0 +1,87 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <span>
#include "storage/rowset/page_handle_fwd.h"
namespace starrocks {
class faststring;
// A read-only view (pointer + length) bundled with a shared reference to the
// PageHandle it was created with.
//
// The object never dereferences or frees `_data`; it only stores the pointer next to a
// std::shared_ptr<PageHandle>. (Presumably the handle is what keeps the pointed-to
// memory alive - confirm against PageHandle.)
//
// `length` is stored verbatim. span<T>() interprets it as a number of T elements, so
// callers that start from a byte length must convert it with set_length() first.
class ContainerResource {
public:
    ContainerResource() = default;
    ContainerResource(const std::shared_ptr<PageHandle>& handle, const void* data, size_t length)
            : _handle(handle), _data(data), _length(length) {}

    // Copies share the handle and alias the same memory.
    ContainerResource(const ContainerResource& other) = default;
    // Declared explicitly: the user-declared move operations below would otherwise
    // leave copy assignment implicitly deleted.
    ContainerResource& operator=(const ContainerResource& other) = default;

    // Move construction swaps with the default-initialized members, so the source is
    // left empty (null handle, null data, zero length).
    ContainerResource(ContainerResource&& other) noexcept {
        std::swap(this->_data, other._data);
        std::swap(this->_length, other._length);
        std::swap(this->_handle, other._handle);
    }

    // Move assignment exchanges state with `other`; the previous state of *this ends up
    // in `other` and is released when `other` is destroyed or reset.
    ContainerResource& operator=(ContainerResource&& other) noexcept {
        std::swap(this->_data, other._data);
        std::swap(this->_length, other._length);
        std::swap(this->_handle, other._handle);
        return *this;
    }

    // Drops the current state and shares only `other`'s handle. The data pointer and
    // length are left cleared; install them afterwards with set_data()/set_length().
    void acquire(const ContainerResource& other) {
        reset();
        _handle = other._handle;
    }

    // Typed view of the resource: `_length` elements of T starting at `_data`.
    template <class T>
    std::span<const T> span() const {
        return {reinterpret_cast<const T*>(_data), _length};
    }

    // Releases the handle and clears the view. The length is zeroed together with the
    // pointer so that length()/span() never report a stale size for a null pointer.
    void reset() {
        _handle.reset();
        _data = nullptr;
        _length = 0;
    }

    // A resource is empty when it has no data pointer; the handle is not consulted.
    bool empty() const { return _data == nullptr; }

    const void* data() const { return _data; }
    size_t length() const { return _length; }

    void set_data(const void* data) { _data = data; }
    void set_length(size_t length) { _length = length; }

    // True when the data pointer satisfies alignof(T). A null pointer counts as aligned.
    template <class T>
    bool is_aligned() const {
        return reinterpret_cast<uintptr_t>(_data) % alignof(T) == 0;
    }

private:
    std::shared_ptr<PageHandle> _handle;
    const void* _data{};
    size_t _length{};
};
} // namespace starrocks

View File

@ -62,20 +62,20 @@ int DecimalV3Column<T>::scale() const {
// Formats the decimal at row `idx` with DecimalV3Cast::to_string using this column's
// precision and scale, and pushes the text into `buf` via push_decimal.
// `is_binary_protocol` is not used in this body.
// Rendered diff: the two `data` declarations are the pre-change (get_data) and
// post-change (immutable_data) forms of the same line.
template <typename T>
void DecimalV3Column<T>::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_binary_protocol) const {
auto& data = this->get_data();
const auto data = this->immutable_data();
auto s = DecimalV3Cast::to_string<T>(data[idx], _precision, _scale);
buf->push_decimal(s);
}
// Returns the decimal at row `idx` rendered by DecimalV3Cast::to_string with this
// column's precision and scale.
// Rendered diff: the two `data` declarations are the pre-change and post-change forms.
template <typename T>
std::string DecimalV3Column<T>::debug_item(size_t idx) const {
auto& data = this->get_data();
const auto data = this->immutable_data();
return DecimalV3Cast::to_string<T>(data[idx], _precision, _scale);
}
template <typename T>
void DecimalV3Column<T>::crc32_hash(uint32_t* hash, uint32_t from, uint32_t to) const {
const auto& data = this->get_data();
const auto data = this->immutable_data();
// When decimal-v2 columns are used as distribution keys and users try to upgrade
// decimal-v2 column to decimal-v3 by schema change, decimal128(27,9) shall be the
// only acceptable target type, so keeping result of crc32_hash on type decimal128(27,9)
@ -101,7 +101,7 @@ template <typename T>
int64_t DecimalV3Column<T>::xor_checksum(uint32_t from, uint32_t to) const {
// The XOR of DecimalV3Column
// XOR all the decimals one by one
auto& data = this->get_data();
const auto data = this->immutable_data();
int64_t xor_checksum = 0;
const T* src = reinterpret_cast<const T*>(data.data());

View File

@ -12,13 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gutil/strings/fastmem.h>
#include "column/fixed_length_column_base.h"
#include "column/column_helper.h"
#include "column/fixed_length_column.h"
#include "column/vectorized_fwd.h"
#include "common/config.h"
#include "exec/sorting/sort_helper.h"
#include "gutil/casts.h"
#include "gutil/strings/fastmem.h"
#include "gutil/strings/substitute.h"
#include "simd/gather.h"
#include "storage/decimal12.h"
#include "types/int256.h"
@ -39,22 +41,24 @@ template <typename T>
void FixedLengthColumnBase<T>::append(const Column& src, size_t offset, size_t count) {
DCHECK(this != &src);
const size_t orig_size = _data.size();
raw::stl_vector_resize_uninitialized(&_data, orig_size + count);
auto& datas = get_data();
const size_t orig_size = datas.size();
raw::stl_vector_resize_uninitialized(&datas, orig_size + count);
const T* src_data = reinterpret_cast<const T*>(src.raw_data());
strings::memcpy_inlined(_data.data() + orig_size, src_data + offset, count * sizeof(T));
strings::memcpy_inlined(datas.data() + orig_size, src_data + offset, count * sizeof(T));
}
template <typename T>
void FixedLengthColumnBase<T>::append_selective(const Column& src, const uint32_t* indexes, uint32_t from,
uint32_t size) {
DCHECK(this != &src);
indexes += from;
const size_t orig_size = _data.size();
raw::stl_vector_resize_uninitialized(&_data, orig_size + size);
auto* dest_data = _data.data() + orig_size;
indexes += from;
auto& datas = get_data();
const size_t orig_size = datas.size();
raw::stl_vector_resize_uninitialized(&datas, orig_size + size);
auto* dest_data = datas.data() + orig_size;
const T* src_data = reinterpret_cast<const T*>(src.raw_data());
SIMDGather::gather(dest_data, src_data, indexes, size);
@ -63,25 +67,59 @@ void FixedLengthColumnBase<T>::append_selective(const Column& src, const uint32_
// Appends `size` copies of src[index] to this column. `src` must be a different object
// (DCHECK) and is treated as a FixedLengthColumnBase<T>.
// Rendered diff: the statements that touch `_data` / `src.raw_data()` directly are the
// pre-change lines; the ones using get_data() / immutable_data() are the post-change lines.
template <typename T>
void FixedLengthColumnBase<T>::append_value_multiple_times(const Column& src, uint32_t index, uint32_t size) {
DCHECK(this != &src);
size_t orig_size = _data.size();
_data.resize(orig_size + size);
const T* src_data = reinterpret_cast<const T*>(src.raw_data());
// get_data() materializes any borrowed resource into the owned buffer before it grows.
auto& datas = get_data();
size_t orig_size = datas.size();
datas.resize(orig_size + size);
// The source is read through its immutable view, so it is not materialized.
const auto& src_col = down_cast<const FixedLengthColumnBase<T>&>(src);
const auto src_datas = src_col.immutable_data();
const T* src_data = src_datas.data();
for (size_t i = 0; i < size; ++i) {
_data[orig_size + i] = src_data[index];
datas[orig_size + i] = src_data[index];
}
}
// Appends the values described by `res`, whose length() is a byte count.
//
// Fast path (zero copy): when config::enable_zero_copy_from_page_cache is set, this
// column holds nothing yet (no rows and no previously adopted resource) and the incoming
// buffer is suitably aligned for T, the column adopts `res` - it shares the handle and
// records the pointer plus the element count - instead of copying.
// Otherwise the bytes are copied through append_numbers(const void*, size_t).
//
// Returns the number of values appended.
template <typename T>
size_t FixedLengthColumnBase<T>::append_numbers(const ContainerResource& res) {
    // The alignment that matters is that of the incoming buffer: it will later be read
    // as `const T*` through immutable_data(). (Checking the column's own, still empty,
    // resource would test a null pointer and always succeed.)
    const bool can_adopt = config::enable_zero_copy_from_page_cache && empty() && _resource.empty() &&
                           res.is_aligned<T>();
    if (!can_adopt) {
        return append_numbers(res.data(), res.length());
    }

    DCHECK(res.length() % sizeof(ValueType) == 0);
    _resource.acquire(res);
    _resource.set_data(res.data());
    // Stored length is an element count, which is what ContainerResource::span<T>() expects.
    _resource.set_length(res.length() / sizeof(ValueType));
    return _resource.length();
}
// Appends a single value produced by DefaultValueGenerator<ValueType>::next_value().
// get_data() is called first, so any borrowed resource is materialized before the append.
template <typename T>
void FixedLengthColumnBase<T>::append_default() {
    get_data().emplace_back(DefaultValueGenerator<ValueType>::next_value());
}
// Grows the column by `count` rows, each filled with the value returned by one call to
// DefaultValueGenerator<ValueType>::next_value().
template <typename T>
void FixedLengthColumnBase<T>::append_default(size_t count) {
    Container& values = get_data();
    const size_t grown_size = values.size() + count;
    values.resize(grown_size, DefaultValueGenerator<ValueType>::next_value());
}
//TODO(fzh): optimize copy using SIMD
template <typename T>
StatusOr<ColumnPtr> FixedLengthColumnBase<T>::replicate(const Buffer<uint32_t>& offsets) {
auto dest = this->clone_empty();
auto& dest_data = down_cast<FixedLengthColumnBase<T>&>(*dest);
dest_data._data.resize(offsets.back());
auto& dest_datas = dest_data.get_data();
const auto datas = this->immutable_data();
dest_datas.resize(offsets.back());
size_t orig_size = offsets.size() - 1; // this->size() may be large than offsets->size() -1
for (auto i = 0; i < orig_size; ++i) {
for (auto j = offsets[i]; j < offsets[i + 1]; ++j) {
dest_data._data[j] = _data[i];
dest_datas[j] = datas[i];
}
}
return dest;
@ -89,21 +127,25 @@ StatusOr<ColumnPtr> FixedLengthColumnBase<T>::replicate(const Buffer<uint32_t>&
// Overwrites every row i with filter[i] == 1 by one default value obtained from a single
// DefaultValueGenerator<T>::next_value() call. Iterates over filter.size() rows.
// NOTE(review): assumes filter.size() <= column size; no bound check is visible here.
// Rendered diff: `_data[i] = val;` is the pre-change line, `datas[i] = val;` the post-change line.
template <typename T>
void FixedLengthColumnBase<T>::fill_default(const Filter& filter) {
auto& datas = get_data();
T val = DefaultValueGenerator<T>::next_value();
for (size_t i = 0; i < filter.size(); i++) {
if (filter[i] == 1) {
_data[i] = val;
datas[i] = val;
}
}
}
template <typename T>
Status FixedLengthColumnBase<T>::fill_range(const std::vector<T>& ids, const Filter& filter) {
DCHECK_EQ(filter.size(), _data.size());
auto& datas = get_data();
DCHECK_EQ(filter.size(), datas.size());
size_t j = 0;
for (size_t i = 0; i < _data.size(); ++i) {
for (size_t i = 0; i < datas.size(); ++i) {
if (filter[i] == 1) {
_data[i] = ids[j];
datas[i] = ids[j];
++j;
}
}
@ -114,34 +156,45 @@ Status FixedLengthColumnBase<T>::fill_range(const std::vector<T>& ids, const Fil
// Scatter update: for each row i of `src`, writes src[i] into this column at position
// indexes[i]. `indexes` must provide at least src.size() entries.
// Rendered diff: the first `src_data` declaration and `_data[indexes[i]] = ...` are the
// pre-change lines; the get_data()/immutable_data() based lines are the post-change ones.
template <typename T>
void FixedLengthColumnBase<T>::update_rows(const Column& src, const uint32_t* indexes) {
const T* src_data = reinterpret_cast<const T*>(src.raw_data());
// Materialize this column (it is written), but read the source through its immutable view.
auto& datas = get_data();
const auto& src_col = down_cast<const FixedLengthColumnBase<T>&>(src);
const auto src_datas = src_col.immutable_data();
const T* src_data = src_datas.data();
size_t replace_num = src.size();
for (uint32_t i = 0; i < replace_num; ++i) {
// Debug-only bound check against the owned buffer.
DCHECK_LT(indexes[i], _data.size());
_data[indexes[i]] = src_data[i];
datas[indexes[i]] = src_data[i];
}
}
// Keeps, within rows [from, to), only the rows selected by `filter`, compacting them
// to start at position `from`; rows before `from` are preserved. The column is resized
// to the value returned by ColumnHelper::filter_range, which is also returned here.
//
// Works for both storage modes:
//  - owned buffer: `src` aliases `_data`, the resize is a no-op and filtering is in place;
//  - borrowed resource: values are filtered out of the resource into `_data`, after which
//    the resource is released.
template <typename T>
size_t FixedLengthColumnBase<T>::filter_range(const Filter& filter, size_t from, size_t to) {
    const auto src = immutable_data();
    const bool borrowed = !_resource.empty();

    // Same size as before when `_data` already owns the values, so `src` stays valid.
    raw::stl_vector_resize_uninitialized(&_data, src.size());

    // ColumnHelper::filter_range starts writing at offset `from`. When the values come
    // from the borrowed resource, the untouched prefix [0, from) must be copied
    // explicitly, otherwise those rows of `_data` would stay uninitialized.
    if (borrowed && from > 0) {
        memcpy(_data.data(), src.data(), from * sizeof(T));
    }

    auto size = ColumnHelper::filter_range<T>(filter, _data.data(), src.data(), from, to);
    _data.resize(size);
    _resource.reset();
    return size;
}
// Compares this[left] with rhs[right] using SorterComparator<T>::compare and returns its
// result. `rhs` must be a FixedLengthColumnBase<T> (checked with dynamic_cast in debug
// builds only). `nan_direction_hint` is not used in this body.
// Rendered diff: the lines reading `_data` directly are the pre-change version; the lines
// using immutable_data() are the post-change version.
template <typename T>
int FixedLengthColumnBase<T>::compare_at(size_t left, size_t right, const Column& rhs, int nan_direction_hint) const {
DCHECK_LT(left, _data.size());
DCHECK_LT(right, rhs.size());
DCHECK(dynamic_cast<const FixedLengthColumnBase<T>*>(&rhs) != nullptr);
T x = _data[left];
T y = down_cast<const FixedLengthColumnBase<T>&>(rhs)._data[right];
const auto lhs_datas = this->immutable_data();
const auto rhs_datas = down_cast<const FixedLengthColumnBase<T>&>(rhs).immutable_data();
DCHECK_LT(left, lhs_datas.size());
DCHECK_LT(right, rhs_datas.size());
T x = lhs_datas[left];
T y = rhs_datas[right];
return SorterComparator<T>::compare(x, y);
}
// Copies the sizeof(T) raw bytes of row `idx` to `pos` and returns sizeof(T).
// Rendered diff: the first memcpy is the pre-change line; the immutable_data() based
// pair is the post-change version.
template <typename T>
uint32_t FixedLengthColumnBase<T>::serialize(size_t idx, uint8_t* pos) const {
memcpy(pos, &_data[idx], sizeof(T));
const auto datas = this->immutable_data();
memcpy(pos, &datas[idx], sizeof(T));
return sizeof(T);
}
@ -156,7 +209,7 @@ template <typename T>
void FixedLengthColumnBase<T>::serialize_batch(uint8_t* __restrict__ dst, Buffer<uint32_t>& slice_sizes,
size_t chunk_size, uint32_t max_one_row_size) const {
uint32_t* sizes = slice_sizes.data();
const T* __restrict__ src = _data.data();
const T* __restrict__ src = this->immutable_data().data();
for (size_t i = 0; i < chunk_size; ++i) {
memcpy(dst + i * max_one_row_size + sizes[i], src + i, sizeof(T));
@ -172,7 +225,7 @@ void FixedLengthColumnBase<T>::serialize_batch_with_null_masks(uint8_t* __restri
size_t chunk_size, uint32_t max_one_row_size,
const uint8_t* null_masks, bool has_null) const {
uint32_t* sizes = slice_sizes.data();
const T* __restrict__ src = _data.data();
const T* __restrict__ src = this->immutable_data().data();
if (!has_null) {
for (size_t i = 0; i < chunk_size; ++i) {
@ -201,7 +254,7 @@ template <typename T>
size_t FixedLengthColumnBase<T>::serialize_batch_at_interval(uint8_t* dst, size_t byte_offset, size_t byte_interval,
size_t start, size_t count) const {
const size_t value_size = sizeof(T);
const auto& key_data = get_data();
const auto key_data = this->immutable_data();
uint8_t* buf = dst + byte_offset;
for (size_t i = start; i < start + count; ++i) {
strings::memcpy_inlined(buf, &key_data[i], value_size);
@ -214,98 +267,109 @@ template <typename T>
const uint8_t* FixedLengthColumnBase<T>::deserialize_and_append(const uint8_t* pos) {
T value{};
memcpy(&value, pos, sizeof(T));
_data.emplace_back(value);
this->get_data().emplace_back(value);
return pos + sizeof(T);
}
// Reads one T from each of the first `chunk_size` slices in `srcs` into rows
// [0, chunk_size) of this column, and advances each slice's data pointer by sizeof(T).
// NOTE(review): rows are written starting at index 0, not at the previous end of the
// column; this relies on the semantics of raw::make_room - confirm.
// Rendered diff: lines using `_data` are pre-change, lines using `datas` are post-change.
template <typename T>
void FixedLengthColumnBase<T>::deserialize_and_append_batch(Buffer<Slice>& srcs, size_t chunk_size) {
raw::make_room(&_data, chunk_size);
auto& datas = this->get_data();
raw::make_room(&datas, chunk_size);
for (size_t i = 0; i < chunk_size; ++i) {
memcpy(&_data[i], srcs[i].data, sizeof(T));
memcpy(&datas[i], srcs[i].data, sizeof(T));
srcs[i].data = srcs[i].data + sizeof(T);
}
}
// For every row i in [from, to), replaces hash[i] with HashUtil::fnv_hash over the
// sizeof(ValueType) raw bytes of row i, passing the previous hash[i] as the third argument.
// Rendered diff: the `_data` line is pre-change, the `datas` line is post-change.
template <typename T>
void FixedLengthColumnBase<T>::fnv_hash(uint32_t* hash, uint32_t from, uint32_t to) const {
const auto datas = this->immutable_data();
for (uint32_t i = from; i < to; ++i) {
hash[i] = HashUtil::fnv_hash(&_data[i], sizeof(ValueType), hash[i]);
hash[i] = HashUtil::fnv_hash(&datas[i], sizeof(ValueType), hash[i]);
}
}
// Same as fnv_hash over rows [from, to), but only rows with a non-zero selection[i]
// are hashed; hash[i] of unselected rows is left untouched.
// Rendered diff: the `_data` line is pre-change, the `datas` line is post-change.
template <typename T>
void FixedLengthColumnBase<T>::fnv_hash_with_selection(uint32_t* hash, uint8_t* selection, uint16_t from,
uint16_t to) const {
const auto datas = this->immutable_data();
for (uint16_t i = from; i < to; i++) {
if (selection[i]) {
hash[i] = HashUtil::fnv_hash(&_data[i], sizeof(ValueType), hash[i]);
hash[i] = HashUtil::fnv_hash(&datas[i], sizeof(ValueType), hash[i]);
}
}
}
// Hashes only the rows listed in sel[0 .. sel_size): for each listed row r, hash[r] is
// replaced by HashUtil::fnv_hash over the raw bytes of row r, with the old hash[r]
// passed as the third argument.
// Rendered diff: the `_data` line is pre-change, the `datas` line is post-change.
template <typename T>
void FixedLengthColumnBase<T>::fnv_hash_selective(uint32_t* hash, uint16_t* sel, uint16_t sel_size) const {
const auto datas = this->immutable_data();
for (uint16_t i = 0; i < sel_size; i++) {
hash[sel[i]] = HashUtil::fnv_hash(&_data[sel[i]], sizeof(ValueType), hash[sel[i]]);
hash[sel[i]] = HashUtil::fnv_hash(&datas[sel[i]], sizeof(ValueType), hash[sel[i]]);
}
}
// Must stay identical to RawValue::zlib_crc32.
// Updates hash[i] for every row i in [from, to) with HashUtil::zlib_crc_hash, using the
// previous hash[i] as the third argument. The bytes that are hashed depend on the type:
//  - date / timestamp: the text returned by to_string();
//  - decimal: int_value() first, then frac_value() chained on the intermediate result;
//  - everything else: the sizeof(ValueType) raw bytes of the element.
// Rendered diff: lines reading `_data` are pre-change, lines reading `datas` are post-change.
template <typename T>
void FixedLengthColumnBase<T>::crc32_hash(uint32_t* hash, uint32_t from, uint32_t to) const {
const auto datas = this->immutable_data();
for (uint32_t i = from; i < to; ++i) {
if constexpr (IsDate<T> || IsTimestamp<T>) {
std::string str = _data[i].to_string();
std::string str = datas[i].to_string();
hash[i] = HashUtil::zlib_crc_hash(str.data(), static_cast<int32_t>(str.size()), hash[i]);
} else if constexpr (IsDecimal<T>) {
int64_t int_val = _data[i].int_value();
int32_t frac_val = _data[i].frac_value();
int64_t int_val = datas[i].int_value();
int32_t frac_val = datas[i].frac_value();
uint32_t seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), hash[i]);
hash[i] = HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed);
} else {
hash[i] = HashUtil::zlib_crc_hash(&_data[i], sizeof(ValueType), hash[i]);
hash[i] = HashUtil::zlib_crc_hash(&datas[i], sizeof(ValueType), hash[i]);
}
}
}
// Variant of crc32_hash for rows [from, to) that skips every row whose selection[i] is
// zero. Selected rows are hashed with the same per-type rules: to_string() text for
// date/timestamp, int_value() then frac_value() for decimal, raw bytes otherwise.
// Rendered diff: lines reading `_data` are pre-change, lines reading `datas` are post-change.
template <typename T>
void FixedLengthColumnBase<T>::crc32_hash_with_selection(uint32_t* hash, uint8_t* selection, uint16_t from,
uint16_t to) const {
const auto datas = this->immutable_data();
for (uint16_t i = from; i < to; i++) {
if (!selection[i]) {
continue;
}
if constexpr (IsDate<T> || IsTimestamp<T>) {
std::string str = _data[i].to_string();
std::string str = datas[i].to_string();
hash[i] = HashUtil::zlib_crc_hash(str.data(), static_cast<int32_t>(str.size()), hash[i]);
} else if constexpr (IsDecimal<T>) {
int64_t int_val = _data[i].int_value();
int32_t frac_val = _data[i].frac_value();
int64_t int_val = datas[i].int_value();
int32_t frac_val = datas[i].frac_value();
uint32_t seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), hash[i]);
hash[i] = HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed);
} else {
hash[i] = HashUtil::zlib_crc_hash(&_data[i], sizeof(ValueType), hash[i]);
hash[i] = HashUtil::zlib_crc_hash(&datas[i], sizeof(ValueType), hash[i]);
}
}
}
// Variant of crc32_hash that processes only the rows listed in sel[0 .. sel_size).
// For each listed row r, hash[r] is updated with the same per-type rules: to_string()
// text for date/timestamp, int_value() then frac_value() for decimal, raw bytes otherwise.
// Rendered diff: lines reading `_data` are pre-change, lines reading `datas` are post-change.
template <typename T>
void FixedLengthColumnBase<T>::crc32_hash_selective(uint32_t* hash, uint16_t* sel, uint16_t sel_size) const {
const auto datas = this->immutable_data();
for (uint16_t i = 0; i < sel_size; i++) {
if constexpr (IsDate<T> || IsTimestamp<T>) {
std::string str = _data[sel[i]].to_string();
std::string str = datas[sel[i]].to_string();
hash[sel[i]] = HashUtil::zlib_crc_hash(str.data(), static_cast<int32_t>(str.size()), hash[sel[i]]);
} else if constexpr (IsDecimal<T>) {
int64_t int_val = _data[sel[i]].int_value();
int32_t frac_val = _data[sel[i]].frac_value();
int64_t int_val = datas[sel[i]].int_value();
int32_t frac_val = datas[sel[i]].frac_value();
uint32_t seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), hash[sel[i]]);
hash[sel[i]] = HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed);
} else {
hash[sel[i]] = HashUtil::zlib_crc_hash(&_data[sel[i]], sizeof(ValueType), hash[sel[i]]);
hash[sel[i]] = HashUtil::zlib_crc_hash(&datas[sel[i]], sizeof(ValueType), hash[sel[i]]);
}
}
}
template <typename T>
void FixedLengthColumnBase<T>::murmur_hash3_x86_32(uint32_t* hash, uint32_t from, uint32_t to) const {
const auto datas = this->immutable_data();
for (uint32_t i = from; i < to; ++i) {
uint32_t hash_value = 0;
if constexpr (IsDate<T>) {
@ -313,15 +377,15 @@ void FixedLengthColumnBase<T>::murmur_hash3_x86_32(uint32_t* hash, uint32_t from
// TODO, This is not a good place to do a project, this is just for test.
// If we need to make it more general, we should do this project in `IcebergMurmurHashProject`
// but consider that use date type column as bucket transform is rare, we can do it later.
int64_t long_value = _data[i].julian() - date::UNIX_EPOCH_JULIAN;
int64_t long_value = datas[i].julian() - date::UNIX_EPOCH_JULIAN;
hash_value = HashUtil::murmur_hash3_32(&long_value, sizeof(int64_t), 0);
} else if constexpr (std::is_same<T, int32_t>::value) {
// Integer and long hash results must be identical for all integer values.
// This ensures that schema evolution does not change bucket partition values if integer types are promoted.
int64_t long_value = _data[i];
int64_t long_value = datas[i];
hash_value = HashUtil::murmur_hash3_32(&long_value, sizeof(int64_t), 0);
} else if constexpr (std::is_same<T, int64_t>::value) {
hash_value = HashUtil::murmur_hash3_32(&_data[i], sizeof(ValueType), 0);
hash_value = HashUtil::murmur_hash3_32(&datas[i], sizeof(ValueType), 0);
} else {
// for decimal/timestamp type, the storage is very different from iceberg,
// and consider they are merely used, these types are forbidden by fe
@ -334,22 +398,24 @@ void FixedLengthColumnBase<T>::murmur_hash3_x86_32(uint32_t* hash, uint32_t from
template <typename T>
int64_t FixedLengthColumnBase<T>::xor_checksum(uint32_t from, uint32_t to) const {
const auto datas = this->immutable_data();
int64_t xor_checksum = 0;
if constexpr (IsDate<T>) {
for (size_t i = from; i < to; ++i) {
xor_checksum ^= _data[i].to_date_literal();
xor_checksum ^= datas[i].to_date_literal();
}
} else if constexpr (IsTimestamp<T>) {
for (size_t i = from; i < to; ++i) {
xor_checksum ^= _data[i].to_timestamp_literal();
xor_checksum ^= datas[i].to_timestamp_literal();
}
} else if constexpr (IsDecimal<T>) {
for (size_t i = from; i < to; ++i) {
xor_checksum ^= _data[i].int_value();
xor_checksum ^= _data[i].frac_value();
xor_checksum ^= datas[i].int_value();
xor_checksum ^= datas[i].frac_value();
}
} else if constexpr (is_signed_integer<T>) {
const T* src = reinterpret_cast<const T*>(_data.data());
const T* src = reinterpret_cast<const T*>(datas.data());
for (size_t i = from; i < to; ++i) {
if constexpr (std::is_same_v<T, int128_t>) {
xor_checksum ^= static_cast<int64_t>(src[i] >> 64);
@ -370,43 +436,48 @@ int64_t FixedLengthColumnBase<T>::xor_checksum(uint32_t from, uint32_t to) const
// Writes row `idx` into the MySQL row buffer, choosing the push_* call by element type:
// decimal -> push_decimal(to_string()), date -> push_date, timestamp -> push_timestamp,
// arithmetic -> push_number, anything else -> push_string(to_string()).
// `is_binary_protocol` is forwarded to the date, timestamp and number variants only.
// Rendered diff: lines reading `_data` are pre-change, lines reading `datas` are post-change.
template <typename T>
void FixedLengthColumnBase<T>::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_binary_protocol) const {
const auto datas = this->immutable_data();
if constexpr (IsDecimal<T>) {
buf->push_decimal(_data[idx].to_string());
buf->push_decimal(datas[idx].to_string());
} else if constexpr (IsDate<T>) {
buf->push_date(_data[idx], is_binary_protocol);
buf->push_date(datas[idx], is_binary_protocol);
} else if constexpr (IsTimestamp<T>) {
buf->push_timestamp(_data[idx], is_binary_protocol);
buf->push_timestamp(datas[idx], is_binary_protocol);
} else if constexpr (std::is_arithmetic_v<T>) {
buf->push_number(_data[idx], is_binary_protocol);
buf->push_number(datas[idx], is_binary_protocol);
} else {
std::string s = _data[idx].to_string();
std::string s = datas[idx].to_string();
buf->push_string(s.data(), s.size());
}
}
// Drops the first `count` rows by shifting the remaining rows to the front with memmove
// and shrinking the buffer.
// NOTE(review): `size - count` is unsigned arithmetic; a `count` larger than the current
// size would wrap around - confirm callers guarantee count <= size().
// Rendered diff: the first `remain_size` line is pre-change; the get_data() based pair
// is post-change. After get_data() the values live in `_data`, which the last two
// statements use directly.
template <typename T>
void FixedLengthColumnBase<T>::remove_first_n_values(size_t count) {
size_t remain_size = _data.size() - count;
// TODO: avoid memcpy here
auto& datas = this->get_data();
size_t remain_size = datas.size() - count;
memmove(_data.data(), _data.data() + count, remain_size * sizeof(T));
_data.resize(remain_size);
}
// Returns row `idx` streamed into a std::stringstream. One-byte element types are cast
// to int first so they are streamed as integers rather than as characters.
// Rendered diff: lines reading `_data` are pre-change, lines reading `datas` are post-change.
template <typename T>
std::string FixedLengthColumnBase<T>::debug_item(size_t idx) const {
const auto datas = this->immutable_data();
std::stringstream ss;
if constexpr (sizeof(T) == 1) {
// for bool, int8_t
ss << (int)_data[idx];
ss << (int)datas[idx];
} else {
ss << _data[idx];
ss << datas[idx];
}
return ss.str();
}
// int128_t specialization: streams row `idx` through the explicitly qualified
// starrocks::operator<< overload and returns the resulting text.
// Rendered diff: the `_data` line is pre-change, the `datas` line is post-change.
template <>
std::string FixedLengthColumnBase<int128_t>::debug_item(size_t idx) const {
const auto datas = this->immutable_data();
std::stringstream ss;
starrocks::operator<<(ss, _data[idx]);
starrocks::operator<<(ss, datas[idx]);
return ss.str();
}
@ -425,6 +496,15 @@ std::string FixedLengthColumnBase<T>::debug_string() const {
return ss.str();
}
// Returns Status::CapacityLimitExceed when the column holds more than
// Column::MAX_CAPACITY_LIMIT rows, Status::OK() otherwise.
template <typename T>
Status FixedLengthColumnBase<T>::capacity_limit_reached() const {
    // Use size() rather than _data.size(): while the values are borrowed from a
    // ContainerResource, `_data` is empty and would always report zero rows.
    if (this->size() > Column::MAX_CAPACITY_LIMIT) {
        return Status::CapacityLimitExceed(strings::Substitute("row count of fixed length column exceend the limit: $0",
                                                               std::to_string(Column::MAX_CAPACITY_LIMIT)));
    }
    return Status::OK();
}
template <typename T>
std::string FixedLengthColumnBase<T>::get_name() const {
if constexpr (IsDecimal<T>) {

View File

@ -14,19 +14,15 @@
#pragma once
#include <memory>
#include <span>
#include <utility>
#include "column/column.h"
#include "column/container_resource.h"
#include "column/datum.h"
#include "column/vectorized_fwd.h"
#include "common/statusor.h"
#include "gutil/strings/substitute.h"
#include "runtime/decimalv2_value.h"
#include "types/date_value.hpp"
#include "types/timestamp_value.h"
#include "util/raw_container.h"
#include "util/value_generator.h"
namespace starrocks {
@ -55,6 +51,7 @@ class FixedLengthColumnBase : public Column {
public:
using ValueType = T;
using Container = Buffer<ValueType>;
using ImmContainer = std::span<const ValueType>;
FixedLengthColumnBase() = default;
@ -62,14 +59,16 @@ public:
// Creates a column of `n` rows, each initialized to `x`.
FixedLengthColumnBase(const size_t n, const ValueType x) : _data(n, x) {}
// Copy constructor. The copy mirrors the storage mode of `src`:
//  - if `src` borrows its values from a ContainerResource, the copy shares that resource
//    (copying it shares the underlying handle) and its owned buffer stays empty;
//  - otherwise the owned buffer is copied.
// Copying the values into `_data` while also holding the resource would keep a second,
// never-read copy alive: immutable_data() serves reads from the resource, and get_data()
// re-assigns `_data` from it before any mutation.
FixedLengthColumnBase(const FixedLengthColumnBase& src) : _resource(src._resource), _data(src._data) {}
// Only used when this class is the underlying type of another column type (e.g.
// DecimalV3Column). C++ has no built-in delegation for composite types the way Go does,
// so wrappers inherit from the wrapped type. When a wrapper object is constructed the
// wrapped object is constructed first; this move constructor avoids an unnecessary,
// time-consuming copy in that step.
// Rendered diff: the first declaration is the pre-change form (moves `_data` only), the
// second is the post-change form (moves `_resource` as well).
FixedLengthColumnBase(FixedLengthColumnBase&& src) noexcept : _data(std::move(src._data)) {}
FixedLengthColumnBase(FixedLengthColumnBase&& src) noexcept
: _resource(std::move(src._resource)), _data(std::move(src._data)) {}
// True when ValueType is an arithmetic type according to std::is_arithmetic_v.
bool is_numeric() const override { return std::is_arithmetic_v<ValueType>; }
@ -79,17 +78,20 @@ public:
// True when ValueType satisfies the IsTimestamp trait.
bool is_timestamp() const override { return IsTimestamp<ValueType>; }
// Read-only byte pointer to the first element.
// Rendered diff: the first line (pre-change) points into `_data`; the second line
// (post-change) points into immutable_data(), i.e. the borrowed resource when one is held.
const uint8_t* raw_data() const override { return reinterpret_cast<const uint8_t*>(_data.data()); }
const uint8_t* raw_data() const override { return reinterpret_cast<const uint8_t*>(immutable_data().data()); }
// Writable byte pointer to the first element of the owned buffer.
// Rendered diff: the one-line form is pre-change. The post-change form calls get_data()
// first so that a borrowed resource is copied into `_data` before the pointer is handed out.
uint8_t* mutable_raw_data() override { return reinterpret_cast<uint8_t*>(_data.data()); }
uint8_t* mutable_raw_data() override {
get_data();
return reinterpret_cast<uint8_t*>(_data.data());
}
// Size in bytes of one element.
size_t type_size() const override { return sizeof(T); }
// Number of rows.
// Rendered diff: pre-change counts `_data`; post-change counts immutable_data(), which
// also covers values borrowed from the resource.
size_t size() const override { return _data.size(); }
size_t size() const override { return immutable_data().size(); }
// Capacity of the owned buffer only; a borrowed resource is not taken into account.
size_t capacity() const override { return _data.capacity(); }
// Total payload size in bytes: row count times sizeof(ValueType).
// Rendered diff: pre-change uses `_data.size()`, post-change uses size().
size_t byte_size() const override { return _data.size() * sizeof(ValueType); }
size_t byte_size() const override { return this->size() * sizeof(ValueType); }
// Size in bytes of a single row; identical for every row, so `idx` is unused.
size_t byte_size(size_t idx __attribute__((unused))) const override { return sizeof(ValueType); }
@ -100,19 +102,39 @@ public:
// Reserves capacity in the owned buffer. This does not go through get_data(), so a
// borrowed resource is left as it is.
void reserve(size_t n) override { _data.reserve(n); }
// Resizes the column to `n` rows.
// Rendered diff: the post-change form goes through get_data(), which first copies a
// borrowed resource into the owned buffer.
void resize(size_t n) override { _data.resize(n); }
void resize(size_t n) override { get_data().resize(n); }
// Resizes the column to `n` rows through raw::stl_vector_resize_uninitialized.
// Rendered diff: the one-line form is pre-change; the post-change form obtains the
// buffer through get_data() first.
void resize_uninitialized(size_t n) override { raw::stl_vector_resize_uninitialized(&_data, n); }
void resize_uninitialized(size_t n) override {
auto& data = get_data();
raw::stl_vector_resize_uninitialized(&data, n);
}
void assign(size_t n, size_t idx) override { _data.assign(n, _data[idx]); }
void assign(size_t n, size_t idx) override {
auto& datas = get_data();
datas.assign(n, _data[idx]);
}
void remove_first_n_values(size_t count) override;
// Appends a single value.
// Rendered diff: the one-line form is pre-change; the post-change form appends through
// get_data(), which materializes a borrowed resource first.
void append(const T value) { _data.emplace_back(value); }
void append(const T value) {
auto& datas = get_data();
datas.emplace_back(value);
}
// Appends all elements of `values` at the end of the column.
// Rendered diff: the one-line form is pre-change; the post-change form inserts through get_data().
void append(const Buffer<T>& values) { _data.insert(_data.end(), values.begin(), values.end()); }
void append(const Buffer<T>& values) {
auto& datas = get_data();
datas.insert(datas.end(), values.begin(), values.end());
}
void append_datum(const Datum& datum) override { _data.emplace_back(datum.get<ValueType>()); }
void append(const ImmBuffer<T> values) {
auto& datas = get_data();
datas.insert(datas.end(), values.begin(), values.end());
}
// Appends the ValueType stored in `datum` as a new row.
void append_datum(const Datum& datum) override {
    get_data().emplace_back(datum.get<ValueType>());
}
void append(const Column& src, size_t offset, size_t count) override;
@ -123,11 +145,12 @@ public:
// Always returns false and appends nothing; `count` is ignored.
[[nodiscard]] bool append_nulls(size_t count __attribute__((unused))) override { return false; }
[[nodiscard]] bool contain_value(size_t start, size_t end, T value) const {
const auto datas = this->immutable_data();
DCHECK_LE(start, end);
DCHECK_LE(start, _data.size());
DCHECK_LE(end, _data.size());
DCHECK_LE(start, datas.size());
DCHECK_LE(end, datas.size());
for (size_t i = start; i < end; i++) {
if (_data[i] == value) {
if (datas[i] == value) {
return true;
}
}
@ -137,22 +160,24 @@ public:
// Appends `length` bytes from `buff`, interpreted as length / sizeof(ValueType) values,
// with a single memcpy into the newly grown tail. `length` must be a multiple of
// sizeof(ValueType) (DCHECK). Returns the number of values appended.
// Rendered diff: the three statements on `_data` are the pre-change lines; the four on
// `datas` (obtained via get_data()) are the post-change lines.
size_t append_numbers(const void* buff, size_t length) override {
DCHECK(length % sizeof(ValueType) == 0);
const size_t count = length / sizeof(ValueType);
size_t dst_offset = _data.size();
raw::stl_vector_resize_uninitialized(&_data, _data.size() + count);
T* dst = _data.data() + dst_offset;
auto& datas = this->get_data();
size_t dst_offset = datas.size();
raw::stl_vector_resize_uninitialized(&datas, datas.size() + count);
T* dst = datas.data() + dst_offset;
memcpy(dst, buff, length);
return count;
}
size_t append_numbers(const ContainerResource& res) override;
// Appends `count` copies of one value to the end of the column.
// `value` is reinterpreted as a `const T*` and dereferenced, so it must point to a valid T.
void append_value_multiple_times(const void* value, size_t count) override {
_data.insert(_data.end(), count, *reinterpret_cast<const T*>(value));
auto& datas = get_data();
datas.insert(datas.end(), count, *reinterpret_cast<const T*>(value));
}
void append_default() override { _data.emplace_back(DefaultValueGenerator<ValueType>::next_value()); }
void append_default() override;
void append_default(size_t count) override {
_data.resize(_data.size() + count, DefaultValueGenerator<ValueType>::next_value());
}
void append_default(size_t count) override;
StatusOr<ColumnPtr> replicate(const Buffer<uint32_t>& offsets) override;
@ -212,11 +237,27 @@ public:
std::string get_name() const override;
Container& get_data() { return _data; }
// Returns the mutable backing buffer `_data`.
// If the column currently holds a non-empty `_resource`, its elements are first copied
// into `_data` and the resource is reset, so subsequent reads and writes go through `_data`.
Container& get_data() {
    // Note: not thread safe !
    if (_resource.empty()) {
        return _data;
    }
    const auto view = _resource.span<T>();
    _data.assign(view.begin(), view.end());
    _resource.reset();
    return _data;
}
// Const access to `_data` only.
// NOTE(review): unlike the non-const overload and immutable_data(), this does not consult
// `_resource`; while the column is backed by a resource the returned buffer presumably does
// not reflect the column's contents - confirm const callers use immutable_data() instead.
const Container& get_data() const { return _data; }
// Returns a read-only view of the column's values without copying:
// the span over `_resource` when it is non-empty, otherwise a view of `_data`.
const ImmContainer immutable_data() const {
    if (_resource.empty()) {
        return _data;
    }
    return _resource.span<T>();
}
Datum get(size_t n) const override { return Datum(_data[n]); }
// Returns the value at row `n` wrapped in a Datum, read through the immutable view.
Datum get(size_t n) const override {
    return Datum(immutable_data()[n]);
}
std::string debug_item(size_t idx) const override;
@ -229,27 +270,23 @@ public:
auto& r = down_cast<FixedLengthColumnBase&>(rhs);
std::swap(this->_delete_state, r._delete_state);
std::swap(this->_data, r._data);
std::swap(this->_resource, r._resource);
}
// Resets the column to an empty state: runs the base-class reset, then resets the
// `_resource` member and clears the `_data` buffer.
void reset_column() override {
Column::reset_column();
_resource.reset();
_data.clear();
}
// The `_data` support one size(> 2^32), but some interface such as update_rows() will use index of uint32_t to
// access the item, so we should use 2^32 as the limit
Status capacity_limit_reached() const override {
if (_data.size() > Column::MAX_CAPACITY_LIMIT) {
return Status::CapacityLimitExceed(
strings::Substitute("row count of fixed length column exceend the limit: $0",
std::to_string(Column::MAX_CAPACITY_LIMIT)));
}
return Status::OK();
}
Status capacity_limit_reached() const override;
void check_or_die() const override {}
protected:
ContainerResource _resource;
Container _data;
private:

View File

@ -34,6 +34,7 @@ public:
using ValueType = JsonValue;
using SuperClass = CowFactory<ColumnFactory<ObjectColumn<JsonValue>, JsonColumn>, JsonColumn, Column>;
using BaseClass = JsonColumnBase;
using ImmContainer = ObjectDataProxyContainer;
JsonColumn() = default;
explicit JsonColumn(size_t size) : SuperClass(size) {}

View File

@ -31,8 +31,9 @@
namespace starrocks {
void MapColumn::check_or_die() const {
CHECK_EQ(_offsets->get_data().back(), _keys->size());
CHECK_EQ(_offsets->get_data().back(), _values->size());
const auto offsets = _offsets->immutable_data();
CHECK_EQ(offsets.back(), _keys->size());
CHECK_EQ(offsets.back(), _values->size());
DCHECK(_keys->is_nullable());
DCHECK(_values->is_nullable());
_offsets->check_or_die();
@ -71,17 +72,15 @@ uint8_t* MapColumn::mutable_raw_data() {
// Returns the byte size of `size` consecutive map rows starting at row `from`:
// the bytes of the key and value elements those rows reference (element range taken from
// the offsets at `from` and `from + size`) plus the bytes of the offsets for those rows.
// DCHECKs that the requested row range lies inside the column.
size_t MapColumn::byte_size(size_t from, size_t size) const {
DCHECK_LE(from + size, this->size()) << "Range error";
return _keys->byte_size(_offsets->get_data()[from],
_offsets->get_data()[from + size] - _offsets->get_data()[from]) +
_values->byte_size(_offsets->get_data()[from],
_offsets->get_data()[from + size] - _offsets->get_data()[from]) +
_offsets->byte_size(from, size);
const auto offsets = _offsets->immutable_data();
return _keys->byte_size(offsets[from], offsets[from + size] - offsets[from]) +
_values->byte_size(offsets[from], offsets[from + size] - offsets[from]) + _offsets->byte_size(from, size);
}
size_t MapColumn::byte_size(size_t idx) const {
return _keys->byte_size(_offsets->get_data()[idx], _offsets->get_data()[idx + 1]) +
_values->byte_size(_offsets->get_data()[idx], _offsets->get_data()[idx + 1]) +
sizeof(_offsets->get_data()[idx]);
const auto offsets = _offsets->immutable_data();
return _keys->byte_size(offsets[idx], offsets[idx + 1]) + _values->byte_size(offsets[idx], offsets[idx + 1]) +
sizeof(offsets[idx]);
}
void MapColumn::reserve(size_t n) {
@ -118,15 +117,18 @@ void MapColumn::append(const Column& src, size_t offset, size_t count) {
const auto& map_column = down_cast<const MapColumn&>(src);
const UInt32Column& src_offsets = map_column.offsets();
size_t src_offset = src_offsets.get_data()[offset];
size_t src_count = src_offsets.get_data()[offset + count] - src_offset;
const auto src_offsets_data = src_offsets.immutable_data();
size_t src_offset = src_offsets_data[offset];
size_t src_count = src_offsets_data[offset + count] - src_offset;
_keys->append(map_column.keys(), src_offset, src_count);
_values->append(map_column.values(), src_offset, src_count);
auto& offsets_data = _offsets->get_data();
for (size_t i = offset; i < offset + count; i++) {
uint32_t l = src_offsets.get_data()[i + 1] - src_offsets.get_data()[i];
_offsets->append(_offsets->get_data().back() + l);
uint32_t l = src_offsets_data[i + 1] - src_offsets_data[i];
offsets_data.emplace_back(offsets_data.back() + l);
}
}
@ -187,11 +189,13 @@ void MapColumn::update_rows(const Column& src, const uint32_t* indexes) {
const auto& map_column = down_cast<const MapColumn&>(src);
const UInt32Column& src_offsets = map_column.offsets();
const auto src_offsets_data = src_offsets.immutable_data();
size_t replace_num = src.size();
bool need_resize = false;
for (size_t i = 0; i < replace_num; ++i) {
if (_offsets->get_data()[indexes[i] + 1] - _offsets->get_data()[indexes[i]] !=
src_offsets.get_data()[i + 1] - src_offsets.get_data()[i]) {
src_offsets_data[i + 1] - src_offsets_data[i]) {
need_resize = true;
break;
}
@ -200,7 +204,7 @@ void MapColumn::update_rows(const Column& src, const uint32_t* indexes) {
if (!need_resize) {
Buffer<uint32_t> element_idxes;
for (size_t i = 0; i < replace_num; ++i) {
size_t element_count = src_offsets.get_data()[i + 1] - src_offsets.get_data()[i];
size_t element_count = src_offsets_data[i + 1] - src_offsets_data[i];
size_t element_offset = _offsets->get_data()[indexes[i]];
for (size_t j = 0; j < element_count; j++) {
element_idxes.emplace_back(element_offset + j);
@ -242,8 +246,10 @@ void MapColumn::remove_first_n_values(size_t count) {
uint32_t MapColumn::serialize(size_t idx, uint8_t* pos) const {
DCHECK(!_keys->is_map());
uint32_t offset = _offsets->get_data()[idx];
uint32_t map_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets_data = _offsets->immutable_data();
uint32_t offset = offsets_data[idx];
uint32_t map_size = offsets_data[idx + 1] - offset;
strings::memcpy_inlined(pos, &map_size, sizeof(map_size));
size_t ser_size = sizeof(map_size);
@ -298,8 +304,10 @@ uint32_t MapColumn::max_one_element_serialize_size() const {
}
uint32_t MapColumn::serialize_size(size_t idx) const {
uint32_t offset = _offsets->get_data()[idx];
uint32_t map_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets_data = _offsets->immutable_data();
uint32_t offset = offsets_data[idx];
uint32_t map_size = offsets_data[idx + 1] - offset;
uint32_t ser_size = sizeof(map_size);
for (size_t i = 0; i < map_size; ++i) {
@ -417,12 +425,15 @@ int MapColumn::compare_at(size_t left, size_t right, const Column& right_column,
}
int MapColumn::equals(size_t left, const Column& rhs, size_t right, bool safe_eq) const {
const auto& rhs_map = down_cast<const MapColumn&>(rhs);
const auto offsets_data = _offsets->immutable_data();
size_t lhs_offset = offsets_data[left];
size_t lhs_end = offsets_data[left + 1];
size_t lhs_offset = _offsets->get_data()[left];
size_t lhs_end = _offsets->get_data()[left + 1];
size_t rhs_offset = rhs_map._offsets->get_data()[right];
size_t rhs_end = rhs_map._offsets->get_data()[right + 1];
const auto& rhs_map = down_cast<const MapColumn&>(rhs);
const auto rhs_offsets_data = rhs_map.offsets().immutable_data();
size_t rhs_offset = rhs_offsets_data[right];
size_t rhs_end = rhs_offsets_data[right + 1];
// If size is not equal return false
if (lhs_end - lhs_offset != rhs_end - rhs_offset) {
return false;
@ -501,9 +512,10 @@ int MapColumn::equals(size_t left, const Column& rhs, size_t right, bool safe_eq
void MapColumn::fnv_hash_at(uint32_t* hash, uint32_t idx) const {
DCHECK_LT(idx + 1, _offsets->size()) << "idx + 1 should be less than offsets size";
uint32_t offset = _offsets->get_data()[idx];
const auto offsets_data = _offsets->immutable_data();
uint32_t offset = offsets_data[idx];
// Should use size_t not uint32_t for compatible
size_t map_size = _offsets->get_data()[idx + 1] - offset;
size_t map_size = offsets_data[idx + 1] - offset;
*hash = HashUtil::fnv_hash(&map_size, static_cast<uint32_t>(sizeof(map_size)), *hash);
uint32_t base_hash = *hash;
@ -520,9 +532,10 @@ void MapColumn::fnv_hash_at(uint32_t* hash, uint32_t idx) const {
void MapColumn::crc32_hash_at(uint32_t* hash, uint32_t idx) const {
DCHECK_LT(idx + 1, _offsets->size()) << "idx + 1 should be less than offsets size";
uint32_t offset = _offsets->get_data()[idx];
const auto offsets_data = _offsets->immutable_data();
uint32_t offset = offsets_data[idx];
// Should use size_t not uint32_t for compatible
size_t map_size = _offsets->get_data()[idx + 1] - offset;
size_t map_size = offsets_data[idx + 1] - offset;
*hash = HashUtil::zlib_crc_hash(&map_size, static_cast<uint32_t>(sizeof(map_size)), *hash);
uint32_t base_hash = *hash;
@ -556,21 +569,23 @@ void MapColumn::crc32_hash(uint32_t* hash, uint32_t from, uint32_t to) const {
// Computes an XOR checksum over map rows [from, to).
// First XORs together the element count of every row in the range, then XORs in the
// checksums of the key and value columns over the element range those rows reference
// (from the offset at `from` to the offset at `to`).
int64_t MapColumn::xor_checksum(uint32_t from, uint32_t to) const {
// The XOR of MapColumn
// XOR the offsets column and elements column
const auto offsets_data = _offsets->immutable_data();
int64_t xor_checksum = 0;
for (size_t idx = from; idx < to; ++idx) {
int64_t array_size = _offsets->get_data()[idx + 1] - _offsets->get_data()[idx];
int64_t array_size = offsets_data[idx + 1] - offsets_data[idx];
xor_checksum ^= array_size;
}
uint32_t element_from = _offsets->get_data()[from];
uint32_t element_to = _offsets->get_data()[to];
uint32_t element_from = offsets_data[from];
uint32_t element_to = offsets_data[to];
xor_checksum ^= _keys->xor_checksum(element_from, element_to);
return (xor_checksum ^ _values->xor_checksum(element_from, element_to));
}
void MapColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_binary_protocol) const {
DCHECK_LT(idx, size());
const size_t offset = _offsets->get_data()[idx];
const size_t map_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets_data = _offsets->immutable_data();
const size_t offset = offsets_data[idx];
const size_t map_size = offsets_data[idx + 1] - offset;
buf->begin_push_bracket();
auto* keys = _keys.get();
@ -591,8 +606,10 @@ void MapColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_bi
Datum MapColumn::get(size_t idx) const {
DCHECK_LT(idx + 1, _offsets->size()) << "idx + 1 should be less than offsets size";
size_t offset = _offsets->get_data()[idx];
size_t map_size = _offsets->get_data()[idx + 1] - offset;
const auto offsets = _offsets->immutable_data();
size_t offset = offsets[idx];
size_t map_size = offsets[idx + 1] - offset;
DatumMap res;
for (size_t i = 0; i < map_size; ++i) {
@ -603,12 +620,14 @@ Datum MapColumn::get(size_t idx) const {
// Returns the number of entries in the map at row `idx`, computed as the difference
// between the offsets at `idx + 1` and `idx`. DCHECKs that `idx + 1` is a valid offset index.
size_t MapColumn::get_map_size(size_t idx) const {
DCHECK_LT(idx + 1, _offsets->size());
return _offsets->get_data()[idx + 1] - _offsets->get_data()[idx];
const auto offsets = _offsets->immutable_data();
return offsets[idx + 1] - offsets[idx];
}
// Returns {start offset, entry count} for the map at row `idx`: the offset stored at `idx`
// and the difference between the offsets at `idx + 1` and `idx`.
// DCHECKs that `idx + 1` is a valid offset index.
std::pair<size_t, size_t> MapColumn::get_map_offset_size(size_t idx) const {
DCHECK_LT(idx + 1, _offsets->size());
return {_offsets->get_data()[idx], _offsets->get_data()[idx + 1] - _offsets->get_data()[idx]};
const auto offsets = _offsets->immutable_data();
return {offsets[idx], offsets[idx + 1] - offsets[idx]};
}
bool MapColumn::set_null(size_t idx) {
@ -617,8 +636,9 @@ bool MapColumn::set_null(size_t idx) {
// Returns the sum of reference_memory_usage() of the keys, values and offsets for `size`
// map rows starting at row `from`. The key/value element range is derived from the offsets
// at `from` and `from + size`. DCHECKs that the row range lies inside the column.
size_t MapColumn::reference_memory_usage(size_t from, size_t size) const {
DCHECK_LE(from + size, this->size()) << "Range error";
size_t start_offset = _offsets->get_data()[from];
size_t elements_num = _offsets->get_data()[from + size] - start_offset;
const auto offsets = _offsets->immutable_data();
size_t start_offset = offsets[from];
size_t elements_num = offsets[from + size] - start_offset;
return _keys->reference_memory_usage(start_offset, elements_num) +
_values->reference_memory_usage(start_offset, elements_num) + _offsets->reference_memory_usage(from, size);
}
@ -639,8 +659,10 @@ void MapColumn::reset_column() {
std::string MapColumn::debug_item(size_t idx) const {
DCHECK_LT(idx, size());
uint32_t offset = _offsets->get_data()[idx];
uint32_t map_size = _offsets->get_data()[idx + 1] - offset;
auto offsets = _offsets->immutable_data();
uint32_t offset = offsets[idx];
uint32_t map_size = offsets[idx + 1] - offset;
std::stringstream ss;
ss << "{";

View File

@ -38,7 +38,8 @@ size_t NullableColumn::null_count() const {
if (!_has_null) {
return 0;
}
return SIMD::count_nonzero(_null_column->get_data());
const auto null_data = _null_column->immutable_data();
return SIMD::count_nonzero(null_data);
}
size_t NullableColumn::null_count(size_t offset, size_t count) const {
@ -71,6 +72,7 @@ void NullableColumn::append(const Column& src, size_t offset, size_t count) {
} else if (src.is_nullable()) {
const auto& src_column = down_cast<const NullableColumn&>(src);
DCHECK_EQ(src_column._null_column->size(), src_column._data_column->size());
const auto null_data = src_column._null_column->immutable_data();
if (!src_column.has_null()) {
_null_column->resize(_null_column->size() + count);
@ -78,7 +80,7 @@ void NullableColumn::append(const Column& src, size_t offset, size_t count) {
} else {
_null_column->append(*src_column._null_column, offset, count);
_data_column->append(*src_column._data_column, offset, count);
_has_null = _has_null || SIMD::contain_nonzero(src_column._null_column->get_data(), offset, count);
_has_null = _has_null || SIMD::contain_nonzero(null_data, offset, count);
}
} else {
_null_column->resize(_null_column->size() + count);
@ -288,7 +290,8 @@ uint32_t NullableColumn::serialize(size_t idx, uint8_t* pos) const {
return sizeof(bool) + _data_column->serialize(idx, pos + sizeof(bool));
}
bool null = _null_column->get_data()[idx];
const auto null_data = _null_column->immutable_data();
bool null = null_data[idx];
strings::memcpy_inlined(pos, &null, sizeof(bool));
if (null) {
@ -306,9 +309,10 @@ uint32_t NullableColumn::serialize_default(uint8_t* pos) const {
size_t NullableColumn::serialize_batch_at_interval(uint8_t* dst, size_t byte_offset, size_t byte_interval, size_t start,
size_t count) const {
const auto null_data = _null_column->immutable_data();
_null_column->serialize_batch_at_interval(dst, byte_offset, byte_interval, start, count);
for (size_t i = start; i < start + count; i++) {
if (_null_column->get_data()[i] == 0) {
if (null_data[i] == 0) {
_data_column->serialize(i, dst + (i - start) * byte_interval + byte_offset + 1);
} else {
_data_column->serialize_default(dst + (i - start) * byte_interval + byte_offset + 1);
@ -319,8 +323,9 @@ size_t NullableColumn::serialize_batch_at_interval(uint8_t* dst, size_t byte_off
// Serializes the column in batch by delegating to the data column's
// serialize_batch_with_null_masks(), passing the null column's raw byte buffer as the
// null mask together with the `_has_null` flag.
void NullableColumn::serialize_batch(uint8_t* dst, Buffer<uint32_t>& slice_sizes, size_t chunk_size,
uint32_t max_one_row_size) const {
_data_column->serialize_batch_with_null_masks(dst, slice_sizes, chunk_size, max_one_row_size,
_null_column->get_data().data(), _has_null);
const auto null_data = _null_column->immutable_data();
_data_column->serialize_batch_with_null_masks(dst, slice_sizes, chunk_size, max_one_row_size, null_data.data(),
_has_null);
}
const uint8_t* NullableColumn::deserialize_and_append(const uint8_t* pos) {
@ -350,7 +355,7 @@ void NullableColumn::fnv_hash(uint32_t* hash, uint32_t from, uint32_t to) const
return;
}
const auto& null_data = _null_column->get_data();
const auto null_data = _null_column->immutable_data();
uint32_t value = 0x9e3779b9;
while (from < to) {
uint32_t new_from = from + 1;
@ -373,7 +378,7 @@ void NullableColumn::fnv_hash_with_selection(uint32_t* hash, uint8_t* selection,
_data_column->fnv_hash_with_selection(hash, selection, from, to);
return;
}
const auto& null_data = _null_column->get_data();
const auto null_data = _null_column->immutable_data();
uint32_t value = 0x9e3779b9;
while (from < to) {
uint16_t new_from = from + 1;
@ -397,7 +402,7 @@ void NullableColumn::fnv_hash_selective(uint32_t* hash, uint16_t* sel, uint16_t
_data_column->fnv_hash_selective(hash, sel, sel_size);
return;
}
const auto& null_data = _null_column->get_data();
const auto null_data = _null_column->immutable_data();
uint32_t value = 0x9e3779b9;
// @TODO can we optimize this?
for (uint16_t i = 0; i < sel_size; i++) {
@ -416,7 +421,7 @@ void NullableColumn::crc32_hash(uint32_t* hash, uint32_t from, uint32_t to) cons
return;
}
const auto& null_data = _null_column->get_data();
const auto null_data = _null_column->immutable_data();
// NULL is treat as 0 when crc32 hash for data loading
static const int INT_VALUE = 0;
while (from < to) {
@ -439,7 +444,7 @@ void NullableColumn::crc32_hash_with_selection(uint32_t* hash, uint8_t* selectio
_data_column->crc32_hash_with_selection(hash, selection, from, to);
return;
}
const auto& null_data = _null_column->get_data();
const auto null_data = _null_column->immutable_data();
static const int INT_VALUE = 0;
while (from < to) {
uint16_t new_from = from + 1;
@ -462,7 +467,7 @@ void NullableColumn::crc32_hash_selective(uint32_t* hash, uint16_t* sel, uint16_
_data_column->crc32_hash_selective(hash, sel, sel_size);
return;
}
const auto& null_data = _null_column->get_data();
const auto null_data = _null_column->immutable_data();
static const int INT_VALUE = 0;
// @TODO can we optimize this?
for (uint16_t i = 0; i < sel_size; i++) {
@ -480,7 +485,7 @@ void NullableColumn::murmur_hash3_x86_32(uint32_t* hash, uint32_t from, uint32_t
return;
}
const auto& null_data = _null_column->get_data();
const auto null_data = immutable_null_column_data();
while (from < to) {
uint32_t new_from = from + 1;
while (new_from < to && null_data[from] == null_data[new_from]) {
@ -500,7 +505,7 @@ int64_t NullableColumn::xor_checksum(uint32_t from, uint32_t to) const {
}
int64_t xor_checksum = 0;
const uint8_t* src = _null_column->get_data().data();
const uint8_t* src = _null_column->immutable_data().data();
// The XOR of NullableColumn
// XOR all the 8-bit integers one by one
@ -514,7 +519,8 @@ int64_t NullableColumn::xor_checksum(uint32_t from, uint32_t to) const {
}
void NullableColumn::put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx, bool is_binary_protocol) const {
if (_has_null && _null_column->get_data()[idx]) {
auto null_data = _null_column->immutable_data();
if (_has_null && null_data[idx]) {
buf->push_null(is_binary_protocol);
} else {
buf->update_field_pos();
@ -526,7 +532,8 @@ void NullableColumn::check_or_die() const {
CHECK_EQ(_null_column->size(), _data_column->size());
// when _has_null=true, the column may have no null value, so don't check.
if (!_has_null) {
CHECK(!SIMD::contain_nonzero(_null_column->get_data(), 0));
auto null_data = _null_column->immutable_data();
CHECK(!SIMD::contain_nonzero(null_data, 0));
}
_data_column->check_or_die();
_null_column->check_or_die();

View File

@ -21,6 +21,7 @@
namespace starrocks {
using NullData = FixedLengthColumn<uint8_t>::Container;
using ImmutableNullData = FixedLengthColumn<uint8_t>::ImmContainer;
using NullColumn = FixedLengthColumn<uint8_t>;
using NullColumnPtr = NullColumn::Ptr;
using NullColumns = std::vector<NullColumnPtr>;
@ -250,8 +251,8 @@ public:
const NullColumn& null_column_ref() const { return *_null_column; }
NullData& null_column_data() { return _null_column->get_data(); }
const NullData& null_column_data() const { return _null_column->get_data(); }
const NullData& immutable_null_column_data() const { return _null_column->get_data(); }
const ImmutableNullData null_column_data() const { return _null_column->immutable_data(); }
const ImmutableNullData immutable_null_column_data() const { return _null_column->immutable_data(); }
const Column* immutable_data_column() const { return _data_column.get(); }

View File

@ -37,6 +37,18 @@ public:
using ValueType = T;
using Container = Buffer<ValueType*>;
// Lightweight read-only, index-based view over an ObjectColumn.
// It stores only a reference to the column, so it must not outlive the column it views.
struct ObjectDataProxyContainer {
// Binds the view to `column`; nothing is copied.
ObjectDataProxyContainer(const ObjectColumn& column) : _column(column) {}
// Returns the object pointer for row `index`, forwarded to ObjectColumn::get_object().
T* operator[](size_t index) const { return _column.get_object(index); }
// Number of rows in the viewed column.
size_t size() const { return _column.size(); }
private:
// Viewed column (not owned).
const ObjectColumn& _column;
};
using ImmContainer = ObjectDataProxyContainer;
ObjectColumn() = default;
explicit ObjectColumn(size_t size) : _pool(size) {}
@ -162,6 +174,8 @@ public:
return _cache;
}
// Returns a read-only proxy view over this column; the proxy refers back to `*this`.
const ObjectDataProxyContainer immutable_data() const {
    ObjectDataProxyContainer proxy(*this);
    return proxy;
}
Datum get(size_t n) const override { return Datum(get_object(n)); }
size_t container_memory_usage() const override { return _pool.capacity() * type_size(); }

View File

@ -101,17 +101,17 @@ const Int8Column* StreamChunkConverter::ops_col(const StreamChunk* stream_chunk_
// Returns the op column of `stream_chunk` reinterpreted as a read-only StreamRowOp array.
// A named cast that keeps the pointee const is used: the earlier C-style cast silently
// stripped const from the read-only buffer before the return type re-added it.
const StreamRowOp* StreamChunkConverter::ops(const StreamChunk& stream_chunk) {
    auto* op_col = ops_col(stream_chunk);
    return reinterpret_cast<const StreamRowOp*>(op_col->immutable_data().data());
}
// Returns the op column of `stream_chunk` reinterpreted as a read-only StreamRowOp array.
// A named cast that keeps the pointee const is used: the earlier C-style cast silently
// stripped const from the read-only buffer before the return type re-added it.
const StreamRowOp* StreamChunkConverter::ops(const StreamChunk* stream_chunk) {
    auto* op_col = ops_col(stream_chunk);
    return reinterpret_cast<const StreamRowOp*>(op_col->immutable_data().data());
}
// Returns the op column of `stream_chunk` reinterpreted as a read-only StreamRowOp array.
// A named cast that keeps the pointee const is used: the earlier C-style cast silently
// stripped const from the read-only buffer before the return type re-added it.
const StreamRowOp* StreamChunkConverter::ops(const StreamChunkPtr& stream_chunk) {
    auto* op_col = ops_col(stream_chunk);
    return reinterpret_cast<const StreamRowOp*>(op_col->immutable_data().data());
}
ChunkPtr StreamChunkConverter::to_chunk(const StreamChunkPtr& stream_chunk) {

View File

@ -108,133 +108,133 @@ template <>
struct RunTimeTypeTraits<TYPE_BOOLEAN> {
using CppType = uint8_t;
using ColumnType = BooleanColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_TINYINT> {
using CppType = int8_t;
using ColumnType = Int8Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_UNSIGNED_TINYINT> {
using CppType = uint8_t;
using ColumnType = UInt8Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_SMALLINT> {
using CppType = int16_t;
using ColumnType = Int16Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_UNSIGNED_SMALLINT> {
using CppType = uint16_t;
using ColumnType = UInt16Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_INT> {
using CppType = int32_t;
using ColumnType = Int32Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_UNSIGNED_INT> {
using CppType = uint32_t;
using ColumnType = UInt32Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_BIGINT> {
using CppType = int64_t;
using ColumnType = Int64Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_UNSIGNED_BIGINT> {
using CppType = uint64_t;
using ColumnType = UInt64Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_LARGEINT> {
using CppType = int128_t;
using ColumnType = Int128Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_FLOAT> {
using CppType = float;
using ColumnType = FloatColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_DOUBLE> {
using CppType = double;
using ColumnType = DoubleColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_DECIMALV2> {
using CppType = DecimalV2Value;
using ColumnType = DecimalColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_DECIMAL32> {
using CppType = int32_t;
using ColumnType = Decimal32Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_DECIMAL64> {
using CppType = int64_t;
using ColumnType = Decimal64Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_DECIMAL128> {
using CppType = int128_t;
using ColumnType = Decimal128Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_DECIMAL256> {
using CppType = int256_t;
using ColumnType = Decimal256Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_INT256> {
using CppType = int256_t;
using ColumnType = Int256Column;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_NULL> {
using CppType = uint8_t;
using ColumnType = NullColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
@ -255,21 +255,21 @@ template <>
struct RunTimeTypeTraits<TYPE_DATE> {
using CppType = DateValue;
using ColumnType = DateColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_DATETIME> {
using CppType = TimestampValue;
using ColumnType = TimestampColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_TIME> {
using CppType = double;
using ColumnType = DoubleColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
@ -297,14 +297,14 @@ template <>
struct RunTimeTypeTraits<TYPE_JSON> {
using CppType = JsonValue*;
using ColumnType = JsonColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>
struct RunTimeTypeTraits<TYPE_VARIANT> {
using CppType = VariantValue*;
using ColumnType = VariantColumn;
using ProxyContainerType = ColumnType::Container;
using ProxyContainerType = ColumnType::ImmContainer;
};
template <>

View File

@ -29,6 +29,7 @@ public:
using ValueType = VariantValue;
using SuperClass = CowFactory<ColumnFactory<ObjectColumn<VariantValue>, VariantColumn>, VariantColumn, Column>;
using BaseClass = VariantColumnBase;
using ImmContainer = ObjectDataProxyContainer;
VariantColumn() = default;
explicit VariantColumn(size_t size) : SuperClass(size) {}

View File

@ -15,11 +15,9 @@
#pragma once
#include <memory>
#include <span>
#include <vector>
#include "runtime/memory/column_allocator.h"
#include "types/int256.h"
namespace starrocks {
class DecimalV2Value;
@ -32,6 +30,8 @@ class VariantValue;
class DateValue;
class TimestampValue;
struct int256_t;
typedef __int128 int128_t;
class Chunk;
@ -47,6 +47,9 @@ class ColumnAllocator;
template <typename T>
using Buffer = std::vector<T, ColumnAllocator<T>>;
template <typename T>
using ImmBuffer = std::span<const T>;
class ArrayColumn;
class ArrayViewColumn;
class MapColumn;
@ -89,6 +92,9 @@ using Decimal256Column = DecimalV3Column<int256_t>;
using BinaryColumn = BinaryColumnBase<uint32_t>;
using LargeBinaryColumn = BinaryColumnBase<uint64_t>;
class ColumnVisitor;
class ColumnVisitorMutable;
template <typename T>
constexpr bool is_decimal_column = false;
template <typename T>

View File

@ -309,6 +309,8 @@ CONF_Int32(min_file_descriptor_number, "60000");
// data and index page size, default is 64k
CONF_Int32(data_page_size, "65536");
CONF_mBool(enable_zero_copy_from_page_cache, "true");
// Page cache is the cache for the decompressed or decoded page of data file.
// Currently, BE does not support configure the upper limit of the page cache.
// The memory limit of page cache are uniformly restricted by datacache_mem_size.

View File

@ -15,6 +15,7 @@
#include "deletion_bitmap.h"
#include "column/vectorized_fwd.h"
#include "runtime/memory/column_allocator.h"
#include "util/defer_op.h"
namespace starrocks {

View File

@ -95,12 +95,26 @@ using SliceAggTwoLevelHashMap =
phmap::parallel_flat_hash_map<Slice, AggDataPtr, SliceHashWithSeed<seed>, SliceEqual,
phmap::priv::Allocator<phmap::priv::Pair<const Slice, AggDataPtr>>, PHMAPN>;
// Satisfied by types on which `t.immutable_data()` is a valid expression.
template <typename T>
concept HasImmutableData = requires(T t) {
{t.immutable_data()};
};
// Returns a read-only view of `obj`'s values, selected at compile time:
// `obj->immutable_data()` when T provides it, otherwise `obj->get_proxy_data()`.
// The return type is deduced from whichever call is selected.
template <typename T>
auto get_immutable_data(T* obj) {
if constexpr (HasImmutableData<T>) {
return obj->immutable_data();
} else {
return obj->get_proxy_data();
}
}
static_assert(sizeof(AggDataPtr) == sizeof(size_t));
#define AGG_HASH_MAP_PRECOMPUTE_HASH_VALUES(column, prefetch_dist) \
size_t const column_size = column->size(); \
size_t* hash_values = reinterpret_cast<size_t*>(agg_states->data()); \
{ \
const auto& container_data = column->get_data(); \
const auto container_data = get_immutable_data(column); \
for (size_t i = 0; i < column_size; i++) { \
size_t hashval = this->hash_map.hash_function()(container_data[i]); \
hash_values[i] = hashval; \
@ -293,7 +307,7 @@ struct AggHashMapWithOneNumberKeyWithNullable
for (size_t i = 0; i < column_size; i++) {
AGG_HASH_MAP_PREFETCH_HASH_VALUE();
FieldType key = column->get_data()[i];
FieldType key = column->immutable_data()[i];
if constexpr (HTBuildOp::process_limit) {
if (hash_table_size < extra->limits) {
@ -319,8 +333,10 @@ struct AggHashMapWithOneNumberKeyWithNullable
[[maybe_unused]] size_t hash_table_size = this->hash_map.size();
auto* __restrict not_founds = extra->not_founds;
size_t num_rows = column->size();
auto container = column->immutable_data();
for (size_t i = 0; i < num_rows; i++) {
FieldType key = column->get_data()[i];
FieldType key = container[i];
if constexpr (HTBuildOp::process_limit) {
if (hash_table_size < extra->limits) {
_emplace_key(key, (*agg_states)[i], allocate_func, [&] { hash_table_size++; });
@ -344,9 +360,10 @@ struct AggHashMapWithOneNumberKeyWithNullable
[[maybe_unused]] size_t hash_table_size = this->hash_map.size();
auto* __restrict not_founds = extra->not_founds;
const auto* data_column = down_cast<const ColumnType*>(nullable_column->data_column().get());
const auto container = data_column->immutable_data();
const auto& null_data = nullable_column->null_column_data();
for (size_t i = 0; i < chunk_size; i++) {
const auto key = data_column->get_data()[i];
const auto key = container[i];
if (null_data[i]) {
if (UNLIKELY(null_key_data == nullptr)) {
null_key_data = allocate_func(nullptr);

View File

@ -148,7 +148,7 @@ struct AggHashSetOfOneNumberKey : public AggHashSet<HashSet, AggHashSetOfOneNumb
ALWAYS_NOINLINE void build_set_noprefetch(size_t chunk_size, const Columns& key_columns, MemPool* pool,
Filter* not_founds) {
const auto* column = down_cast<const ColumnType*>(key_columns[0].get());
const auto& keys = column->get_data();
const auto keys = column->immutable_data();
for (size_t i = 0; i < chunk_size; ++i) {
if constexpr (compute_and_allocate) {
@ -163,7 +163,7 @@ struct AggHashSetOfOneNumberKey : public AggHashSet<HashSet, AggHashSetOfOneNumb
ALWAYS_NOINLINE void build_set_prefetch(size_t chunk_size, const Columns& key_columns, MemPool* pool,
Filter* not_founds) {
const auto* column = down_cast<const ColumnType*>(key_columns[0].get());
const auto& keys = column->get_data();
const auto keys = column->immutable_data();
AGG_HASH_SET_PRECOMPUTE_HASH_VALS();
for (size_t i = 0; i < chunk_size; ++i) {
@ -236,7 +236,7 @@ struct AggHashSetOfOneNullableNumberKey
const auto* nullable_column = down_cast<const NullableColumn*>(key_columns[0].get());
const auto* data_column = down_cast<const ColumnType*>(nullable_column->data_column().get());
const auto& null_data = nullable_column->null_column_data();
const auto& keys = data_column->get_data();
const auto keys = data_column->immutable_data();
if (nullable_column->has_null()) {
for (size_t i = 0; i < chunk_size; ++i) {
@ -266,7 +266,7 @@ struct AggHashSetOfOneNullableNumberKey
Filter* not_founds) {
const auto* nullable_column = down_cast<const NullableColumn*>(key_columns[0].get());
const auto* data_column = down_cast<const ColumnType*>(nullable_column->data_column().get());
const auto& keys = data_column->get_data();
const auto keys = data_column->immutable_data();
AGG_HASH_SET_PRECOMPUTE_HASH_VALS();
for (size_t i = 0; i < chunk_size; ++i) {

View File

@ -132,7 +132,7 @@ public:
void bit_compress(const Column& column) {
if constexpr (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8 || sizeof(T) == 16) {
using SrcType = typename int_type<sizeof(T)>::type;
const auto& container = column.get_data();
const auto container = column.immutable_data();
const auto& raw_data = container.data();
size_t n = container.size();
auto base = std::any_cast<T>(_base);

View File

@ -38,6 +38,7 @@
#include "runtime/types.h"
#include "types/logical_type.h"
#include "util/pred_guard.h"
#include "util/value_generator.h"
namespace starrocks {

View File

@ -193,7 +193,7 @@ struct SortRuntimeFilterBuilder {
auto data_column = ColumnHelper::get_data_column(column.get());
auto runtime_data_column = down_cast<const RunTimeColumnType<ltype>*>(data_column);
auto data = runtime_data_column->get_data()[rid];
auto data = GetContainer<ltype>::get_data(runtime_data_column)[rid];
if (asc) {
return MinMaxRuntimeFilter<ltype>::template create_with_range<false>(pool, data, is_close_interval,
need_null);

View File

@ -224,7 +224,7 @@ void ChunksSorterHeapSort::_do_filter_data_for_type(detail::ChunkHolder* chunk_h
const auto& order_by_null_column = down_cast<const NullableColumn*>(input_column.get())->null_column();
const auto& order_by_data_column = down_cast<const NullableColumn*>(input_column.get())->data_column();
const auto* null_data = order_by_null_column->get_data().data();
const auto* null_data = order_by_null_column->immutable_data().data();
const auto* order_by_data = ColumnHelper::cast_to_raw<TYPE>(order_by_data_column)->get_data().data();
auto* __restrict__ filter_data = filter->data();

View File

@ -15,6 +15,7 @@
#include "exec/dictionary_cache_writer.h"
#include "exec/tablet_info.h"
#include "runtime/current_thread.h"
#include "serde/protobuf_serde.h"
#include "util/brpc_stub_cache.h"
#include "util/compression/block_compression.h"

View File

@ -18,10 +18,7 @@
#define JOIN_HASH_MAP_H
#include "join_hash_map_helper.h"
#include "join_hash_map_method.hpp"
#include "join_hash_table_descriptor.h"
#include "join_key_constructor.hpp"
#include "join_type_traits.h"
#if defined(__aarch64__)
@ -232,96 +229,96 @@ private:
void _search_ht_remain(RuntimeState* state);
template <bool first_probe, bool is_collision_free_and_unique>
void _search_ht_impl(RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& data);
void _search_ht_impl(RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> data);
// for one key inner join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht(RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data);
void _probe_from_ht(RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
template <bool first_probe>
void _probe_coroutine(RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data);
void _probe_coroutine(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for one key left outer join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_left_outer_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_left_outer_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht_for_left_outer_join(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
bool _contains_probe_row(RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data,
uint32_t probe_index);
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
bool _contains_probe_row(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data, uint32_t probe_index);
// for one key left semi join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_left_semi_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_left_semi_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht_for_left_semi_join(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for one key left anti join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_left_anti_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_left_anti_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht_for_left_anti_join(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for one key right outer join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_right_outer_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_right_outer_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht_for_right_outer_join(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for one key right semi join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_right_semi_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_right_semi_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht_for_right_semi_join(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for one key right anti join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_right_anti_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_right_anti_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht_for_right_anti_join(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for one key full outer join
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_full_outer_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_full_outer_join(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
HashTableProbeState::ProbeCoroutine _probe_from_ht_for_full_outer_join(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for left semi join with other join conjunct
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_left_semi_join_with_other_conjunct(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_left_semi_join_with_other_conjunct(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for null aware anti join with other join conjunct
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_null_aware_anti_join_with_other_conjunct(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data);
// for one key right outer join with other conjunct
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_right_outer_right_semi_right_anti_join_with_other_conjunct(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data);
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data);
// for one key full outer join with other join conjunct
template <bool first_probe, bool is_collision_free_and_unique>
void _probe_from_ht_for_left_outer_left_anti_full_outer_join_with_other_conjunct(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data);
void _probe_from_ht_for_left_outer_left_anti_full_outer_join_with_other_conjunct(
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data);
JoinHashTableItems* _table_items = nullptr;
HashTableProbeState* _probe_state = nullptr;

View File

@ -14,6 +14,7 @@
#pragma once
#include "column/column.h"
#include "simd/gather.h"
#include "simd/simd.h"
#include "util/runtime_profile.h"
@ -57,7 +58,7 @@ void JoinHashMap<LT, CT, MT>::probe_prepare(RuntimeState* state) {
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
void JoinHashMap<LT, CT, MT>::build(RuntimeState* state) {
const auto& keys = BuildKeyConstructor().get_key_data(*_table_items);
const auto* is_nulls = BuildKeyConstructor().get_is_nulls(*_table_items);
const auto is_nulls = BuildKeyConstructor().get_is_nulls(*_table_items);
HashMapMethod().construct_hash_table(_table_items, keys, is_nulls);
_table_items->calculate_ht_info(BuildKeyConstructor().get_key_column_bytes(*_table_items));
}
@ -482,8 +483,8 @@ void JoinHashMap<LT, CT, MT>::_search_ht_remain(RuntimeState* state) {
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_search_ht_impl(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& data) {
void JoinHashMap<LT, CT, MT>::_search_ht_impl(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> data) {
if (!_table_items->with_other_conjunct) {
switch (_table_items->join_type) {
case TJoinOp::LEFT_OUTER_JOIN:
@ -668,8 +669,8 @@ void JoinHashMap<LT, CT, MT>::_search_ht_impl(RuntimeState* state, const Buffer<
// NOTE: coroutine only SIMD code of SSE but not AVX
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe>
void JoinHashMap<LT, CT, MT>::_probe_coroutine(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_coroutine(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
_probe_state->match_flag = JoinMatchFlag::NORMAL;
_probe_state->match_count = 0;
_probe_state->cur_row_match_count = 0;
@ -697,8 +698,8 @@ void JoinHashMap<LT, CT, MT>::_probe_coroutine(RuntimeState* state, const Buffer
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_from_ht(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
_probe_state->match_flag = JoinMatchFlag::NORMAL;
size_t match_count = 0;
bool one_to_many = false;
@ -778,8 +779,8 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht(RuntimeState* state, const Buffer<C
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
for (size_t i = _probe_state->cur_probe_index++; i < _probe_state->probe_row_count;
i = _probe_state->cur_probe_index++) {
_probe_state->probe_match_filter[i] = 0;
@ -820,7 +821,7 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht(Runt
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_join(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
for (size_t i = _probe_state->cur_probe_index++; i < _probe_state->probe_row_count;
i = _probe_state->cur_probe_index++) {
int cur_row_match_count = 0;
@ -863,8 +864,9 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_join(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
_probe_state->match_flag = JoinMatchFlag::NORMAL;
size_t match_count = 0;
bool one_to_many = false;
@ -938,7 +940,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_join(RuntimeState* s
}
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
for (size_t i = _probe_state->cur_probe_index++; i < _probe_state->probe_row_count;
i = _probe_state->cur_probe_index++) {
size_t build_index = _probe_state->next[i];
@ -966,8 +968,8 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_
}
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
bool JoinHashMap<LT, CT, MT>::_contains_probe_row(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data, uint32_t probe_index) {
bool JoinHashMap<LT, CT, MT>::_contains_probe_row(RuntimeState* state, const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data, uint32_t probe_index) {
uint32_t index = _probe_state->next[probe_index];
if (index == 0) {
return false;
@ -985,8 +987,9 @@ bool JoinHashMap<LT, CT, MT>::_contains_probe_row(RuntimeState* state, const Buf
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
size_t match_count = 0;
const size_t probe_row_count = _probe_state->probe_row_count;
for (size_t i = 0; i < probe_row_count; i++) {
@ -1014,13 +1017,14 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join(RuntimeState* st
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_anti_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_anti_join(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
DCHECK_LT(0, _table_items->row_count);
size_t match_count = 0;
const size_t probe_row_count = _probe_state->probe_row_count;
if (_table_items->join_type == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && _probe_state->null_array != nullptr) {
if (_table_items->join_type == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && _probe_state->null_array.has_value()) {
// process left anti join from not in
for (size_t i = 0; i < probe_row_count; i++) {
if ((*_probe_state->null_array)[i] == 0 && !_contains_probe_row(state, build_data, probe_data, i)) {
@ -1055,9 +1059,9 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_anti_join(RuntimeState* st
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_anti_join(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
DCHECK_LT(0, _table_items->row_count);
if (_table_items->join_type == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && _probe_state->null_array != nullptr) {
if (_table_items->join_type == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN && _probe_state->null_array.has_value()) {
// process left anti join from not in
for (size_t i = _probe_state->cur_probe_index++; i < _probe_state->probe_row_count;
i = _probe_state->cur_probe_index++) {
@ -1122,8 +1126,8 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_outer_join(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
size_t match_count = 0;
size_t i = _probe_state->cur_probe_index;
@ -1168,7 +1172,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_outer_join(RuntimeState*
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_outer_join(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
for (size_t i = _probe_state->cur_probe_index++; i < _probe_state->probe_row_count;
i = _probe_state->cur_probe_index++) {
size_t build_index = _probe_state->next[i];
@ -1200,8 +1204,9 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_semi_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_semi_join(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
size_t match_count = 0;
size_t i = _probe_state->cur_probe_index;
@ -1242,7 +1247,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_semi_join(RuntimeState* s
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_semi_join(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
for (size_t i = _probe_state->cur_probe_index++; i < _probe_state->probe_row_count;
i = _probe_state->cur_probe_index++) {
size_t build_index = _probe_state->next[i];
@ -1274,8 +1279,9 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_anti_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_anti_join(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
size_t probe_row_count = _probe_state->probe_row_count;
for (size_t i = 0; i < probe_row_count; i++) {
size_t index = _probe_state->next[i];
@ -1299,7 +1305,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_anti_join(RuntimeState* s
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_anti_join(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
for (size_t i = _probe_state->cur_probe_index++; i < _probe_state->probe_row_count;
i = _probe_state->cur_probe_index++) {
size_t build_index = _probe_state->next[i];
@ -1320,8 +1326,9 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_full_outer_join(RuntimeState* state, const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_full_outer_join(RuntimeState* state,
const ImmBuffer<CppType> build_data,
const ImmBuffer<CppType> probe_data) {
size_t match_count = 0;
size_t i = _probe_state->cur_probe_index;
@ -1381,7 +1388,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_full_outer_join(RuntimeState* s
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_full_outer_join(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
for (size_t i = _probe_state->cur_probe_index++; i < _probe_state->probe_row_count;
i = _probe_state->cur_probe_index++) {
size_t build_index = _probe_state->next[i];
@ -1416,9 +1423,8 @@ HashTableProbeState::ProbeCoroutine JoinHashMap<LT, CT, MT>::_probe_from_ht_for_
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join_with_other_conjunct(RuntimeState* state,
const Buffer<CppType>& build_data,
const Buffer<CppType>& probe_data) {
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join_with_other_conjunct(
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
size_t match_count = 0;
size_t i = _probe_state->cur_probe_index;
@ -1468,7 +1474,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_semi_join_with_other_conju
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_null_aware_anti_join_with_other_conjunct(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
size_t match_count = 0;
size_t i = _probe_state->cur_probe_index;
@ -1492,7 +1498,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_null_aware_anti_join_with_other
size_t probe_row_count = _probe_state->probe_row_count;
for (; i < probe_row_count; i++) {
size_t build_index = _probe_state->next[i];
if (_probe_state->null_array != nullptr && (*_probe_state->null_array)[i] == 1) {
if (_probe_state->null_array.has_value() && (*_probe_state->null_array)[i] == 1) {
// when left table col value is null needs match all rows in right table
for (size_t j = _probe_state->cur_nullaware_build_index; j < _table_items->row_count + 1; j++) {
MATCH_RIGHT_TABLE_ROWS()
@ -1548,7 +1554,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_null_aware_anti_join_with_other
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_outer_right_semi_right_anti_join_with_other_conjunct(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
size_t match_count = 0;
size_t i = _probe_state->cur_probe_index;
@ -1585,7 +1591,7 @@ void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_right_outer_right_semi_right_an
template <LogicalType LT, JoinKeyConstructorType CT, JoinHashMapMethodType MT>
template <bool first_probe, bool is_collision_free_and_unique>
void JoinHashMap<LT, CT, MT>::_probe_from_ht_for_left_outer_left_anti_full_outer_join_with_other_conjunct(
RuntimeState* state, const Buffer<CppType>& build_data, const Buffer<CppType>& probe_data) {
RuntimeState* state, const ImmBuffer<CppType> build_data, const ImmBuffer<CppType> probe_data) {
size_t match_count = 0;
size_t i = _probe_state->cur_probe_index;

View File

@ -14,7 +14,6 @@
#pragma once
#include "join_hash_map_helper.h"
#include "join_hash_table_descriptor.h"
namespace starrocks {
@ -59,12 +58,12 @@ public:
static constexpr bool AreKeysInChainIdentical = false;
static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls);
static void construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls);
const ImmBuffer<CppType>& build_keys, const ImmBuffer<CppType>& probe_keys,
std::optional<ImmBuffer<uint8_t>> is_nulls);
static bool equal(const CppType& x, const CppType& y) { return x == y; }
};
@ -124,12 +123,12 @@ public:
static constexpr bool AreKeysInChainIdentical = true;
static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls);
static void construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls);
const ImmBuffer<CppType>& build_keys, const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static bool equal(const CppType& x, const CppType& y) { return true; }
@ -189,12 +188,12 @@ public:
static constexpr bool AreKeysInChainIdentical = true;
static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls);
static void construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls);
const ImmBuffer<CppType>& build_keys, const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static bool equal(const CppType& x, const CppType& y) { return true; }
};
@ -240,12 +239,12 @@ public:
static constexpr bool AreKeysInChainIdentical = true;
static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls);
static void construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls);
const ImmBuffer<CppType>& build_keys, const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static bool equal(const CppType& x, const CppType& y) { return true; }
};
@ -262,12 +261,12 @@ public:
static constexpr bool AreKeysInChainIdentical = true;
static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls);
static void construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls);
const ImmBuffer<CppType>& build_keys, const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static bool equal(const CppType& x, const CppType& y) { return true; }
};
@ -318,12 +317,12 @@ public:
static constexpr bool AreKeysInChainIdentical = true;
static void build_prepare(RuntimeState* state, JoinHashTableItems* table_items);
static void construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls);
static void construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static void lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls);
const ImmBuffer<CppType>& build_keys, const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls);
static bool equal(const CppType& x, const CppType& y) { return true; }
};

View File

@ -14,7 +14,8 @@
#pragma once
#include "join_hash_map_method.h"
#include "exec/join/join_hash_map_helper.h"
#include "exec/join/join_hash_map_method.h"
#include "simd/gather.h"
namespace starrocks {
@ -32,11 +33,11 @@ void BucketChainedJoinHashMap<LT>::build_prepare(RuntimeState* state, JoinHashTa
}
template <LogicalType LT>
void BucketChainedJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls) {
void BucketChainedJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const auto num_rows = 1 + table_items->row_count;
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
auto* __restrict next = table_items->next.data();
for (uint32_t i = 1; i < num_rows; i++) {
// Use `next` stores `bucket_num` temporarily.
@ -84,14 +85,15 @@ void BucketChainedJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* tabl
template <LogicalType LT>
void BucketChainedJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls) {
const ImmBuffer<CppType>& build_keys,
const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const uint32_t row_count = probe_state->probe_row_count;
const auto* firsts = table_items.first.data();
const auto* buckets = probe_state->buckets.data();
auto* nexts = probe_state->next.data();
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
for (uint32_t i = 0; i < row_count; i++) {
probe_state->buckets[i] = JoinHashMapHelper::calc_bucket_num<CppType>(
probe_keys[i], table_items.bucket_size, table_items.log_bucket_size);
@ -130,9 +132,9 @@ void LinearChainedJoinHashMap<LT, NeedBuildChained>::build_prepare(RuntimeState*
}
template <LogicalType LT, bool NeedBuildChained>
void LinearChainedJoinHashMap<LT, NeedBuildChained>::construct_hash_table(JoinHashTableItems* table_items,
const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls) {
void LinearChainedJoinHashMap<LT, NeedBuildChained>::construct_hash_table(
JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
auto process = [&]<bool IsNullable>() {
const auto num_rows = 1 + table_items->row_count;
const uint32_t bucket_size_mask = table_items->bucket_size - 1;
@ -208,7 +210,7 @@ void LinearChainedJoinHashMap<LT, NeedBuildChained>::construct_hash_table(JoinHa
}
};
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
process.template operator()<false>();
} else {
process.template operator()<true>();
@ -218,9 +220,9 @@ void LinearChainedJoinHashMap<LT, NeedBuildChained>::construct_hash_table(JoinHa
template <LogicalType LT, bool NeedBuildChained>
void LinearChainedJoinHashMap<LT, NeedBuildChained>::lookup_init(const JoinHashTableItems& table_items,
HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys,
const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls) {
const ImmBuffer<CppType>& build_keys,
const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
auto process = [&]<bool IsNullable>() {
const uint32_t bucket_size_mask = table_items.bucket_size - 1;
const uint32_t row_count = probe_state->probe_row_count;
@ -292,7 +294,7 @@ void LinearChainedJoinHashMap<LT, NeedBuildChained>::lookup_init(const JoinHashT
}
};
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
process.template operator()<false>();
} else {
process.template operator()<true>();
@ -314,12 +316,12 @@ void DirectMappingJoinHashMap<LT>::build_prepare(RuntimeState* state, JoinHashTa
}
template <LogicalType LT>
void DirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* table_items, const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls) {
void DirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* table_items, const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
static constexpr CppType MIN_VALUE = RunTimeTypeLimits<LT>::min_value();
const auto num_rows = 1 + table_items->row_count;
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
for (uint32_t i = 1; i < num_rows; i++) {
const size_t bucket_num = keys[i] - MIN_VALUE;
table_items->next[i] = table_items->first[bucket_num];
@ -339,14 +341,15 @@ void DirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* tabl
template <LogicalType LT>
void DirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys, const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls) {
const ImmBuffer<CppType>& build_keys,
const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
probe_state->active_coroutines = 0; // the ht data is not large, so disable it always.
static constexpr CppType MIN_VALUE = RunTimeTypeLimits<LT>::min_value();
const size_t probe_row_count = probe_state->probe_row_count;
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
for (size_t i = 0; i < probe_row_count; i++) {
probe_state->next[i] = table_items.first[probe_keys[i] - MIN_VALUE];
}
@ -376,11 +379,11 @@ void RangeDirectMappingJoinHashMap<LT>::build_prepare(RuntimeState* state, JoinH
template <LogicalType LT>
void RangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* table_items,
const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls) {
const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const uint64_t min_value = table_items->min_value;
const auto num_rows = 1 + table_items->row_count;
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
for (uint32_t i = 1; i < num_rows; i++) {
const size_t bucket_num = keys[i] - min_value;
table_items->next[i] = table_items->first[bucket_num];
@ -400,15 +403,16 @@ void RangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems*
template <LogicalType LT>
void RangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items,
HashTableProbeState* probe_state, const Buffer<CppType>& build_keys,
const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls) {
HashTableProbeState* probe_state,
const ImmBuffer<CppType>& build_keys,
const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
probe_state->active_coroutines = 0; // the ht data is not large, so disable it always.
const int64_t min_value = table_items.min_value;
const int64_t max_value = table_items.max_value;
const size_t num_rows = probe_state->probe_row_count;
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
for (size_t i = 0; i < num_rows; i++) {
if ((probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
const uint64_t index = probe_keys[i] - min_value;
@ -443,11 +447,11 @@ void RangeDirectMappingJoinHashSet<LT>::build_prepare(RuntimeState* state, JoinH
template <LogicalType LT>
void RangeDirectMappingJoinHashSet<LT>::construct_hash_table(JoinHashTableItems* table_items,
const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls) {
const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const uint64_t min_value = table_items->min_value;
const auto num_rows = 1 + table_items->row_count;
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
for (uint32_t i = 1; i < num_rows; i++) {
const uint64_t bucket = keys[i] - min_value;
const uint32_t group = bucket / 8;
@ -467,15 +471,16 @@ void RangeDirectMappingJoinHashSet<LT>::construct_hash_table(JoinHashTableItems*
template <LogicalType LT>
void RangeDirectMappingJoinHashSet<LT>::lookup_init(const JoinHashTableItems& table_items,
HashTableProbeState* probe_state, const Buffer<CppType>& build_keys,
const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls) {
HashTableProbeState* probe_state,
const ImmBuffer<CppType>& build_keys,
const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
probe_state->active_coroutines = 0; // the ht data is not large, so disable it always.
const int64_t min_value = table_items.min_value;
const int64_t max_value = table_items.max_value;
const size_t num_rows = probe_state->probe_row_count;
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
for (size_t i = 0; i < num_rows; i++) {
if ((probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
const uint64_t index = probe_keys[i] - min_value;
@ -516,12 +521,12 @@ void DenseRangeDirectMappingJoinHashMap<LT>::build_prepare(RuntimeState* state,
template <LogicalType LT>
void DenseRangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableItems* table_items,
const Buffer<CppType>& keys,
const Buffer<uint8_t>* is_nulls) {
const ImmBuffer<CppType>& keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
const uint64_t min_value = table_items->min_value;
const auto num_rows = 1 + table_items->row_count;
const uint8_t* is_nulls_data = is_nulls == nullptr ? nullptr : is_nulls->data();
const uint8_t* is_nulls_data = !is_nulls.has_value() ? nullptr : is_nulls->data();
auto is_null = [&]<bool Nullable>(const uint32_t index) {
if constexpr (Nullable) {
return is_nulls_data[index] != 0;
@ -565,7 +570,7 @@ void DenseRangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableI
}
};
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
process.template operator()<false>();
} else {
process.template operator()<true>();
@ -575,9 +580,9 @@ void DenseRangeDirectMappingJoinHashMap<LT>::construct_hash_table(JoinHashTableI
template <LogicalType LT>
void DenseRangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItems& table_items,
HashTableProbeState* probe_state,
const Buffer<CppType>& build_keys,
const Buffer<CppType>& probe_keys,
const Buffer<uint8_t>* is_nulls) {
const ImmBuffer<CppType>& build_keys,
const ImmBuffer<CppType>& probe_keys,
const std::optional<ImmBuffer<uint8_t>> is_nulls) {
probe_state->active_coroutines = 0; // the ht data is not large, so disable it always.
const int64_t min_value = table_items.min_value;
@ -602,7 +607,7 @@ void DenseRangeDirectMappingJoinHashMap<LT>::lookup_init(const JoinHashTableItem
};
const size_t num_rows = probe_state->probe_row_count;
if (is_nulls == nullptr) {
if (!is_nulls.has_value()) {
for (size_t i = 0; i < num_rows; i++) {
if ((probe_keys[i] >= min_value) & (probe_keys[i] <= max_value)) {
const uint64_t bucket_num = probe_keys[i] - min_value;

View File

@ -135,7 +135,8 @@ struct HashTableProbeState {
Buffer<uint32_t> buckets;
Buffer<uint32_t> next;
Buffer<Slice> probe_slice;
const Buffer<uint8_t>* null_array = nullptr;
std::optional<ImmBuffer<uint8_t>> null_array;
ColumnPtr probe_key_column;
const Columns* key_columns = nullptr;
ColumnPtr build_index_column;

View File

@ -14,6 +14,8 @@
#include "join_key_constructor.hpp"
#include <optional>
#include "serde/column_array_serde.h"
namespace starrocks {
@ -60,10 +62,11 @@ void BuildKeyConstructorForSerialized::build_key(RuntimeState* state, JoinHashTa
} else {
table_items->build_key_nulls.resize(row_count + 1);
auto* dest_is_nulls = table_items->build_key_nulls.data();
std::memcpy(dest_is_nulls, null_columns[0]->get_data().data(), (row_count + 1) * sizeof(NullColumn::ValueType));
std::memcpy(dest_is_nulls, null_columns[0]->immutable_data().data(),
(row_count + 1) * sizeof(NullColumn::ValueType));
for (uint32_t i = 1; i < null_columns.size(); i++) {
for (uint32_t j = 1; j < 1 + row_count; j++) {
dest_is_nulls[j] |= null_columns[i]->get_data()[j];
dest_is_nulls[j] |= null_columns[i]->immutable_data()[j];
}
}
@ -125,7 +128,7 @@ void ProbeKeyConstructorForSerialized::_probe_column(const JoinHashTableItems& t
ptr += probe_state->probe_slice[i].size;
}
probe_state->null_array = nullptr;
probe_state->null_array = std::nullopt;
}
void ProbeKeyConstructorForSerialized::_probe_nullable_column(const JoinHashTableItems& table_items,
@ -135,11 +138,11 @@ void ProbeKeyConstructorForSerialized::_probe_nullable_column(const JoinHashTabl
const uint32_t row_count = probe_state->probe_row_count;
for (uint32_t i = 0; i < row_count; i++) {
probe_state->is_nulls[i] = null_columns[0]->get_data()[i];
probe_state->is_nulls[i] = null_columns[0]->immutable_data()[i];
}
for (uint32_t i = 1; i < null_columns.size(); i++) {
for (uint32_t j = 0; j < row_count; j++) {
probe_state->is_nulls[j] |= null_columns[i]->get_data()[j];
probe_state->is_nulls[j] |= null_columns[i]->immutable_data()[j];
}
}
@ -150,6 +153,6 @@ void ProbeKeyConstructorForSerialized::_probe_nullable_column(const JoinHashTabl
}
}
probe_state->null_array = &probe_state->is_nulls;
probe_state->null_array = probe_state->is_nulls;
}
} // namespace starrocks

View File

@ -20,7 +20,7 @@
#include <coroutine>
#include <cstdint>
#include <set>
#include <optional>
#include "column/chunk.h"
#include "column/column_hash.h"
@ -48,8 +48,8 @@ public:
static size_t get_key_column_bytes(const JoinHashTableItems& table_items) {
return table_items.key_columns[0]->byte_size();
}
static const Buffer<CppType>& get_key_data(const JoinHashTableItems& table_items);
static const Buffer<uint8_t>* get_is_nulls(const JoinHashTableItems& table_items);
static const ImmBuffer<CppType> get_key_data(const JoinHashTableItems& table_items);
static const std::optional<ImmBuffer<uint8_t>> get_is_nulls(const JoinHashTableItems& table_items);
};
template <LogicalType LT>
@ -60,7 +60,7 @@ public:
static void prepare(RuntimeState* state, HashTableProbeState* probe_state) {}
static void build_key(const JoinHashTableItems& table_items, HashTableProbeState* probe_state);
static const Buffer<CppType>& get_key_data(const HashTableProbeState& probe_state);
static const ImmBuffer<CppType> get_key_data(const HashTableProbeState& probe_state);
};
// ------------------------------------------------------------------------------------
@ -79,11 +79,14 @@ public:
static size_t get_key_column_bytes(const JoinHashTableItems& table_items) {
return table_items.build_key_column->byte_size();
}
static const Buffer<CppType>& get_key_data(const JoinHashTableItems& table_items) {
return ColumnHelper::as_raw_column<const ColumnType>(table_items.build_key_column)->get_data();
static const ImmBuffer<CppType> get_key_data(const JoinHashTableItems& table_items) {
return ColumnHelper::as_raw_column<const ColumnType>(table_items.build_key_column)->immutable_data();
}
static const Buffer<uint8_t>* get_is_nulls(const JoinHashTableItems& table_items) {
return table_items.build_key_nulls.empty() ? nullptr : &table_items.build_key_nulls;
static const std::optional<ImmBuffer<uint8_t>> get_is_nulls(const JoinHashTableItems& table_items) {
if (table_items.build_key_nulls.empty()) {
return std::nullopt;
}
return table_items.build_key_nulls;
}
};
@ -100,7 +103,7 @@ public:
static void build_key(const JoinHashTableItems& table_items, HashTableProbeState* probe_state);
static const Buffer<CppType>& get_key_data(const HashTableProbeState& probe_state) {
static const ImmBuffer<CppType> get_key_data(const HashTableProbeState& probe_state) {
return ColumnHelper::as_raw_column<ColumnType>(probe_state.probe_key_column)->get_data();
}
};
@ -117,9 +120,14 @@ public:
static size_t get_key_column_bytes(const JoinHashTableItems& table_items) {
return table_items.build_pool->total_allocated_bytes();
}
static const Buffer<Slice>& get_key_data(const JoinHashTableItems& table_items) { return table_items.build_slice; }
static const Buffer<uint8_t>* get_is_nulls(const JoinHashTableItems& table_items) {
return table_items.build_key_nulls.empty() ? nullptr : &table_items.build_key_nulls;
static const ImmBuffer<Slice> get_key_data(const JoinHashTableItems& table_items) {
return table_items.build_slice;
}
static const std::optional<ImmBuffer<uint8_t>> get_is_nulls(const JoinHashTableItems& table_items) {
if (table_items.build_key_nulls.empty()) {
return std::nullopt;
}
return table_items.build_key_nulls;
}
};
@ -133,7 +141,9 @@ public:
static void build_key(const JoinHashTableItems& table_items, HashTableProbeState* probe_state);
static const Buffer<Slice>& get_key_data(const HashTableProbeState& probe_state) { return probe_state.probe_slice; }
static const ImmBuffer<Slice> get_key_data(const HashTableProbeState& probe_state) {
return probe_state.probe_slice;
}
private:
static void _probe_column(const JoinHashTableItems& table_items, HashTableProbeState* probe_state,

View File

@ -14,6 +14,9 @@
#pragma once
#include <optional>
#include "column/column.h"
#include "join_key_constructor.h"
namespace starrocks {
@ -23,7 +26,7 @@ namespace starrocks {
// ------------------------------------------------------------------------------------
template <LogicalType LT>
auto BuildKeyConstructorForOneKey<LT>::get_key_data(const JoinHashTableItems& table_items) -> const Buffer<CppType>& {
auto BuildKeyConstructorForOneKey<LT>::get_key_data(const JoinHashTableItems& table_items) -> const ImmBuffer<CppType> {
ColumnPtr data_column;
if (table_items.key_columns[0]->is_nullable()) {
auto* null_column = ColumnHelper::as_raw_column<NullableColumn>(table_items.key_columns[0]);
@ -44,12 +47,13 @@ auto BuildKeyConstructorForOneKey<LT>::get_key_data(const JoinHashTableItems& ta
}
template <LogicalType LT>
const Buffer<uint8_t>* BuildKeyConstructorForOneKey<LT>::get_is_nulls(const JoinHashTableItems& table_items) {
const std::optional<ImmBuffer<uint8_t>> BuildKeyConstructorForOneKey<LT>::get_is_nulls(
const JoinHashTableItems& table_items) {
if (table_items.key_columns[0]->is_nullable() && table_items.key_columns[0]->has_null()) {
auto* nullable_column = ColumnHelper::as_raw_column<NullableColumn>(table_items.key_columns[0]);
return &nullable_column->null_column()->get_data();
return nullable_column->null_column()->get_data();
} else {
return nullptr;
return std::nullopt;
}
}
@ -59,14 +63,15 @@ void ProbeKeyConstructorForOneKey<LT>::build_key(const JoinHashTableItems& table
const auto& key_column = (*probe_state->key_columns)[0];
if (key_column->is_nullable() && key_column->has_null()) {
const auto* nullable_column = ColumnHelper::as_raw_column<NullableColumn>((*probe_state->key_columns)[0]);
probe_state->null_array = &nullable_column->null_column()->get_data();
probe_state->null_array = nullable_column->immutable_null_column_data();
} else {
probe_state->null_array = nullptr;
probe_state->null_array = std::nullopt;
}
}
template <LogicalType LT>
auto ProbeKeyConstructorForOneKey<LT>::get_key_data(const HashTableProbeState& probe_state) -> const Buffer<CppType>& {
auto ProbeKeyConstructorForOneKey<LT>::get_key_data(const HashTableProbeState& probe_state)
-> const ImmBuffer<CppType> {
if ((*probe_state.key_columns)[0]->is_nullable()) {
auto* nullable_column = ColumnHelper::as_raw_column<NullableColumn>((*probe_state.key_columns)[0]);
return ColumnHelper::as_raw_column<ColumnType>(nullable_column->data_column())->get_data();
@ -153,7 +158,7 @@ void ProbeKeyConstructorForSerializedFixedSize<LT>::build_key(const JoinHashTabl
row_count);
if (null_columns.empty()) {
probe_state->null_array = nullptr;
probe_state->null_array = std::nullopt;
} else {
for (uint32_t i = 0; i < row_count; i++) {
probe_state->is_nulls[i] = null_columns[0]->get_data()[i];
@ -164,7 +169,7 @@ void ProbeKeyConstructorForSerializedFixedSize<LT>::build_key(const JoinHashTabl
}
}
probe_state->null_array = &probe_state->is_nulls;
probe_state->null_array = probe_state->is_nulls;
}
}

View File

@ -50,7 +50,7 @@ ParquetReaderWrap::ParquetReaderWrap(std::shared_ptr<arrow::io::RandomAccessFile
_read_offset(read_offset),
_read_size(read_size) {
_parquet = std::move(parquet_file);
_properties = parquet::ReaderProperties();
_properties = ::parquet::ReaderProperties();
_filename = (reinterpret_cast<ParquetChunkFile*>(_parquet.get()))->filename();
}
@ -77,7 +77,7 @@ Status ParquetReaderWrap::next_selected_row_group() {
Status ParquetReaderWrap::_init_parquet_reader() {
try {
parquet::ArrowReaderProperties arrow_reader_properties;
::parquet::ArrowReaderProperties arrow_reader_properties;
/*
* timestamp unit to use for INT96-encoded timestamps in parquet.
* SECOND, MICRO, MILLI, NANO
@ -111,9 +111,9 @@ Status ParquetReaderWrap::_init_parquet_reader() {
arrow_reader_properties.set_cache_options(cache_options);
// new file reader for parquet file
auto st = parquet::arrow::FileReader::Make(arrow::default_memory_pool(),
parquet::ParquetFileReader::Open(_parquet, _properties),
arrow_reader_properties, &_reader);
auto st = ::parquet::arrow::FileReader::Make(arrow::default_memory_pool(),
::parquet::ParquetFileReader::Open(_parquet, _properties),
arrow_reader_properties, &_reader);
if (!st.ok()) {
std::ostringstream oss;
oss << "Failed to create parquet file reader. error: " << st.ToString() << ", filename: " << _filename;
@ -153,7 +153,7 @@ Status ParquetReaderWrap::_init_parquet_reader() {
}
return Status::OK();
} catch (parquet::ParquetException& e) {
} catch (::parquet::ParquetException& e) {
std::stringstream str_error;
str_error << "Init parquet reader fail. " << e.what() << ", filename: " << _filename;
LOG(WARNING) << str_error.str();
@ -195,7 +195,7 @@ Status ParquetReaderWrap::init_parquet_reader(const std::vector<SlotDescriptor*>
}
}
return Status::OK();
} catch (parquet::ParquetException& e) {
} catch (::parquet::ParquetException& e) {
std::stringstream str_error;
str_error << "Init parquet reader fail. " << e.what();
LOG(WARNING) << str_error.str() << " filename: " << _filename;

View File

@ -41,7 +41,8 @@ void calc_hash_values_and_bucket_ids(const std::vector<const Column*>& partition
round_ids[j] = (round_hashes[j] & std::numeric_limits<int>::max()) % bucket_properties[i].bucket_num;
}
if (partitions_columns[i]->has_null()) {
const auto& null_data = down_cast<const NullableColumn*>(partitions_columns[i])->null_column()->get_data();
const auto& null_data =
down_cast<const NullableColumn*>(partitions_columns[i])->null_column()->immutable_data();
for (int j = 0; j < num_rows; j++) {
round_ids[j] = null_data[j] ? bucket_properties[i].bucket_num : round_ids[j];
}

View File

@ -297,7 +297,7 @@ protected:
typename HashMap::allocator_type>
visited_keys(chunk->num_rows());
const auto& null_flag_data = nullable_key_column->null_column()->get_data();
const auto null_data = nullable_key_column->immutable_null_column_data();
const auto size = chunk->num_rows();
// partition_idx=0 is reserved by null key.
auto next_partition_idx = hash_map.size() + 1;
@ -305,7 +305,7 @@ protected:
uint32_t i = 0;
for (; !is_passthrough && i < size; i++) {
PartitionChunks* value_ptr = nullptr;
if (null_flag_data[i] == 1) {
if (null_data[i] == 1) {
value_ptr = &null_key_value;
} else {
const auto& key = key_loader(i);
@ -377,7 +377,7 @@ struct PartitionHashMapWithOneNumberKey : public PartitionHashMapBase<false, fal
NewPartitionCallback&& new_partition_cb, PartitionChunkConsumer&& partition_chunk_consumer) {
DCHECK(!key_columns[0]->is_nullable());
const auto* key_column = down_cast<const ColumnType*>(key_columns[0].get());
const auto& key_column_data = key_column->get_data();
const auto key_column_data = key_column->immutable_data();
append_chunk_for_one_key<EnablePassthrough>(
hash_map, chunk, [&](uint32_t offset) { return key_column_data[offset]; },
[](const FieldType& key) { return key; }, obj_pool,
@ -402,8 +402,8 @@ struct PartitionHashMapWithOneNullableNumberKey : public PartitionHashMapBase<tr
NewPartitionCallback&& new_partition_cb, PartitionChunkConsumer&& partition_chunk_consumer) {
DCHECK(key_columns[0]->is_nullable());
const auto* nullable_key_column = ColumnHelper::as_raw_column<const NullableColumn>(key_columns[0].get());
const auto& key_column_data =
down_cast<const ColumnType*>(nullable_key_column->data_column().get())->get_data();
const auto key_column_data =
down_cast<const ColumnType*>(nullable_key_column->data_column().get())->immutable_data();
append_chunk_for_one_nullable_key<EnablePassthrough>(
hash_map, null_key_value, chunk, nullable_key_column,
[&](uint32_t offset) { return key_column_data[offset]; }, [](const FieldType& key) { return key; },

View File

@ -31,6 +31,7 @@
#include "exec/pipeline/scan/scan_operator.h"
#include "exec/pipeline/schedule/common.h"
#include "exec/pipeline/schedule/observer.h"
#include "exec/pipeline/schedule/pipeline_timer.h"
#include "exec/pipeline/source_operator.h"
#include "exec/workgroup/work_group_fwd.h"
#include "exprs/runtime_filter_bank.h"

View File

@ -16,6 +16,7 @@
#include "exec/pipeline/pipeline_driver_executor.h"
#include "exec/workgroup/work_group.h"
#include "runtime/current_thread.h"
#include "util/arrow/row_batch.h"
#include "util/arrow/starrocks_column_to_arrow.h"

View File

@ -25,7 +25,7 @@ void TableFunctionOperator::close(RuntimeState* state) {
bool TableFunctionOperator::has_output() const {
if (!_table_function_result.first.empty() && _table_function_result.second->size() > 1 &&
_next_output_row < _table_function_result.second->get_data().back()) {
_next_output_row < _table_function_result.second->immutable_data().back()) {
return true;
}
if (_input_chunk != nullptr && _table_function_state != nullptr &&
@ -207,14 +207,15 @@ void TableFunctionOperator::_copy_result(Columns& columns, uint32_t max_output_s
uint32_t curr_output_size = columns[0]->size();
const auto& fn_result_cols = _table_function_result.first;
const auto& offsets_col = _table_function_result.second;
while (curr_output_size < max_output_size && _next_output_row < offsets_col->get_data().back()) {
const auto offsets_data = offsets_col->immutable_data();
while (curr_output_size < max_output_size && _next_output_row < offsets_data.back()) {
uint32_t start = _next_output_row;
uint32_t end = offsets_col->get_data()[_next_output_row_offset + 1];
DCHECK_GE(start, offsets_col->get_data()[_next_output_row_offset]);
uint32_t end = offsets_data[_next_output_row_offset + 1];
DCHECK_GE(start, offsets_data[_next_output_row_offset]);
DCHECK_LE(start, end);
uint32_t copy_rows = std::min(end - start, max_output_size - curr_output_size);
VLOG(2) << "_next_output_row=" << _next_output_row << " start=" << start << " end=" << end
<< " copy_rows=" << copy_rows << " input_size=" << offsets_col->get_data().back()
<< " copy_rows=" << copy_rows << " input_size=" << offsets_data.back()
<< " _next_output_row_offset=" << _next_output_row_offset
<< " _input_index_of_first_result=" << _input_index_of_first_result;

View File

@ -21,6 +21,7 @@
#include "exec/limited_pipeline_chunk_buffer.h"
#include "exec/pipeline/operator.h"
#include "exec/pipeline/schedule/observer.h"
#include "exec/pipeline/schedule/timeout_tasks.h"
#include "exec/pipeline/source_operator.h"
namespace starrocks {

View File

@ -30,7 +30,7 @@
namespace starrocks {
using NullMasks = NullColumn::Container;
using NullMasks = NullColumn::ImmContainer;
// compare the value by column
//
@ -126,7 +126,7 @@ public:
} else {
_cmp_vector[0] |= 1;
}
const auto& data_container = column.get_data();
const auto data_container = column.immutable_data();
if (!_null_masks.empty()) {
DCHECK_EQ(_null_masks.size(), num_rows);
for (size_t i = 1; i < num_rows; ++i) {
@ -160,7 +160,7 @@ public:
private:
const ColumnPtr& _first_column;
std::vector<uint8_t>& _cmp_vector;
const NullColumn::Container& _null_masks;
const NullMasks _null_masks;
};
// append the result by selector
@ -413,7 +413,7 @@ Status SortedStreamingAggregator::_compute_group_by(size_t chunk_size) {
// _cmp_vector[i] = group[i - 1].equals(group[i])
// _cmp_vector[i] == 0 means group[i - 1].equals(group[i])
_cmp_vector.assign(chunk_size, 0);
const Buffer<uint8_t> dummy;
const NullMasks dummy;
SCOPED_TIMER(_agg_stat->agg_compute_timer);
for (size_t i = 0; i < _group_by_columns.size(); ++i) {
ColumnSelfComparator cmp(_last_columns[i], _cmp_vector, dummy);

View File

@ -90,7 +90,7 @@ public:
// Two step compare:
// 1. Compare null values, store at temporary result
// 2. Mask notnull values, and compare not-null values
const NullData& null_data = column.immutable_null_column_data();
const auto null_data = column.immutable_null_column_data();
int nan_direction = _sort_order * _null_first;
@ -201,7 +201,7 @@ public:
template <typename T>
Status do_visit(const FixedLengthColumnBase<T>& column) {
T rhs_data = _rhs_value.get<T>();
auto& lhs_data = column.get_data();
const auto lhs_data = column.immutable_data();
if (_sort_order == 1) {
auto cmp = [&](int lhs_row) { return SorterComparator<T>::compare(lhs_data[lhs_row], rhs_data); };
@ -222,7 +222,7 @@ public:
}
Status do_visit(const JsonColumn& column) {
auto& lhs_data = column.get_data();
const auto lhs_data = column.immutable_data();
const JsonValue& rhs_json = *_rhs_value.get_json();
if (_sort_order == 1) {
@ -263,7 +263,7 @@ public:
// 1. Compare the null flag
// 2. Compare the value if both are not null. Since value for null is just default value,
// which are equal, so just compare the value directly
const NullData& null_data = column.immutable_null_column_data();
const auto null_data = column.immutable_null_column_data();
for (size_t i = 1; i < column.size(); i++) {
(*_tie)[i] &= (null_data[i - 1] == null_data[i]);
}
@ -275,12 +275,12 @@ public:
template <typename T>
Status do_visit(const BinaryColumnBase<T>& column) {
auto& data = column.get_proxy_data();
const NullData* null_data = nullptr;
ImmutableNullData null_data;
if (_nullable_column != nullptr) {
null_data = &_nullable_column->get_data();
null_data = _nullable_column->immutable_data();
}
for (size_t i = 1; i < column.size(); i++) {
if ((null_data == nullptr) || ((*null_data)[i - 1] != 1 && (*null_data)[i] != 1)) {
if ((null_data.empty()) || (null_data[i - 1] != 1 && null_data[i] != 1)) {
(*_tie)[i] &= SorterComparator<Slice>::compare(data[i - 1], data[i]) == 0;
}
}
@ -289,13 +289,13 @@ public:
template <typename T>
Status do_visit(const FixedLengthColumnBase<T>& column) {
auto& data = column.get_data();
const NullData* null_data = nullptr;
const auto data = column.immutable_data();
ImmutableNullData null_data;
if (_nullable_column != nullptr) {
null_data = &_nullable_column->get_data();
null_data = _nullable_column->immutable_data();
}
for (size_t i = 1; i < column.size(); i++) {
if ((null_data == nullptr) || ((*null_data)[i - 1] != 1 && (*null_data)[i] != 1)) {
if ((null_data.empty()) || (null_data[i - 1] != 1 && null_data[i] != 1)) {
(*_tie)[i] &= SorterComparator<T>::compare(data[i - 1], data[i]) == 0;
}
}

View File

@ -163,9 +163,9 @@ public:
template <class T>
Status do_visit(const FixedLengthColumn<T>& _) {
using ColumnType = const FixedLengthColumn<T>;
using Container = typename ColumnType::Container;
auto& left_data = down_cast<ColumnType*>(_left_col)->get_data();
auto& right_data = down_cast<ColumnType*>(_right_col)->get_data();
using Container = typename ColumnType::ImmContainer;
const auto left_data = down_cast<ColumnType*>(_left_col)->immutable_data();
const auto right_data = down_cast<ColumnType*>(_right_col)->immutable_data();
return merge_ordinary_column<Container, T>(left_data, right_data);
}

View File

@ -118,7 +118,7 @@ public:
return column.data_column_ref().accept(this);
}
const NullData& null_data = column.immutable_null_column_data();
const auto null_data = column.immutable_null_column_data();
auto null_pred = [&](const SmallPermuteItem& item) -> bool {
if (_sort_desc.is_null_first()) {
@ -194,7 +194,7 @@ public:
return SorterComparator<T>::compare(lhs.inline_value, rhs.inline_value);
};
auto inlined = create_inline_permutation<T, IS_RANGES>(_permutation, column.get_data());
auto inlined = create_inline_permutation<T, IS_RANGES>(_permutation, column.immutable_data());
RETURN_IF_ERROR(sort_and_tie_helper(_cancel, &column, _sort_desc.asc_order(), inlined, _tie, cmp,
_range_or_ranges, _build_tie));
restore_inline_permutation(inlined, _permutation);
@ -247,17 +247,17 @@ public:
size_t get_limited() const { return _pruned_limit; }
Status do_visit(const NullableColumn& column) {
std::vector<const NullData*> null_datas;
std::vector<ImmutableNullData> null_datas;
Columns data_columns;
for (auto& col : _vertical_columns) {
auto real = down_cast<const NullableColumn*>(col.get());
null_datas.push_back(&real->immutable_null_column_data());
null_datas.push_back(real->immutable_null_column_data());
data_columns.push_back(real->data_column());
}
if (_sort_desc.is_null_first()) {
auto null_pred = [&](const PermutationItem& item) -> bool {
return (*null_datas[item.chunk_index])[item.index_in_chunk] == 1;
return null_datas[item.chunk_index][item.index_in_chunk] == 1;
};
RETURN_IF_ERROR(sort_and_tie_helper_nullable_vertical(_cancel, data_columns, null_pred, _sort_desc,
@ -265,7 +265,7 @@ public:
&_pruned_limit));
} else {
auto null_pred = [&](const PermutationItem& item) -> bool {
return (*null_datas[item.chunk_index])[item.index_in_chunk] != 1;
return null_datas[item.chunk_index][item.index_in_chunk] != 1;
};
RETURN_IF_ERROR(sort_and_tie_helper_nullable_vertical(_cancel, data_columns, null_pred, _sort_desc,
@ -290,10 +290,10 @@ public:
return lhs.inline_value.compare(rhs.inline_value);
};
std::vector<const Container*> containers;
std::vector<Container> containers;
for (const auto& col : _vertical_columns) {
const auto real = down_cast<const ColumnType*>(col.get());
containers.push_back(&real->get_proxy_data());
containers.push_back(real->get_proxy_data());
}
auto inlined = _create_inlined_permutation<Slice>(containers);
@ -319,17 +319,17 @@ public:
template <typename T>
Status do_visit(const FixedLengthColumnBase<T>& column) {
using ColumnType = FixedLengthColumnBase<T>;
using Container = typename FixedLengthColumnBase<T>::Container;
using Container = typename FixedLengthColumnBase<T>::ImmContainer;
if (_need_inline_value()) {
using ItemType = CompactChunkItem<T>;
auto cmp = [&](const ItemType& lhs, const ItemType& rhs) {
return SorterComparator<T>::compare(lhs.inline_value, rhs.inline_value);
};
std::vector<const Container*> containers;
std::vector<Container> containers;
for (const auto& col : _vertical_columns) {
const auto real = down_cast<const ColumnType*>(col.get());
containers.emplace_back(&real->get_data());
containers.emplace_back(real->immutable_data());
}
auto inlined = _create_inlined_permutation<T>(containers);
RETURN_IF_ERROR(sort_and_tie_helper(_cancel, &column, _sort_desc.asc_order(), inlined, _tie, cmp, _range,
@ -340,8 +340,8 @@ public:
auto cmp = [&](const ItemType& lhs, const ItemType& rhs) {
auto left_column = down_cast<const ColumnType*>(_vertical_columns[lhs.chunk_index].get());
auto right_column = down_cast<const ColumnType*>(_vertical_columns[rhs.chunk_index].get());
auto left_value = left_column->get_data()[lhs.index_in_chunk];
auto right_value = right_column->get_data()[rhs.index_in_chunk];
auto left_value = left_column->immutable_data()[lhs.index_in_chunk];
auto right_value = right_column->immutable_data()[rhs.index_in_chunk];
return SorterComparator<T>::compare(left_value, right_value);
};
@ -434,7 +434,7 @@ private:
int index_in_chunk = _permutation[i].index_in_chunk;
result[i].chunk_index = chunk_index;
result[i].index_in_chunk = index_in_chunk;
result[i].inline_value = (*containers[chunk_index])[index_in_chunk];
result[i].inline_value = containers[chunk_index][index_in_chunk];
}
return result;
}

View File

@ -101,7 +101,7 @@ public:
template <typename T>
Status do_visit(DecimalV3Column<T>* dst) {
using Container = typename DecimalV3Column<T>::Container;
using Container = typename DecimalV3Column<T>::ImmContainer;
using ColumnType = DecimalV3Column<T>;
auto& data = dst->get_data();
@ -109,7 +109,7 @@ public:
data.resize(output + _perm.size());
for (auto& p : _perm) {
const Container& container = down_cast<const ColumnType*>(_columns[p.chunk_index])->get_data();
const Container& container = down_cast<const ColumnType*>(_columns[p.chunk_index])->immutable_data();
data[output++] = container[p.index_in_chunk];
}
@ -118,7 +118,7 @@ public:
template <typename T>
Status do_visit(FixedLengthColumnBase<T>* dst) {
using Container = typename FixedLengthColumnBase<T>::Container;
using Container = typename FixedLengthColumnBase<T>::ImmContainer;
using ColumnType = FixedLengthColumnBase<T>;
auto& data = dst->get_data();
@ -126,7 +126,7 @@ public:
data.resize(output + _perm.size());
for (auto& p : _perm) {
const Container& container = down_cast<const ColumnType*>(_columns[p.chunk_index])->get_data();
const Container& container = down_cast<const ColumnType*>(_columns[p.chunk_index])->immutable_data();
data[output++] = container[p.index_in_chunk];
}

View File

@ -435,7 +435,7 @@ Status PartitionedSpillerWriter::_choose_partitions_to_flush(bool is_final_flush
// make shuffle public
void PartitionedSpillerWriter::shuffle(std::vector<uint32_t>& dst, const SpillHashColumn* hash_column) {
const auto& hashs = hash_column->get_data();
const auto hashs = hash_column->immutable_data();
dst.resize(hashs.size());
if (_min_level == _max_level) {

View File

@ -233,7 +233,7 @@ Status StreamAggregator::_output_result_changes_with_retract(size_t chunk_size,
// compute agg count to decide whether to generate retract info.
auto agg_count_column = down_cast<const Int64Column*>(
final_result_chunk->get_column_by_index(_group_by_columns.size() + _count_agg_idx).get());
const auto& agg_count_column_data = agg_count_column->get_data();
const auto agg_count_column_data = agg_count_column->immutable_data();
// 2. seek previous results from result state table.
StateTableResult prev_state_result;

View File

@ -20,6 +20,7 @@
#include "column/vectorized_fwd.h"
#include "common/status.h"
#include "exec/olap_scan_node.h"
#include "runtime/current_thread.h"
#include "service/backend_options.h"
#include "storage/chunk_helper.h"
#include "storage/column_predicate_rewriter.h"

View File

@ -791,7 +791,8 @@ Status OlapTableSink::_fill_auto_increment_id_internal(Chunk* chunk, SlotDescrip
}
ColumnPtr& data_col = NullableColumn::dynamic_pointer_cast(col)->data_column();
Filter filter(NullableColumn::dynamic_pointer_cast(col)->immutable_null_column_data());
const auto null_datas = NullableColumn::dynamic_pointer_cast(col)->immutable_null_column_data();
Filter filter(null_datas.begin(), null_datas.end());
Filter init_filter(chunk->num_rows(), 0);

View File

@ -37,7 +37,7 @@ struct AggDataTypeTraits<lt, FixedLengthLTGuard<lt>> {
// Appends `value` to `column` by forwarding to the column's own append().
static void append_value(ColumnType* column, const ValueType& value) { column->append(value); }
// Returns the element at index `row` of `column`, read through immutable_data().
// Only one definition is kept; the get_data()-based twin redefined the same function.
// NOTE(review): if RefType is a reference type, this relies on immutable_data()
// returning a non-owning view so the element outlives the temporary — confirm.
static RefType get_row_ref(const ColumnType& column, size_t row) { return column.immutable_data()[row]; }
// Converts a stored value to RefType by returning it unchanged.
static RefType get_ref(const ValueType& value) { return value; }
// Stores the larger of `current` and `input` back into `current`; the comparison is done as ValueType.
static void update_max(ValueType& current, const RefType& input) { current = std::max<ValueType>(current, input); }

View File

@ -345,7 +345,8 @@ public:
if (src[0]->is_nullable()) {
auto* src_nullable_column = down_cast<const NullableColumn*>(src[0].get());
auto* src_column = down_cast<const InputColumnType*>(src_nullable_column->data_column().get());
const auto* src_column = down_cast<const InputColumnType*>(src_nullable_column->data_column().get());
const auto src_data = GetContainer<LT>::get_data(src_column);
ApproxTopKState<LT> state;
for (size_t i = 0; i < src_nullable_column->size(); ++i) {
@ -353,7 +354,7 @@ public:
if (src_nullable_column->is_null(i)) {
state.process_null(1);
} else {
state.template process<false>(ctx->mem_pool(), src_column->get_data()[i], 1, false);
state.template process<false>(ctx->mem_pool(), src_data[i], 1, false);
}
serialize_state(state, dst_column);
}
@ -361,9 +362,11 @@ public:
auto* src_column = down_cast<const InputColumnType*>(src[0].get());
ApproxTopKState<LT> state;
for (auto& value : src_column->get_data()) {
const auto imm_data = GetContainer<LT>::get_data(src_column);
size_t size = imm_data.size();
for (size_t i = 0; i < size; ++i) {
state.reset(kv.first, kv.second);
state.template process<false>(ctx->mem_pool(), value, 1, false);
state.template process<false>(ctx->mem_pool(), imm_data[i], 1, false);
serialize_state(state, dst_column);
}
}

View File

@ -52,8 +52,9 @@ struct ArrayAggAggregateState {
});
}
} else {
const auto datas = column.immutable_data();
for (int i = 0; i < count; i++) {
set.emplace(column.get_data()[offset + i]);
set.emplace(datas[offset + i]);
}
}
} else {
@ -275,7 +276,7 @@ public:
auto& input_columns = down_cast<const StructColumn*>(ColumnHelper::get_data_column(column))->fields();
for (auto i = 0; i < input_columns.size(); ++i) {
auto array_column = down_cast<const ArrayColumn*>(ColumnHelper::get_data_column(input_columns[i].get()));
auto& offsets = array_column->offsets().get_data();
const auto offsets = array_column->offsets().immutable_data();
this->data(state).update(array_column->elements(), i, offsets[row_num],
offsets[row_num + 1] - offsets[row_num]);
}

View File

@ -53,7 +53,7 @@ struct ArrayUnionAggAggregateState {
}
} else {
for (int i = 0; i < count; i++) {
set.emplace(column.get_data()[offset + i]);
set.emplace(column.immutable_data()[offset + i]);
}
}
} else {

View File

@ -88,30 +88,30 @@ public:
[[maybe_unused]] const auto* column = down_cast<const InputColumnType*>(columns[0]);
if constexpr (is_inc) {
if constexpr (lt_is_datetime<LT>) {
this->data(state).sum += column->get_data()[row_num].to_unix_second();
this->data(state).sum += column->immutable_data()[row_num].to_unix_second();
} else if constexpr (lt_is_date<LT>) {
this->data(state).sum += column->get_data()[row_num].julian();
this->data(state).sum += column->immutable_data()[row_num].julian();
} else if constexpr (lt_is_decimalv2<LT>) {
this->data(state).sum += column->get_data()[row_num];
this->data(state).sum += column->immutable_data()[row_num];
} else if constexpr (lt_is_arithmetic<LT>) {
this->data(state).sum += column->get_data()[row_num];
this->data(state).sum += column->immutable_data()[row_num];
} else if constexpr (lt_is_decimal<LT>) {
this->data(state).sum += column->get_data()[row_num];
this->data(state).sum += column->immutable_data()[row_num];
} else {
DCHECK(false) << "Invalid LogicalTypes for avg function";
}
this->data(state).count++;
} else {
if constexpr (lt_is_datetime<LT>) {
this->data(state).sum -= column->get_data()[row_num].to_unix_second();
this->data(state).sum -= column->immutable_data()[row_num].to_unix_second();
} else if constexpr (lt_is_date<LT>) {
this->data(state).sum -= column->get_data()[row_num].julian();
this->data(state).sum -= column->immutable_data()[row_num].julian();
} else if constexpr (lt_is_decimalv2<LT>) {
this->data(state).sum -= column->get_data()[row_num];
this->data(state).sum -= column->immutable_data()[row_num];
} else if constexpr (lt_is_arithmetic<LT>) {
this->data(state).sum -= column->get_data()[row_num];
this->data(state).sum -= column->immutable_data()[row_num];
} else if constexpr (lt_is_decimal<LT>) {
this->data(state).sum -= column->get_data()[row_num];
this->data(state).sum -= column->immutable_data()[row_num];
} else {
DCHECK(false) << "Invalid LogicalTypes for avg function";
}
@ -196,15 +196,15 @@ public:
ImmediateType result = {};
for (size_t i = 0; i < chunk_size; ++i) {
if constexpr (lt_is_datetime<LT>) {
result = src_column->get_data()[i].to_unix_second();
result = src_column->immutable_data()[i].to_unix_second();
} else if constexpr (lt_is_date<LT>) {
result = src_column->get_data()[i].julian();
result = src_column->immutable_data()[i].julian();
} else if constexpr (lt_is_decimalv2<LT>) {
result = src_column->get_data()[i];
result = src_column->immutable_data()[i];
} else if constexpr (lt_is_arithmetic<LT>) {
result = src_column->get_data()[i];
result = src_column->immutable_data()[i];
} else if constexpr (lt_is_decimal<LT>) {
result = src_column->get_data()[i];
result = src_column->immutable_data()[i];
} else {
DCHECK(false) << "Invalid LogicalTypes for avg function";
}

View File

@ -34,13 +34,13 @@ public:
// Per-row update: reads row `row_num` of the first input column and adds it to the
// aggregate state when it lies in [0, UINT64_MAX]; values outside that range are ignored.
// The value is read once through immutable_data(); the earlier get_data() read
// declared `value` a second time in the same scope.
void update(FunctionContext* ctx, const Column** columns, AggDataPtr state, size_t row_num) const override {
    const auto* col = down_cast<const InputColumnType*>(columns[0]);
    const auto value = col->immutable_data()[row_num];
    if (value >= 0 && value <= std::numeric_limits<uint64_t>::max()) {
        this->data(state).add(value);
    }
}
bool check_valid(const Buffer<InputCppType>& values, size_t count) const {
bool check_valid(const ImmBuffer<InputCppType>& values, size_t count) const {
for (size_t i = 0; i < count; i++) {
auto value = values[i];
if (!(value >= 0 && value <= std::numeric_limits<uint64_t>::max())) {
@ -53,7 +53,7 @@ public:
void update_batch_single_state(FunctionContext* ctx, size_t chunk_size, const Column** columns,
AggDataPtr __restrict state) const override {
const auto& col = down_cast<const InputColumnType&>(*columns[0]);
const auto& values = col.get_data();
const auto values = col.immutable_data();
if constexpr (LT == TYPE_INT) {
if (check_valid(values, chunk_size)) {
// All the values are unsigned, can be safely converted to unsigned int.
@ -86,7 +86,7 @@ public:
auto* dest_column = down_cast<BitmapColumn*>(dst->get());
for (size_t i = 0; i < chunk_size; i++) {
BitmapValue bitmap;
auto v = src_column.get_data()[i];
auto v = src_column.immutable_data()[i];
if (v >= 0 && v <= std::numeric_limits<uint64_t>::max()) {
bitmap.add(v);
}

View File

@ -35,7 +35,7 @@ public:
DCHECK((*columns[0]).is_numeric());
if constexpr (std::is_integral_v<T>) {
const auto& column = static_cast<const InputColumnType&>(*columns[0]);
this->data(state).add(column.get_data()[row_num]);
this->data(state).add(column.immutable_data()[row_num]);
}
}
@ -57,7 +57,7 @@ public:
auto* dst_column = down_cast<BitmapColumn*>((*dst).get());
const auto* src_column = static_cast<const InputColumnType*>(src[0].get());
for (size_t i = 0; i < chunk_size; ++i) {
BitmapValue bitmap(src_column->get_data()[i]);
BitmapValue bitmap(src_column->immutable_data()[i]);
dst_column->append(std::move(bitmap));
}
}

View File

@ -71,11 +71,11 @@ public:
}
const auto& data_column = nullable_column.data_column();
const auto& column = down_cast<const InputColumnType&>(*data_column);
bool value = column.get_data()[row_num];
bool value = column.immutable_data()[row_num];
BoolOrElement()(this->data(state), value);
} else {
const auto& column = down_cast<const InputColumnType&>(*columns[0]);
bool value = column.get_data()[row_num];
bool value = column.immutable_data()[row_num];
BoolOrElement()(this->data(state), value);
}
}
@ -93,7 +93,7 @@ public:
for (size_t i = 0; i < chunk_size; ++i) {
if (!nullable_column.is_null(i)) {
const auto& column = down_cast<const InputColumnType&>(*data_column);
bool value = column.get_data()[i];
bool value = column.immutable_data()[i];
if (value) {
this->data(state).result = true;
break;
@ -104,7 +104,7 @@ public:
const auto& column = down_cast<const InputColumnType&>(*columns[0]);
for (size_t i = 0; i < chunk_size; ++i) {
bool value = column.get_data()[i];
bool value = column.immutable_data()[i];
if (value) {
this->data(state).result = true;
break;
@ -127,7 +127,7 @@ public:
for (size_t i = frame_start; i < frame_end; ++i) {
if (!nullable_column.is_null(i)) {
bool value = column.get_data()[i];
bool value = column.immutable_data()[i];
if (value) {
this->data(state).result = true;
break;
@ -138,7 +138,7 @@ public:
const auto& column = down_cast<const InputColumnType&>(*columns[0]);
for (size_t i = frame_start; i < frame_end; ++i) {
bool value = column.get_data()[i];
bool value = column.immutable_data()[i];
if (value) {
this->data(state).result = true;
break;
@ -155,11 +155,11 @@ public:
}
const auto& data_column = nullable_column.data_column();
const auto* input_column = down_cast<const InputColumnType*>(data_column.get());
bool value = input_column->get_data()[row_num];
bool value = input_column->immutable_data()[row_num];
BoolOrElement()(this->data(state), value);
} else {
const auto* input_column = down_cast<const InputColumnType*>(column);
bool value = input_column->get_data()[row_num];
bool value = input_column->immutable_data()[row_num];
BoolOrElement()(this->data(state), value);
}
}

View File

@ -13,6 +13,9 @@
// limitations under the License.
#pragma once
#include <utility>
#include "column/column_helper.h"
#include "exprs/agg/aggregate.h"
#include "exprs/agg/combinator/agg_state_combinator.h"
@ -30,7 +33,7 @@ struct AggStateIfState {};
class AggStateIf final : public AggStateCombinator<AggStateIfState, AggStateIf> {
public:
// Builds the combinator around `function`, which must be non-null (checked via DCHECK on _function).
// `agg_state_desc` is taken by value and moved into the base class, so the by-value
// parameter is not copied a second time. Only one mem-initializer list is kept.
AggStateIf(AggStateDesc agg_state_desc, const AggregateFunction* function)
        : AggStateCombinator(std::move(agg_state_desc), function) {
    DCHECK(_function != nullptr);
}
@ -118,9 +121,10 @@ public:
} else if (nullCount == predicate_column->size()) {
fake_null_column = NullColumn::create(columns[0]->size(), 1);
} else {
const auto& nullable_predicate_null_col_data = nullable_predicate_column->immutable_null_column_data();
const auto& nullable_predicate_data_col_data =
down_cast<const UInt8Column*>(nullable_predicate_column->immutable_data_column())->get_data();
const auto nullable_predicate_null_col_data = nullable_predicate_column->immutable_null_column_data();
const auto nullable_predicate_data_col_data =
down_cast<const UInt8Column*>(nullable_predicate_column->immutable_data_column())
->immutable_data();
// we treat false(0) as null(which is 1)
for (size_t i = 0; i < chunk_size; ++i) {
fake_null_column_raw_data[i] = static_cast<uint8_t>((!nullable_predicate_data_col_data[i]) ||

View File

@ -92,7 +92,7 @@ public:
// Merges a partial count: adds the Int64 value at `row_num` of `column` to the state's count.
// The value is accumulated exactly once, read through immutable_data(); a second
// get_data()-based `+=` on the same row would double the merged count.
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
    DCHECK(column->is_numeric());
    const auto* input_column = down_cast<const Int64Column*>(column);
    this->data(state).count += input_column->immutable_data()[row_num];
}
void get_values(FunctionContext* ctx, ConstAggDataPtr __restrict state, Column* dst, size_t start,
@ -298,7 +298,7 @@ public:
// Merges a partial count: adds the Int64 value at `row_num` of `column` to the state's count.
// The value is accumulated exactly once, read through immutable_data(); a second
// get_data()-based `+=` on the same row would double the merged count.
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
    DCHECK(column->is_numeric());
    const auto* input_column = down_cast<const Int64Column*>(column);
    this->data(state).count += input_column->immutable_data()[row_num];
}
void get_values(FunctionContext* ctx, ConstAggDataPtr __restrict state, Column* dst, size_t start,

View File

@ -73,10 +73,10 @@ public:
this->data(state).count += 1;
double oldMeanX = this->data(state).meanX;
InputCppType rowX = column0->get_data()[row_num];
InputCppType rowX = column0->immutable_data()[row_num];
double oldMeanY = this->data(state).meanY;
InputCppType rowY = column1->get_data()[row_num];
InputCppType rowY = column1->immutable_data()[row_num];
double newMeanX = (oldMeanX + (rowX - oldMeanX) / this->data(state).count);
double newMeanY = (oldMeanY + (rowY - oldMeanY) / this->data(state).count);
@ -178,9 +178,11 @@ public:
double c2 = 0;
int64_t count = 1;
const auto src0_data = src_column0->immutable_data();
const auto src1_data = src_column1->immutable_data();
for (size_t i = 0; i < chunk_size; ++i) {
meanX = static_cast<double>(src_column0->get_data()[i]);
meanY = static_cast<double>(src_column1->get_data()[i]);
meanX = static_cast<double>(src0_data[i]);
meanY = static_cast<double>(src1_data[i]);
memcpy(bytes.data() + old_size, &meanX, sizeof(double));
memcpy(bytes.data() + old_size + sizeof(double), &meanY, sizeof(double));
memcpy(bytes.data() + old_size + sizeof(double) * 2, &c2, sizeof(double));

View File

@ -365,7 +365,8 @@ public:
if constexpr (IsSlice<T>) {
this->data(state).update(ctx->mem_pool(), column->get_slice(row_num));
} else {
this->data(state).update(column->get_data()[row_num]);
const auto immutable_data = column->immutable_data();
this->data(state).update(immutable_data[row_num]);
}
}
@ -382,7 +383,7 @@ public:
};
std::vector<CacheEntry> cache(chunk_size);
const auto& container_data = column->get_data();
const auto container_data = GetContainer<LT>::get_data(column);
for (size_t i = 0; i < chunk_size; ++i) {
size_t hash_value = agg_state.set.hash_function()(container_data[i]);
cache[i] = CacheEntry{hash_value};
@ -413,7 +414,7 @@ public:
};
std::vector<CacheEntry> cache(chunk_size);
const auto& container_data = column->get_data();
const auto container_data = GetContainer<LT>::get_data(column);
for (size_t i = 0; i < chunk_size; ++i) {
AggDataPtr state = states[i] + state_offset;
auto& agg_state = this->data(state);
@ -490,7 +491,7 @@ public:
old_size += key.size;
dst_column->get_offset()[i + 1] = new_size;
} else {
T key = src_column->get_data()[i];
T key = src_column->immutable_data()[i];
size_t new_size = old_size + sizeof(T);
bytes.resize(new_size);
@ -576,7 +577,7 @@ public:
if (data_column->is_array()) {
const auto* array_column = down_cast<const ArrayColumn*>(data_column);
const auto* column = array_column->elements_column().get();
const auto& off = array_column->offsets().get_data();
const auto off = array_column->offsets().immutable_data();
const auto* binary_column = down_cast<const BinaryColumn*>(ColumnHelper::get_data_column(column));
for (auto i = off[row_num]; i < off[row_num + 1]; i++) {
if (!column->is_null(i)) {

View File

@ -65,7 +65,7 @@ public:
Slice s = column->get_slice(row_num);
value = HashUtil::murmur_hash64A(s.data, s.size, HashUtil::MURMUR_SEED);
} else {
const auto& v = column->get_data();
const auto v = column->immutable_data();
value = HashUtil::murmur_hash64A(&v[row_num], sizeof(v[row_num]), HashUtil::MURMUR_SEED);
}
update_state(ctx, state, value);
@ -89,7 +89,7 @@ public:
}
} else {
uint64_t value = 0;
const auto& v = column->get_data();
const auto v = column->immutable_data();
for (size_t i = frame_start; i < frame_end; ++i) {
value = HashUtil::murmur_hash64A(&v[i], sizeof(v[i]), HashUtil::MURMUR_SEED);
@ -167,7 +167,7 @@ public:
Slice s = column->get_slice(i);
value = HashUtil::murmur_hash64A(s.data, s.size, HashUtil::MURMUR_SEED);
} else {
auto v = column->get_data()[i];
auto v = column->immutable_data()[i];
value = HashUtil::murmur_hash64A(&v, sizeof(v), HashUtil::MURMUR_SEED);
}
if (value != 0) {

View File

@ -60,7 +60,7 @@ public:
Slice s = column->get_slice(row_num);
value = HashUtil::murmur_hash64A(s.data, s.size, HashUtil::MURMUR_SEED);
} else {
const auto& v = column->get_data();
const auto v = column->immutable_data();
value = HashUtil::murmur_hash64A(&v[row_num], sizeof(v[row_num]), HashUtil::MURMUR_SEED);
}
update_state(ctx, state, value);
@ -83,7 +83,7 @@ public:
}
} else {
uint64_t value = 0;
const auto& v = column->get_data();
const auto v = column->immutable_data();
for (size_t i = frame_start; i < frame_end; ++i) {
value = HashUtil::murmur_hash64A(&v[i], sizeof(v[i]), HashUtil::MURMUR_SEED);
@ -151,7 +151,7 @@ public:
Slice s = input->get_slice(i);
value = HashUtil::murmur_hash64A(s.data, s.size, HashUtil::MURMUR_SEED);
} else {
auto v = input->get_data()[i];
auto v = input->immutable_data()[i];
value = HashUtil::murmur_hash64A(&v, sizeof(v), HashUtil::MURMUR_SEED);
}

View File

@ -65,7 +65,7 @@ public:
// Merges a partial result: adds the Int64 value at `row_num` of `column` to the state's byte total.
// The value is accumulated exactly once, read through immutable_data(); a second
// get_data()-based `+=` on the same row would double the merged total.
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
    DCHECK(column->is_numeric());
    const auto* input_column = down_cast<const Int64Column*>(column);
    this->data(state).bytes += input_column->immutable_data()[row_num];
}
void get_values(FunctionContext* ctx, ConstAggDataPtr __restrict state, Column* dst, size_t start,
@ -85,7 +85,7 @@ public:
void batch_serialize(FunctionContext* ctx, size_t chunk_size, const Buffer<AggDataPtr>& agg_states,
size_t state_offset, Column* to) const override {
auto* column = down_cast<Int64Column*>(to);
Buffer<int64_t>& result_data = column->get_data();
auto& result_data = column->get_data();
for (size_t i = 0; i < chunk_size; i++) {
result_data.emplace_back(this->data(agg_states[i] + state_offset).bytes);
}

View File

@ -460,7 +460,7 @@ public:
}
for (auto i = 0; i < input_columns.size(); ++i) {
auto array_column = down_cast<const ArrayColumn*>(ColumnHelper::get_data_column(input_columns[i].get()));
auto& offsets = array_column->offsets().get_data();
auto offsets = array_column->offsets().immutable_data();
state_impl.update(ctx, array_column->elements(), i, offsets[row_num],
offsets[row_num + 1] - offsets[row_num]);
}

View File

@ -57,7 +57,7 @@ public:
Slice s = column->get_slice(row_num);
value = HashUtil::murmur_hash64A(s.data, s.size, HashUtil::MURMUR_SEED);
} else {
const auto& v = column->get_data();
const auto v = column->immutable_data();
value = HashUtil::murmur_hash64A(&v[row_num], sizeof(v[row_num]), HashUtil::MURMUR_SEED);
}
@ -83,7 +83,7 @@ public:
}
} else {
uint64_t value = 0;
const auto& v = column->get_data();
const auto v = column->immutable_data();
for (size_t i = frame_start; i < frame_end; ++i) {
value = HashUtil::murmur_hash64A(&v[i], sizeof(v[i]), HashUtil::MURMUR_SEED);
@ -144,7 +144,7 @@ public:
Slice s = column->get_slice(i);
value = HashUtil::murmur_hash64A(s.data, s.size, HashUtil::MURMUR_SEED);
} else {
auto v = column->get_data()[i];
auto v = column->immutable_data()[i];
value = HashUtil::murmur_hash64A(&v, sizeof(v), HashUtil::MURMUR_SEED);
}
if (value != 0) {

View File

@ -90,7 +90,7 @@ public:
const auto* key_column = down_cast<const InputColumnType*>(columns[1]);
auto bimtap_value = bitmap_column->get_pool()[row_num];
auto key_value = key_column->get_data()[row_num];
auto key_value = GetContainer<LT>::get_data(key_column)[row_num];
if constexpr (LT != TYPE_VARCHAR && LT != TYPE_CHAR) {
intersect.update(key_value, bimtap_value);
@ -149,7 +149,7 @@ public:
BitmapIntersect<BitmapRuntimeCppType<LT>> intersect_per_row(intersect);
auto bimtap_value = bitmap_column->get_pool()[i];
auto key_value = key_column->get_data()[i];
auto key_value = GetContainer<LT>::get_data(key_column)[i];
if constexpr (LT != TYPE_VARCHAR && LT != TYPE_CHAR) {
intersect_per_row.update(key_value, bimtap_value);

View File

@ -16,7 +16,6 @@
#include <fmt/format.h>
#include "column/binary_column.h"
#include "column/column.h"
#include "column/column_helper.h"
#include "column/fixed_length_column.h"
@ -26,8 +25,8 @@
#include "exprs/agg/aggregate.h"
#include "exprs/function_context.h"
#include "gutil/casts.h"
#include "runtime/mem_pool.h"
#include "util/phmap/phmap.h"
#include "util/time.h"
namespace starrocks {
@ -43,8 +42,9 @@ struct MapAggAggregateFunctionState : public AggregateFunctionEmptyState {
void update(MemPool* mem_pool, const KeyColumnType& arg_key_column, const Column& arg_value_column, size_t offset,
size_t count) {
if constexpr (!lt_is_string<KT>) {
auto key_datas = arg_key_column.immutable_data();
for (int i = offset; i < offset + count; i++) {
auto key = arg_key_column.get_data()[i];
auto key = key_datas[i];
if (!hash_map.contains(key)) {
auto value = arg_value_column.get(i);
value_column->append_datum(value);
@ -90,7 +90,7 @@ public:
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
auto map_column = down_cast<const MapColumn*>(ColumnHelper::get_data_column(column));
auto& offsets = map_column->offsets().get_data();
auto& offsets = map_column->offsets().immutable_data();
if (offsets[row_num + 1] > offsets[row_num]) {
this->data(state).update(
ctx->mem_pool(),

View File

@ -175,7 +175,7 @@ public:
size_t row_num) const override {
DCHECK(!columns[0]->is_nullable() && !columns[0]->is_binary());
const auto& column = down_cast<const InputColumnType&>(*columns[0]);
T value = column.get_data()[row_num];
T value = column.immutable_data()[row_num];
OP()(this->data(state), value);
}
@ -198,7 +198,7 @@ public:
int64_t current_frame_last_position = current_row_position + rows_end_offset;
if (!ignore_subtraction && previous_frame_first_position >= partition_start &&
previous_frame_first_position < partition_end) {
if (OP::equals(this->data(state), column.get_data()[previous_frame_first_position])) {
if (OP::equals(this->data(state), column.immutable_data()[previous_frame_first_position])) {
current_frame_last_position = std::min(current_frame_last_position, partition_end - 1);
this->data(state).reset();
int64_t frame_start = previous_frame_first_position + 1;
@ -228,7 +228,7 @@ public:
// Merges one serialized value into the state: reads row `row_num` of `column`
// (which must be neither nullable nor binary) and applies OP to the state with it.
// The value is read once through immutable_data(); the earlier get_data() read
// declared `value` a second time in the same scope.
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
    DCHECK(!column->is_nullable() && !column->is_binary());
    const auto* input_column = down_cast<const InputColumnType*>(column);
    T value = input_column->immutable_data()[row_num];
    OP()(this->data(state), value);
}
@ -294,7 +294,7 @@ public:
int64_t current_frame_last_position = current_row_position + rows_end_offset;
if (!ignore_subtraction && previous_frame_first_position >= partition_start &&
previous_frame_first_position < partition_end) {
if (OP::equals(this->data(state), column.get_data()[previous_frame_first_position])) {
if (OP::equals(this->data(state), column.get_slice(previous_frame_first_position))) {
current_frame_last_position = std::min(current_frame_last_position, partition_end - 1);
this->data(state).reset();
int64_t frame_start = previous_frame_first_position + 1;

View File

@ -17,6 +17,7 @@
#include <limits>
#include <type_traits>
#include "column/column_helper.h"
#include "column/fixed_length_column.h"
#include "column/type_traits.h"
#include "column/vectorized_fwd.h"
@ -385,9 +386,9 @@ public:
RunTimeCppType<LT> rhs;
auto* data_col1 = down_cast<const InputColumnType*>(ColumnHelper::get_data_column(columns[1]));
if (columns[1]->is_constant()) {
rhs = data_col1->get_data()[0];
rhs = data_col1->immutable_data()[0];
} else {
rhs = data_col1->get_data()[row_num];
rhs = data_col1->immutable_data()[row_num];
}
OP()(this->data(state), (Column*)columns[0], row_num, rhs);
}
@ -491,14 +492,15 @@ public:
}
const auto* col_maxmin = down_cast<const InputColumnType*>(ColumnHelper::get_data_column(src[1].get()));
const auto maxmin_datas = col_maxmin->immutable_data();
BinaryColumn* result = nullptr;
if ((*dst)->is_nullable()) {
auto* dst_nullable_column = down_cast<NullableColumn*>((*dst).get());
result = down_cast<BinaryColumn*>(dst_nullable_column->data_column().get());
if (src[1]->is_nullable()) {
dst_nullable_column->null_column_data() =
down_cast<const NullableColumn*>(src[1].get())->immutable_null_column_data();
auto null_column_data = down_cast<const NullableColumn*>(src[1].get())->immutable_null_column_data();
dst_nullable_column->null_column_data().assign(null_column_data.begin(), null_column_data.end());
} else {
dst_nullable_column->null_column_data().resize(chunk_size, 0);
}
@ -518,7 +520,7 @@ public:
down_cast<NullableColumn*>((*dst).get())->set_has_null(true);
} else {
auto is_null = src[0]->only_null() || src[0]->is_null(i);
T value = col_maxmin->get_data()[i];
T value = maxmin_datas[i];
if (is_null) {
if constexpr (State::not_filter_nulls_flag) {
new_size = old_size + sizeof(T) + 1;
@ -751,8 +753,9 @@ public:
result = down_cast<BinaryColumn*>(dst_nullable_column->data_column().get());
if (src[1]->is_nullable()) {
dst_nullable_column->null_column_data() =
down_cast<const NullableColumn*>(src[1].get())->immutable_null_column_data();
auto null_datas = down_cast<const NullableColumn*>(src[1].get())->immutable_null_column_data();
dst_nullable_column->null_column_data().assign(null_datas.begin(), null_datas.end());
} else {
dst_nullable_column->null_column_data().resize(chunk_size, 0);
}
@ -774,7 +777,7 @@ public:
auto is_null = src[0]->only_null() || src[0]->is_null(i);
if (is_null) {
if constexpr (State::not_filter_nulls_flag) {
Slice value = col_maxmin->get(i).get_slice();
Slice value = col_maxmin->get_slice(i);
size_t value_size = value.size;
new_size = old_size + sizeof(size_t) + value_size + 1;
bytes.resize(new_size);
@ -791,7 +794,7 @@ public:
dst_nullable_column->set_has_null(true);
}
} else {
Slice value = col_maxmin->get(i).get_slice();
Slice value = col_maxmin->get_slice(i);
size_t value_size = value.size;
auto* data_column = ColumnHelper::get_data_column(src[0].get());
size_t serde_size = data_column->serialize_size(i);

View File

@ -127,7 +127,8 @@ public:
// for nullable column when the real whole chunk data all not-null.
if (column->is_nullable()) {
const auto* nullable_column = down_cast<const NullableColumn*>(column);
if (!nullable_column->null_column()->get_data()[row_num]) {
auto imm_null_data = nullable_column->immutable_null_column_data();
if (!imm_null_data[row_num]) {
this->data(state).is_null = false;
const Column* data_column = nullable_column->data_column().get();
nested_function->merge(ctx, data_column, this->data(state).mutable_nest_state(), row_num);
@ -212,13 +213,13 @@ public:
nested_function->convert_to_serialize_format(ctx, src, chunk_size, &dst_nullable_column->data_column());
} else if (nullable_column->has_null()) {
dst_nullable_column->set_has_null(true);
const NullData& src_null_data = nullable_column->immutable_null_column_data();
const auto src_null_data = nullable_column->immutable_null_column_data();
size_t null_size = SIMD::count_nonzero(src_null_data);
if (null_size == chunk_size) {
dst_nullable_column->append_nulls(chunk_size);
} else {
NullData& dst_null_data = dst_nullable_column->null_column_data();
dst_null_data = src_null_data;
dst_null_data.assign(src_null_data.begin(), src_null_data.end());
if constexpr (IgnoreNull) {
Columns src_data_columns(1);
src_data_columns[0] = nullable_column->data_column();
@ -869,7 +870,7 @@ public:
this->nested_function->merge(ctx, data_column, state_data.mutable_nest_state(), i);
}
};
auto slow_call_path = [&](const NullData& null_data, const Column* data_column) {
auto slow_call_path = [&](const ImmutableNullData& null_data, const Column* data_column) {
for (size_t i = 0; i < chunk_size; ++i) {
auto& state_data = this->data(states[i] + state_offset);
if (null_data[i] == 0) {
@ -896,7 +897,7 @@ public:
}
};
auto slow_call_path = [&](const NullData& null_data, const Column* data_column) {
auto slow_call_path = [&](const ImmutableNullData& null_data, const Column* data_column) {
for (size_t i = 0; i < chunk_size; ++i) {
if (filter[i] == 0) {
auto& state_data = this->data(states[i] + state_offset);
@ -923,7 +924,7 @@ public:
this->nested_function->merge(ctx, data_column, state_data.mutable_nest_state(), i);
}
};
auto slow_call_path = [&](const NullData& null_data, const Column* data_column) {
auto slow_call_path = [&](const ImmutableNullData& null_data, const Column* data_column) {
for (size_t i = start; i < start + size; ++i) {
auto& state_data = this->data(state);
if (null_data[i] == 0) {
@ -1086,7 +1087,7 @@ public:
data_columns.emplace_back(nullable_column->data_column());
if (i->has_null()) {
dst_nullable_column->set_has_null(true);
const NullData& src_null_data = nullable_column->immutable_null_column_data();
const auto src_null_data = nullable_column->immutable_null_column_data();
size_t null_size = SIMD::count_nonzero(src_null_data);
// If a column contains only null elements, set every row of dst_column to null.

View File

@ -109,7 +109,7 @@ public:
if (UNLIKELY(data(state).targetQuantile == std::numeric_limits<double>::infinity())) {
data(state).targetQuantile = columns[1]->get(0).get_double();
}
double column_value = data_column->get_data()[row_num];
double column_value = data_column->immutable_data()[row_num];
int64_t prev_memory = data(state).percentile->mem_usage();
data(state).percentile->add(implicit_cast<float>(column_value));
ctx->add_mem_usage(data(state).percentile->mem_usage() - prev_memory);
@ -133,7 +133,7 @@ public:
size_t old_size = bytes.size();
for (size_t i = 0; i < chunk_size; ++i) {
PercentileValue percentile;
percentile.add(data_column->get_data()[i]);
percentile.add(data_column->immutable_data()[i]);
size_t new_size = old_size + sizeof(double) + percentile.serialize_size();
bytes.resize(new_size);
@ -181,7 +181,7 @@ public:
data(state).targetQuantile = columns[2]->get(0).get_double();
}
double column_value = data_column->get_data()[row_num];
double column_value = data_column->immutable_data()[row_num];
int64_t prev_memory = data(state).percentile->mem_usage();
// add value with weight
if (LIKELY(weight != 0)) {
@ -211,7 +211,7 @@ public:
if (LIKELY(weight != 0)) {
for (size_t i = 0; i < chunk_size; ++i) {
PercentileValue percentile;
double value = data_column->get_data()[i];
double value = data_column->immutable_data()[i];
percentile.add(value, weight);
size_t new_size = old_size + sizeof(double) + percentile.serialize_size();
bytes.resize(new_size);
@ -236,9 +236,9 @@ public:
} else {
const auto* weight_column = down_cast<const Int64Column*>(src[1].get());
for (size_t i = 0; i < chunk_size; ++i) {
int64_t weight = weight_column->get_data()[i];
int64_t weight = weight_column->immutable_data()[i];
PercentileValue percentile;
double value = data_column->get_data()[i];
double value = data_column->immutable_data()[i];
if (LIKELY(weight != 0)) {
percentile.add(value, weight);
}

View File

@ -57,7 +57,7 @@ struct PercentileState {
using GridType = typename PercentileStateTypes<LT>::GridType;
// Appends a single value to the collected `items`.
void update(CppType item) { items.emplace_back(item); }
void update_batch(const Buffer<CppType>& vec) {
void update_batch(const ImmBuffer<CppType> vec) {
size_t old_size = items.size();
items.resize(old_size + vec.size());
memcpy(items.data() + old_size, vec.data(), vec.size() * sizeof(CppType));
@ -198,7 +198,8 @@ public:
this->init_state_if_needed(ctx, columns, state);
const auto& column = down_cast<const InputColumnType&>(*columns[0]);
this->data(state).update(column.get_data()[row_num]);
auto column_data = column.immutable_data();
this->data(state).update(column_data[row_num]);
}
void update_batch_single_state(FunctionContext* ctx, size_t chunk_size, const Column** columns,
@ -206,7 +207,8 @@ public:
this->init_state_if_needed(ctx, columns, state);
const auto& column = down_cast<const InputColumnType&>(*columns[0]);
this->data(state).update_batch(column.get_data());
auto column_data = column.immutable_data();
this->data(state).update_batch(column_data);
}
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
@ -269,7 +271,7 @@ public:
Bytes& bytes = dst_column->get_bytes();
double rate = ColumnHelper::get_const_value<TYPE_DOUBLE>(src[1]);
auto src_column = *down_cast<const InputColumnType*>(src[0].get());
InputCppType* src_data = src_column.get_data().data();
const InputCppType* src_data = src_column.immutable_data().data();
for (auto i = 0; i < chunk_size; ++i) {
size_t old_size = bytes.size();
bytes.resize(old_size + sizeof(double) + sizeof(size_t) + sizeof(InputCppType));
@ -304,12 +306,12 @@ public:
this->init_state_if_needed(ctx, columns, state);
const auto& column = down_cast<const BinaryColumn&>(*columns[0]);
const auto& column_data = column.get_proxy_data();
// Copy the slice's bytes into mem_pool; otherwise the memory the slice points to is gone once the chunk has been processed.
size_t element_size = column.get_data()[row_num].get_size();
size_t element_size = column_data[row_num].get_size();
uint8_t* pos = ctx->mem_pool()->allocate(element_size);
ctx->add_mem_usage(element_size);
memcpy(pos, column.get_data()[row_num].get_data(), element_size);
memcpy(pos, column_data[row_num].get_data(), element_size);
this->data(state).update(Slice(pos, element_size));
}
@ -392,7 +394,7 @@ public:
double rate = ColumnHelper::get_const_value<TYPE_DOUBLE>(src[1]);
auto src_column = *down_cast<const BinaryColumn*>(src[0].get());
Slice* src_data = src_column.get_data().data();
const auto& src_data = src_column.get_proxy_data();
for (auto i = 0; i < chunk_size; ++i) {
size_t old_size = bytes.size();
// [rate, 1, element ith size, element ith data]
@ -543,7 +545,7 @@ struct LowCardPercentileState {
constexpr int static ser_header = 0x3355 | LT << 16;
// Increments the occurrence counter kept for `item`.
void update(CppType item) { items[item]++; }
void update_batch(const Buffer<CppType>& vec) {
void update_batch(const ImmBuffer<CppType> vec) {
for (const auto& item : vec) {
items[item]++;
}
@ -629,13 +631,13 @@ public:
// Folds the value at `row_num` of the first input column into the aggregate state.
//
// ctx      - function context (not used here).
// columns  - input columns; only columns[0] is read and it is cast to InputColumnType.
// state    - aggregate state that receives the value.
// row_num  - index of the row to read.
void update(FunctionContext* ctx, const Column** columns, AggDataPtr __restrict state,
            size_t row_num) const override {
    const auto& column = down_cast<const InputColumnType&>(*columns[0]);
    // Read through the read-only view and feed the state exactly once per row.
    this->data(state).update(column.immutable_data()[row_num]);
}
// Feeds the first input column into a single aggregate state in one call.
//
// The column's whole immutable buffer is handed to the state's update_batch();
// `chunk_size` is not consulted here.
void update_batch_single_state(FunctionContext* ctx, size_t chunk_size, const Column** columns,
                               AggDataPtr __restrict state) const override {
    const auto& column = down_cast<const InputColumnType&>(*columns[0]);
    // One batch update per call, through the read-only view.
    this->data(state).update_batch(column.immutable_data());
}
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
@ -670,7 +672,7 @@ public:
unsigned char* cur = bytes.data() + old_size;
auto src_column = *down_cast<const InputColumnType*>(src[0].get());
InputCppType* src_data = src_column.get_data().data();
const InputCppType* src_data = src_column.immutable_data().data();
size_t cur_size = old_size;
for (size_t i = 0; i < chunk_size; ++i) {

View File

@ -38,12 +38,13 @@ namespace starrocks {
struct RetentionState {
static void udpate(uint64_t* value_ptr, const ArrayColumn* column, size_t row_num) {
const auto& ele_col = column->elements();
const auto& offsets = column->offsets().get_data();
const auto offsets = column->offsets().immutable_data();
size_t array_size = 0;
if (ele_col.is_nullable()) {
const auto& null_column = down_cast<const NullableColumn&>(ele_col);
auto data_column = down_cast<const BooleanColumn*>(null_column.data_column().get());
const auto imm_data = data_column->immutable_data();
size_t offset = offsets[row_num];
array_size = offsets[row_num + 1] - offset;
@ -54,13 +55,15 @@ struct RetentionState {
for (size_t i = 0; i < array_size; ++i) {
auto ele_offset = offset + i;
if (!null_column.is_null(ele_offset) && data_column->get_data()[ele_offset]) {
if (!null_column.is_null(ele_offset) && imm_data[ele_offset]) {
// Set right bit for condition.
(*value_ptr) |= RetentionState::bool_values[i];
}
}
} else {
const auto& data_column = down_cast<const BooleanColumn&>(ele_col);
const auto imm_data = data_column.immutable_data();
size_t offset = offsets[row_num];
array_size = offsets[row_num + 1] - offset;
@ -69,7 +72,7 @@ struct RetentionState {
}
for (size_t i = 0; i < array_size; ++i) {
if (data_column.get_data()[offset + i]) {
if (imm_data[offset + i]) {
(*value_ptr) |= RetentionState::bool_values[i];
}
}
@ -145,7 +148,7 @@ public:
// Merges one serialized intermediate value into the aggregate state.
//
// `column` is cast to Int64Column; the value at `row_num` is OR-ed into the
// state's `boolean_value` bit set.
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
    const auto* input_column = down_cast<const Int64Column*>(column);
    // A single read through the read-only view is sufficient for the OR.
    this->data(state).boolean_value |= input_column->immutable_data()[row_num];
}
void serialize_to_column(FunctionContext* ctx, ConstAggDataPtr __restrict state, Column* to) const override {

View File

@ -195,11 +195,11 @@ public:
// Returns the value stored at `row_num` of `column` as T.
//
// For string logical types the row's slice is returned; for every other type
// the element is read from the column's immutable data buffer. In both cases
// `column` is cast to InputColumnType.
T get_row_value(const Column* column, size_t row_num) const {
    if constexpr (lt_is_string<LT>) {
        DCHECK(column->is_binary());
        // Typed slice accessor on the concrete column type.
        return down_cast<const InputColumnType&>(*column).get_slice(row_num);
    } else {
        DCHECK(!column->is_nullable() && !column->is_binary());
        const auto& col = down_cast<const InputColumnType&>(*column);
        return col.immutable_data()[row_num];
    }
}

View File

@ -59,7 +59,7 @@ public:
size_t row_num) const override {
DCHECK(columns[0]->is_numeric() || columns[0]->is_decimal());
const auto& column = down_cast<const InputColumnType&>(*columns[0]);
this->data(state).sum += column.get_data()[row_num];
this->data(state).sum += column.immutable_data()[row_num];
}
// Always reports AggStateTableKind::RESULT; `is_append_only` is not consulted.
AggStateTableKind agg_state_table_kind(bool is_append_only) const override { return AggStateTableKind::RESULT; }
@ -68,13 +68,13 @@ public:
size_t row_num) const override {
DCHECK(columns[0]->is_numeric() || columns[0]->is_decimal());
const auto& column = down_cast<const InputColumnType&>(*columns[0]);
this->data(state).sum -= column.get_data()[row_num];
this->data(state).sum -= column.immutable_data()[row_num];
}
void update_batch_single_state(FunctionContext* ctx, size_t chunk_size, const Column** columns,
AggDataPtr __restrict state) const override {
const auto* column = down_cast<const InputColumnType*>(columns[0]);
const auto* data = column->get_data().data();
const auto* data = column->immutable_data().data();
for (size_t i = 0; i < chunk_size; ++i) {
this->data(state).sum += data[i];
}
@ -84,7 +84,7 @@ public:
int64_t peer_group_start, int64_t peer_group_end, int64_t frame_start,
int64_t frame_end) const override {
const auto* column = down_cast<const InputColumnType*>(columns[0]);
const auto* data = column->get_data().data();
const auto* data = column->immutable_data().data();
for (size_t i = frame_start; i < frame_end; ++i) {
this->data(state).sum += data[i];
}
@ -96,7 +96,7 @@ public:
bool ignore_subtraction, bool ignore_addition,
[[maybe_unused]] bool has_null) const override {
const auto* column = down_cast<const InputColumnType*>(columns[0]);
const auto* data = column->get_data().data();
const auto* data = column->immutable_data().data();
const int64_t previous_frame_first_position = current_row_position - 1 + rows_start_offset;
const int64_t current_frame_last_position = current_row_position + rows_end_offset;
@ -113,7 +113,7 @@ public:
// Merges one intermediate sum into the aggregate state.
//
// `column` must be numeric or decimal (checked in debug builds) and is cast to
// ResultColumnType; the value at `row_num` is added to the state's `sum`.
void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
    DCHECK(column->is_numeric() || column->is_decimal());
    const auto* input_column = down_cast<const ResultColumnType*>(column);
    // Add the partial sum exactly once, reading through the read-only view.
    this->data(state).sum += input_column->immutable_data()[row_num];
}
void get_values(FunctionContext* ctx, ConstAggDataPtr __restrict state, Column* dst, size_t start,
@ -135,7 +135,7 @@ public:
void batch_serialize(FunctionContext* ctx, size_t chunk_size, const Buffer<AggDataPtr>& agg_states,
size_t state_offset, Column* to) const override {
auto* column = down_cast<ResultColumnType*>(to);
Buffer<ResultType>& result_data = column->get_data();
auto& result_data = column->get_data();
for (size_t i = 0; i < chunk_size; i++) {
result_data.emplace_back(this->data(agg_states[i] + state_offset).sum);
}
@ -149,8 +149,8 @@ public:
void convert_to_serialize_format([[maybe_unused]] FunctionContext* ctx, const Columns& src, size_t chunk_size,
ColumnPtr* dst) const override {
Buffer<ResultType>& dst_data = down_cast<ResultColumnType*>((*dst).get())->get_data();
const auto* src_data = down_cast<const InputColumnType*>(src[0].get())->get_data().data();
auto& dst_data = down_cast<ResultColumnType*>((*dst).get())->get_data();
const auto* src_data = down_cast<const InputColumnType*>(src[0].get())->immutable_data().data();
dst_data.resize(chunk_size);
for (size_t i = 0; i < chunk_size; ++i) {

View File

@ -67,7 +67,7 @@ public:
int64_t temp = 1 + this->data(state).count;
TResult delta;
delta = column->get_data()[row_num] - this->data(state).mean;
delta = column->immutable_data()[row_num] - this->data(state).mean;
TResult r;
if constexpr (lt_is_decimalv2<LT>) {
@ -177,7 +177,7 @@ public:
int64_t count = 1;
for (size_t i = 0; i < chunk_size; ++i) {
mean = src_column->get_data()[i];
mean = src_column->immutable_data()[i];
memcpy(bytes.data() + old_size, &mean, sizeof(TResult));
memcpy(bytes.data() + old_size + sizeof(TResult), &m2, sizeof(TResult));
memcpy(bytes.data() + old_size + sizeof(TResult) * 2, &count, sizeof(int64_t));

View File

@ -446,7 +446,7 @@ public:
if (!columns[1]->is_constant()) {
const auto timestamp_column = down_cast<const TimeTypeColumn*>(columns[1]);
DCHECK(LT == TYPE_DATETIME || LT == TYPE_DATE || LT == TYPE_INT || LT == TYPE_BIGINT);
tv = timestamp_column->get_data()[row_num];
tv = timestamp_column->immutable_data()[row_num];
} else {
tv = ColumnHelper::get_const_value<LT>(columns[1]);
}
@ -462,14 +462,14 @@ public:
event_column =
down_cast<const ArrayColumn*>(down_cast<const ConstColumn*>(columns[3])->data_column().get());
const UInt32Column& offsets = event_column->offsets();
auto offsets_ptr = offsets.get_data().data();
const auto offsets_ptr = offsets.immutable_data().data();
offset = offsets_ptr[0];
array_size = offsets_ptr[1] - offsets_ptr[0];
} else {
DCHECK(columns[3]->is_array());
event_column = down_cast<const ArrayColumn*>(columns[3]);
const UInt32Column& offsets = event_column->offsets();
auto offsets_ptr = offsets.get_data().data();
const auto offsets_ptr = offsets.immutable_data().data();
offset = offsets_ptr[row_num];
array_size = offsets_ptr[row_num + 1] - offsets_ptr[row_num];
}
@ -478,10 +478,11 @@ public:
if (elements.is_nullable()) {
const auto& null_column = down_cast<const NullableColumn&>(elements);
auto data_column = down_cast<const BooleanColumn*>(null_column.data_column().get());
const auto& null_vector = null_column.null_column()->get_data();
const auto data_vector = data_column->immutable_data();
const auto null_vector = null_column.immutable_null_column_data();
for (int i = 0; i < array_size; ++i) {
auto ele_offset = offset + i;
if (!null_vector[ele_offset] && data_column->get_data()[ele_offset]) {
if (!null_vector[ele_offset] && data_vector[ele_offset]) {
event_level = i + 1;
if constexpr (LT == TYPE_DATETIME) {
this->data(state).update(tv.to_unix_second(), event_level);
@ -494,9 +495,11 @@ public:
}
} else {
const auto& data_column = down_cast<const BooleanColumn&>(elements);
const auto data_vector = data_column.immutable_data();
for (int i = 0; i < array_size; ++i) {
auto ele_offset = offset + i;
if (data_column.get_data()[ele_offset]) {
if (data_vector[ele_offset]) {
event_level = i + 1;
if constexpr (LT == TYPE_DATETIME) {
this->data(state).update(tv.to_unix_second(), event_level);
@ -518,14 +521,14 @@ public:
this->data(state).init_once(ctx);
const auto* input_column = down_cast<const ArrayColumn*>(column);
const auto& offsets = input_column->offsets().get_data();
const auto offsets = input_column->offsets().immutable_data();
const auto& elements = input_column->elements();
const int64_t* raw_data;
if (elements.is_nullable()) {
auto data_elements = down_cast<const NullableColumn*>(&elements)->data_column().get();
raw_data = down_cast<const Int64Column*>(data_elements)->get_data().data();
raw_data = down_cast<const Int64Column*>(data_elements)->immutable_data().data();
} else {
raw_data = down_cast<const Int64Column*>(&elements)->get_data().data();
raw_data = down_cast<const Int64Column*>(&elements)->immutable_data().data();
}
size_t offset = offsets[row_num];
@ -553,11 +556,11 @@ public:
for (int i = 0; i < chunk_size; i++) {
TimestampType tv;
if constexpr (LT == TYPE_DATETIME) {
tv = timestamp_column->get_data()[i].to_unix_second();
tv = timestamp_column->immutable_data()[i].to_unix_second();
} else if constexpr (LT == TYPE_DATE) {
tv = timestamp_column->get_data()[i].julian();
tv = timestamp_column->immutable_data()[i].julian();
} else {
tv = timestamp_column->get_data()[i];
tv = timestamp_column->immutable_data()[i];
}
// get 4th value: event cond array

View File

@ -38,9 +38,10 @@ StatusOr<ColumnPtr> ArrayFunctions::array_length([[maybe_unused]] FunctionContex
RETURN_IF_COLUMNS_ONLY_NULL(columns);
const size_t num_rows = columns[0]->size();
const auto* col_array = down_cast<const ArrayColumn*>(ColumnHelper::get_data_column(columns[0].get()));
const auto arr_offsets = col_array->offsets().immutable_data();
if (columns[0]->is_constant()) {
auto col_result = Int32Column::create();
col_result->append(col_array->offsets().get_data().data()[1]);
col_result->append(arr_offsets.data()[1]);
auto const_column = ConstColumn::create(std::move(col_result), num_rows);
return const_column;
} else {
@ -49,10 +50,9 @@ StatusOr<ColumnPtr> ArrayFunctions::array_length([[maybe_unused]] FunctionContex
raw::make_room(&col_result->get_data(), num_rows);
DCHECK_EQ(col_array->size(), col_result->size());
const uint32_t* offsets = col_array->offsets().get_data().data();
int32_t* p = col_result->get_data().data();
for (size_t i = 0; i < num_rows; i++) {
p[i] = offsets[i + 1] - offsets[i];
p[i] = arr_offsets[i + 1] - arr_offsets[i];
}
if (arg0->has_null()) {
@ -67,6 +67,7 @@ StatusOr<ColumnPtr> ArrayFunctions::array_length([[maybe_unused]] FunctionContex
template <bool OnlyNullData, bool ConstData>
static StatusOr<ColumnPtr> do_array_append(const Column& elements, const UInt32Column& offsets, const Column& data) {
const auto offsets_data = offsets.immutable_data();
size_t num_array = offsets.size() - 1;
uint32_t curr_offset = 0;
@ -83,7 +84,7 @@ static StatusOr<ColumnPtr> do_array_append(const Column& elements, const UInt32C
uint32_t result_offset = 0;
for (size_t i = 0; i < num_array; i++) {
uint32_t next_offset = offsets.get_data()[i + 1];
uint32_t next_offset = offsets_data[i + 1];
uint32_t array_size = next_offset - curr_offset;
result_elements->append(elements, curr_offset, array_size);
if constexpr (OnlyNullData) {
@ -147,8 +148,9 @@ private:
template <bool NullableElement, bool NullableTarget, bool ConstTarget, typename ElementColumn,
typename TargetColumn>
static StatusOr<ColumnPtr> _process(const ElementColumn& elements, const UInt32Column& offsets,
const TargetColumn& targets, const NullColumn::Container* null_map_elements,
const NullColumn::Container* null_map_targets) {
const TargetColumn& targets, const NullColumn::ImmContainer& null_map_elements,
const NullColumn::ImmContainer& null_map_targets) {
const auto offsets_data = offsets.immutable_data();
const size_t num_array = offsets.size() - 1;
auto result_array = ArrayColumn::create(NullableColumn::create(elements.clone_empty(), NullColumn::create()),
@ -163,9 +165,9 @@ private:
std::is_same_v<StructColumn, ElementColumn>,
uint8_t, typename ElementColumn::ValueType>;
auto offsets_ptr = offsets.get_data().data();
[[maybe_unused]] auto is_null = [](const NullColumn::Container* null_map, size_t idx) -> bool {
return (*null_map)[idx] != 0;
auto offsets_ptr = offsets_data.data();
[[maybe_unused]] auto is_null = [](const NullColumn::ImmContainer& null_map, size_t idx) -> bool {
return null_map[idx] != 0;
};
auto* targets_col = &targets;
@ -259,19 +261,19 @@ private:
const Column* elements_ptr = &array_elements;
const Column* targets_ptr = &argument;
const NullColumn::Container* null_map_elements = nullptr;
const NullColumn::Container* null_map_targets = nullptr;
NullColumn::ImmContainer null_map_elements;
NullColumn::ImmContainer null_map_targets;
if constexpr (NullableElement) {
const auto& nullable = down_cast<const NullableColumn&>(array_elements);
elements_ptr = nullable.data_column().get();
null_map_elements = &(nullable.null_column()->get_data());
null_map_elements = nullable.immutable_null_column_data();
}
if constexpr (NullableTarget) {
const auto& nullable = down_cast<const NullableColumn&>(argument);
targets_ptr = nullable.data_column().get();
null_map_targets = &(nullable.null_column()->get_data());
null_map_targets = nullable.immutable_null_column_data();
}
#define HANDLE_ELEMENT_TYPE(ElementType) \
@ -495,8 +497,8 @@ private:
template <bool NullableElement, bool NullableTarget, bool ConstElement, bool ConstTarget, typename ElementColumn,
typename TargetColumn>
static StatusOr<ColumnPtr> _process(const ElementColumn& elements, const UInt32Column& offsets,
const TargetColumn& targets, const NullColumn::Container* null_map_elements,
const NullColumn::Container* null_map_targets) {
const TargetColumn& targets, const NullColumn::ImmContainer& null_map_elements,
const NullColumn::ImmContainer& null_map_targets) {
auto result = ReturnType::create();
if constexpr (ConstElement && ConstTarget) {
// If both the element and target columns are const, the result is computed only once here, and the ConstColumn of the target size is generated outside.
@ -513,10 +515,10 @@ private:
std::is_same_v<StructColumn, ElementColumn>,
uint8_t, typename ElementColumn::ValueType>;
auto offsets_ptr = offsets.get_data().data();
auto offsets_ptr = offsets.immutable_data().data();
[[maybe_unused]] auto is_null = [](const NullColumn::Container* null_map, size_t idx) -> bool {
return (*null_map)[idx] != 0;
[[maybe_unused]] auto is_null = [](const NullColumn::ImmContainer& null_map, size_t idx) -> bool {
return null_map[idx] != 0;
};
auto* targets_col = &targets;
@ -590,18 +592,18 @@ private:
const Column* elements_ptr = &array_elements;
const Column* targets_ptr = &argument;
const NullColumn::Container* null_map_elements = nullptr;
const NullColumn::Container* null_map_targets = nullptr;
NullColumn::ImmContainer null_map_elements;
NullColumn::ImmContainer null_map_targets;
if constexpr (NullableElement) {
const auto& nullable = down_cast<const NullableColumn&>(array_elements);
elements_ptr = nullable.data_column().get();
null_map_elements = &(nullable.null_column()->get_data());
null_map_elements = nullable.immutable_null_column_data();
}
if constexpr (NullableTarget) {
const auto& nullable = down_cast<const NullableColumn&>(argument);
targets_ptr = nullable.data_column().get();
null_map_targets = &(nullable.null_column()->get_data());
null_map_targets = nullable.immutable_null_column_data();
}
// Use typeid instead of dynamic_cast: typeid is much faster than dynamic_cast.
@ -718,15 +720,15 @@ private:
template <bool NullableElement, bool NullableTarget, typename ElementColumn>
static uint8 __process(const ElementColumn& elements, uint32 element_start, uint32 element_end,
const ElementColumn& targets, uint32 target_start, uint32 target_end,
const NullColumn::Container* null_map_elements,
const NullColumn::Container* null_map_targets) {
const NullColumn::ImmContainer& null_map_elements,
const NullColumn::ImmContainer& null_map_targets) {
using ValueType = std::conditional_t<std::is_same_v<ArrayColumn, ElementColumn> ||
std::is_same_v<MapColumn, ElementColumn> ||
std::is_same_v<StructColumn, ElementColumn>,
uint8_t, typename ElementColumn::ValueType>;
[[maybe_unused]] auto is_null = [](const NullColumn::Container* null_map, size_t idx) -> bool {
return (*null_map)[idx] != 0;
[[maybe_unused]] auto is_null = [](const NullColumn::ImmContainer null_map, size_t idx) -> bool {
return null_map[idx] != 0;
};
for (size_t i = target_start; i < target_end; i++) {
bool null_target = false;
@ -795,14 +797,14 @@ private:
template <bool NullableElement, bool NullableTarget, typename ElementColumn>
static uint8 __process_seq(const ElementColumn& elements, uint32 element_start, uint32 element_end,
const ElementColumn& targets, uint32 target_start, uint32 target_end,
const NullColumn::Container* null_map_elements,
const NullColumn::Container* null_map_targets) {
const NullColumn::ImmContainer& null_map_elements,
const NullColumn::ImmContainer& null_map_targets) {
using ValueType = std::conditional_t<std::is_same_v<ArrayColumn, ElementColumn> ||
std::is_same_v<MapColumn, ElementColumn> ||
std::is_same_v<StructColumn, ElementColumn>,
uint8_t, typename ElementColumn::ValueType>;
[[maybe_unused]] auto is_null = [](const NullColumn::Container* null_map, size_t idx) -> bool {
return (*null_map)[idx] != 0;
[[maybe_unused]] auto is_null = [](const NullColumn::ImmContainer& null_map, size_t idx) -> bool {
return null_map[idx] != 0;
};
if (element_end - element_start < target_end - target_start) {
return false;
@ -864,8 +866,8 @@ private:
template <bool NullableElement, bool NullableTarget, bool ConstTarget, typename ElementColumn>
static StatusOr<ColumnPtr> _process(const ElementColumn& elements, const UInt32Column& element_offsets,
const ElementColumn& targets, const UInt32Column& target_offsets,
const NullColumn::Container* null_map_elements,
const NullColumn::Container* null_map_targets) {
const NullColumn::ImmContainer& null_map_elements,
const NullColumn::ImmContainer& null_map_targets) {
const size_t num_array = element_offsets.size() - 1;
const size_t num_target = target_offsets.size() - 1;
auto result = UInt8Column::create();
@ -873,8 +875,8 @@ private:
auto* result_ptr = result->get_data().data();
auto element_offsets_ptr = element_offsets.get_data().data();
auto target_offsets_ptr = target_offsets.get_data().data();
auto element_offsets_ptr = element_offsets.immutable_data().data();
auto target_offsets_ptr = target_offsets.immutable_data().data();
for (size_t i = 0; i < num_array; i++) {
uint8_t found = 0;
@ -907,19 +909,19 @@ private:
const Column* elements_ptr = &array_elements;
const Column* targets_ptr = &array_targets;
const NullColumn::Container* null_map_elements = nullptr;
const NullColumn::Container* null_map_targets = nullptr;
NullColumn::ImmContainer null_map_elements;
NullColumn::ImmContainer null_map_targets;
if constexpr (NullableElement) {
const auto& nullable = down_cast<const NullableColumn&>(array_elements);
elements_ptr = nullable.data_column().get();
null_map_elements = &(nullable.null_column()->get_data());
null_map_elements = nullable.null_column()->immutable_data();
}
if constexpr (NullableTarget) {
const auto& nullable = down_cast<const NullableColumn&>(array_targets);
targets_ptr = nullable.data_column().get();
null_map_targets = &(nullable.null_column()->get_data());
null_map_targets = nullable.null_column()->immutable_data();
}
// Use typeid instead of dynamic_cast: typeid is much faster than dynamic_cast.
@ -1116,7 +1118,7 @@ StatusOr<ColumnPtr> ArrayFunctions::concat(FunctionContext* ctx, const Columns&
nulls = NullColumn::static_pointer_cast(nullable_column->null_column()->clone());
} else {
ColumnHelper::or_two_filters(num_rows, nulls->get_data().data(),
nullable_column->null_column()->get_data().data());
nullable_column->null_column()->immutable_data().data());
}
}
}
@ -1327,7 +1329,7 @@ StatusOr<ColumnPtr> ArrayFunctions::array_distinct_any_type(FunctionContext* ctx
RETURN_IF_COLUMNS_ONLY_NULL(columns);
auto [array_null, elements, offsets] = unpack_array_column(columns[0]);
auto* offsets_ptr = offsets->get_data().data();
auto* offsets_ptr = offsets->immutable_data().data();
auto* row_nulls = array_null->get_data().data();
auto result_elements = elements->clone_empty();
@ -1395,7 +1397,7 @@ StatusOr<ColumnPtr> ArrayFunctions::array_reverse_any_types(FunctionContext* ctx
RETURN_IF_COLUMNS_ONLY_NULL(columns);
auto [array_null, elements, offsets] = unpack_array_column(columns[0]);
auto* offsets_ptr = offsets->get_data().data();
auto* offsets_ptr = offsets->immutable_data().data();
auto* row_nulls = array_null->get_data().data();
auto result_elements = elements->clone_empty();
@ -1484,7 +1486,7 @@ StatusOr<ColumnPtr> ArrayFunctions::array_intersect_any_type(FunctionContext* ct
}
auto [_2, cmp_elements, cmp_offsets] = unpack_array_column(columns[col_idx]);
auto* cmp_offsets_ptr = cmp_offsets->get_data().data();
auto* cmp_offsets_ptr = cmp_offsets->immutable_data().data();
for (size_t row_idx = 0; row_idx < rows; row_idx++) {
if (nulls_ptr[row_idx] == 1) {
@ -1523,6 +1525,7 @@ static Status sort_multi_array_column(FunctionContext* ctx, const Column* src_co
const std::vector<const Column*>& key_columns, Column* dest_column) {
const auto* src_elements_column = down_cast<const ArrayColumn*>(src_column)->elements_column().get();
const auto* src_offsets_column = &down_cast<const ArrayColumn*>(src_column)->offsets();
const auto src_offsets = src_offsets_column->immutable_data();
auto* dest_elements_column = down_cast<ArrayColumn*>(dest_column)->elements_column().get();
auto* dest_offsets_column = down_cast<ArrayColumn*>(dest_column)->offsets_column().get();
@ -1531,7 +1534,7 @@ static Status sort_multi_array_column(FunctionContext* ctx, const Column* src_co
const size_t num_rows = src_column->size();
const size_t num_key_columns = key_columns.size();
dest_offsets_column->get_data() = src_offsets_column->get_data();
dest_offsets_column->get_data().assign(src_offsets.begin(), src_offsets.end());
// Unpack each key array column.
std::vector<const Column*> elements_per_key_col(num_key_columns);
@ -1546,19 +1549,19 @@ static Status sort_multi_array_column(FunctionContext* ctx, const Column* src_co
}
const auto* key_array_column = down_cast<const ArrayColumn*>(key_column);
const auto key_array_offsets = key_array_column->offsets().immutable_data();
// elements_per_key_col[i] = const_cast<Column*>(key_array_column->elements_column().get());
elements_per_key_col[i] = key_array_column->elements_column().get();
offsets_per_key_col[i] = key_array_column->offsets().get_data();
offsets_per_key_col[i] = key_array_offsets;
}
// Check if the number of elements in each array column of each row is exactly the same.
for (size_t row_i = 0; row_i < num_rows; ++row_i) {
if (src_null_column != nullptr && src_null_column->get_data()[row_i]) {
if (src_null_column != nullptr && src_null_column->immutable_data()[row_i]) {
continue;
}
const auto cur_num_src_elements =
src_offsets_column->get_data()[row_i + 1] - src_offsets_column->get_data()[row_i];
const auto cur_num_src_elements = src_offsets[row_i + 1] - src_offsets[row_i];
for (size_t key_col_i = 0; key_col_i < num_key_columns; ++key_col_i) {
if (nulls_per_key_col[key_col_i] && nulls_per_key_col[key_col_i][row_i]) {
continue;
@ -1576,7 +1579,7 @@ static Status sort_multi_array_column(FunctionContext* ctx, const Column* src_co
const std::atomic<bool>& cancel = ctx->state()->cancelled_ref();
SmallPermutation permutation;
RETURN_IF_ERROR(sort_and_tie_columns(cancel, elements_per_key_col, sort_desc, permutation,
src_offsets_column->get_data(), offsets_per_key_col));
src_offsets_column->immutable_data(), offsets_per_key_col));
std::vector<uint32_t> key_sort_index;
raw::stl_vector_resize_uninitialized(&key_sort_index, num_src_element_rows);
@ -1614,13 +1617,14 @@ StatusOr<ColumnPtr> ArrayFunctions::array_sortby_multi(FunctionContext* ctx, con
const auto* src_nullable_column = down_cast<const NullableColumn*>(src_column);
const auto* src_data_column = src_nullable_column->data_column().get();
const auto* src_null_column = src_nullable_column->null_column().get();
const auto src_nulls = src_nullable_column->immutable_null_column_data();
auto* dest_nullable_column = down_cast<NullableColumn*>(dest_column.get());
auto* dest_data_column = dest_nullable_column->mutable_data_column();
auto* dest_null_column = dest_nullable_column->mutable_null_column();
if (src_nullable_column->has_null()) {
dest_null_column->get_data().assign(src_null_column->get_data().begin(), src_null_column->get_data().end());
dest_null_column->get_data().assign(src_nulls.begin(), src_nulls.end());
} else {
dest_null_column->get_data().resize(chunk_size, 0);
src_null_column = nullptr;

View File

@ -59,11 +59,12 @@ private:
if (columns[0]->is_nullable()) {
const auto* src_nullable_column = down_cast<const NullableColumn*>(src_column.get());
const auto* src_data_column = down_cast<const ArrayColumn*>(src_nullable_column->data_column().get());
const auto null_data = src_nullable_column->immutable_null_column_data();
auto& dest_nullable_column = down_cast<NullableColumn&>(*dest_column);
auto& dest_null_data = down_cast<NullableColumn&>(*dest_column).null_column_data();
auto& dest_data_column = down_cast<ArrayColumn&>(*dest_nullable_column.data_column());
dest_null_data = src_nullable_column->immutable_null_column_data();
dest_null_data.assign(null_data.begin(), null_data.end());
dest_nullable_column.set_has_null(src_nullable_column->has_null());
if (src_nullable_column->has_null()) {
@ -174,7 +175,7 @@ private:
if (columns[0]->is_nullable()) {
const auto* src_nullable_column = down_cast<const NullableColumn*>(src_column.get());
const auto* src_data_column = down_cast<const ArrayColumn*>(src_nullable_column->data_column().get());
auto src_null_data = src_nullable_column->immutable_null_column_data();
dest_column = NullableColumn::create(
ArrayColumn::create(dest_column_data, UInt32Column::create(src_data_column->offsets())),
NullColumn::create());
@ -183,7 +184,7 @@ private:
auto& dest_null_data = down_cast<NullableColumn&>(*dest_column).null_column_data();
auto& dest_data_column = down_cast<ArrayColumn&>(*dest_nullable_column.data_column());
dest_null_data = src_nullable_column->immutable_null_column_data();
dest_null_data.assign(src_null_data.begin(), src_null_data.end());
dest_nullable_column.set_has_null(src_nullable_column->has_null());
if (src_nullable_column->has_null()) {
@ -269,7 +270,7 @@ class ArrayOverlap {
public:
using CppType = RunTimeCppType<LT>;
using ColumnType = RunTimeColumnType<LT>;
using DataArray = RunTimeProxyContainerType<LT>;
using DataArray = typename RunTimeTypeTraits<LT>::ProxyContainerType;
using HashFunc = PhmapDefaultHashFunc<LT, PhmapSeed1>;
using HashSet = phmap::flat_hash_set<CppType, HashFunc>;
@ -434,7 +435,7 @@ private:
static bool _put_array_to_hash_set(const ArrayColumn& column, size_t index, HashSet* hash_set) {
const auto* elements_column = column.elements_column().get();
const auto& offsets = column.offsets().get_data();
const auto offsets = column.offsets().immutable_data();
bool has_null = false;
uint32_t start = offsets[index];
uint32_t end = offsets[index + 1];
@ -443,7 +444,7 @@ private:
const NullableColumn* nullable_column = down_cast<const NullableColumn*>(elements_column);
const auto& datas = GetContainer<LT>::get_data(nullable_column->data_column());
const auto& nulls = nullable_column->null_column()->get_data();
const auto nulls = nullable_column->immutable_null_column_data();
if (nullable_column->has_null()) {
for (size_t i = start; i < end; i++) {
@ -465,7 +466,7 @@ private:
static bool _check_column_overlap_nullable(const HashSet& hash_set, const ArrayColumn& column, size_t index,
bool has_null) {
const auto* elements_column = column.elements_column().get();
const auto& offsets = column.offsets().get_data();
const auto offsets = column.offsets().immutable_data();
uint32_t start = offsets[index];
uint32_t end = offsets[index + 1];
bool overlap = false;
@ -473,10 +474,10 @@ private:
DCHECK(elements_column->is_nullable());
const NullableColumn* nullable_elements_column = down_cast<const NullableColumn*>(elements_column);
const auto& datas = GetContainer<LT>::get_data(nullable_elements_column->data_column());
const auto datas = GetContainer<LT>::get_data(nullable_elements_column->data_column());
if (nullable_elements_column->has_null()) {
const auto& nulls = nullable_elements_column->null_column()->get_data();
const auto nulls = nullable_elements_column->immutable_null_column_data();
overlap = _check_overlap_nullable(hash_set, datas, nulls, start, end, has_null, index);
} else {
@ -496,8 +497,9 @@ private:
return false;
}
static bool _check_overlap_nullable(const HashSet& hash_set, const DataArray& data, const NullData& null_data,
uint32_t start, uint32_t end, bool has_null, size_t index) {
static bool _check_overlap_nullable(const HashSet& hash_set, const DataArray& data,
const ImmutableNullData& null_data, uint32_t start, uint32_t end, bool has_null,
size_t index) {
for (auto i = start; i < end; i++) {
if (null_data[i] == 1) {
if (has_null) {
@ -685,14 +687,14 @@ public:
const auto* src_nullable_column = down_cast<const NullableColumn*>(src_column.get());
const auto& src_data_column = src_nullable_column->data_column_ref();
const auto& src_null_column = src_nullable_column->null_column_ref();
auto imm_null_data = src_null_column.immutable_data();
auto* dest_nullable_column = down_cast<NullableColumn*>(dest_column.get());
auto* dest_data_column = dest_nullable_column->mutable_data_column();
auto* dest_null_column = dest_nullable_column->mutable_null_column();
if (src_column->has_null()) {
dest_null_column->get_data().assign(src_null_column.get_data().begin(),
src_null_column.get_data().end());
dest_null_column->get_data().assign(imm_null_data.begin(), imm_null_data.end());
} else {
dest_null_column->get_data().resize(chunk_size, 0);
}
@ -707,8 +709,8 @@ public:
protected:
// Reorders the `count` entries of `*sort_index` that start at position `offset`
// using pdqsort with `less_fn`, i.e. by comparing the values that the stored
// indices refer to in `src_column` (data[l] < data[r]).
// Entries outside [offset, offset + count) are not touched by this call.
// NOTE(review): this span appears to be a rendered diff hunk -- the
// `down_cast<...>.get_data()` line looks like the removed version and the two
// lines after it (via `GetContainer<LT>::get_data`) its replacement; confirm
// against the actual header before relying on either form.
static void _sort_column(std::vector<uint32_t>* sort_index, const Column& src_column, size_t offset, size_t count) {
const auto& data = down_cast<const ColumnType&>(src_column).get_data();
const Column* src = &src_column;
const auto data = GetContainer<LT>::get_data(src);
// The comparator orders indices by the element values they point at, not by the index values.
auto less_fn = [&data](uint32_t l, uint32_t r) -> bool { return data[l] < data[r]; };
pdqsort(sort_index->begin() + offset, sort_index->begin() + offset + count, less_fn);
}
@ -722,7 +724,7 @@ protected:
static void _sort_item(std::vector<uint32_t>* sort_index, const Column& src_column,
const UInt32Column& offset_column, size_t index) {
const auto& offsets = offset_column.get_data();
const auto offsets = offset_column.immutable_data();
size_t start = offsets[index];
size_t count = offsets[index + 1] - offsets[index];
@ -736,7 +738,8 @@ protected:
static void _sort_nullable_item(std::vector<uint32_t>* sort_index, const Column& src_data_column,
const NullColumn& src_null_column, const UInt32Column& offset_column,
size_t index) {
const auto& offsets = offset_column.get_data();
const auto offsets = offset_column.immutable_data();
size_t start = offsets[index];
size_t count = offsets[index + 1] - offsets[index];
@ -744,7 +747,7 @@ protected:
return;
}
auto null_first_fn = [&src_null_column](size_t i) -> bool { return src_null_column.get_data()[i] == 1; };
auto null_first_fn = [&src_null_column](size_t i) -> bool { return src_null_column.immutable_data()[i] == 1; };
auto begin_of_not_null =
std::partition(sort_index->begin() + start, sort_index->begin() + start + count, null_first_fn);
@ -761,7 +764,8 @@ protected:
auto* dest_elements_column = down_cast<ArrayColumn*>(dest_array_column)->elements_column().get();
auto* dest_offsets_column = down_cast<ArrayColumn*>(dest_array_column)->offsets_column().get();
dest_offsets_column->get_data() = offsets_column.get_data();
auto offsets = offsets_column.immutable_data();
dest_offsets_column->get_data().assign(offsets.begin(), offsets.end());
size_t chunk_size = src_array_column.size();
_init_sort_index(sort_index, src_elements_column.size());
@ -1104,10 +1108,11 @@ private:
ColumnPtr data_column = dest_column;
if (src_column->is_nullable()) {
const auto src_null_column = down_cast<const NullableColumn*>(src_column.get())->null_column();
const auto src_null_data = src_null_column->immutable_data();
auto dest_nullable_column = down_cast<NullableColumn*>(dest_column.get());
auto dest_null_column = dest_nullable_column->mutable_null_column();
dest_null_column->get_data().assign(src_null_column->get_data().begin(),
src_null_column->get_data().end());
dest_null_column->get_data().assign(src_null_data.begin(), src_null_data.end());
dest_nullable_column->set_has_null(src_column->has_null());
data_column = dest_nullable_column->data_column();
}
@ -1123,10 +1128,12 @@ private:
if (src_column->is_nullable()) {
const auto* src_nullable_column = down_cast<const NullableColumn*>(src_column.get());
const auto& src_null_column = src_nullable_column->null_column();
const auto src_null_data = src_null_column->immutable_data();
auto* dest_nullable_column = down_cast<NullableColumn*>(dest_column.get());
dest_null_column = dest_nullable_column->mutable_null_column();
dest_null_column->get_data().assign(src_null_column->get_data().begin(), src_null_column->get_data().end());
dest_null_column->get_data().assign(src_null_data.begin(), src_null_data.end());
dest_nullable_column->set_has_null(src_nullable_column->has_null());
}
@ -1190,21 +1197,23 @@ private:
filter = down_cast<const ArrayColumn*>(raw_filter.get());
}
const auto* filter_null_map_data =
filter_null_map == nullptr ? nullptr : filter_null_map->immutable_data().data();
std::vector<uint32_t> indexes;
size_t num_rows = ConstSrc ? src_rows : src_column->size();
for (size_t i = 0; i < num_rows; i++) {
if (filter_null_map == nullptr || !filter_null_map->get_data()[i]) {
bool filter_is_not_null =
(filter_null_map == nullptr ||
(ConstFilter ? !filter_null_map->get_data()[0] : !filter_null_map->get_data()[i]));
if (filter_null_map_data == nullptr || !filter_null_map_data[i]) {
bool filter_is_not_null = (filter_null_map == nullptr ||
(ConstFilter ? !filter_null_map_data[0] : !filter_null_map_data[i]));
if (filter_is_not_null) {
// if filter is not null, we should filter each element in the array
const auto& src_offsets = src_column->offsets().get_data();
const auto src_offsets = src_column->offsets().immutable_data();
size_t src_start = ConstSrc ? src_offsets[0] : src_offsets[i];
size_t src_end = ConstSrc ? src_offsets[1] : src_offsets[i + 1];
size_t src_elements_num = src_end - src_start;
const auto& filter_offsets = filter->offsets().get_data();
const auto filter_offsets = filter->offsets().immutable_data();
size_t filter_start = ConstFilter ? filter_offsets[0] : filter_offsets[i];
size_t filter_end = ConstFilter ? filter_offsets[1] : filter_offsets[i + 1];
size_t filter_elements_num = filter_end - filter_start;
@ -1261,14 +1270,14 @@ public:
const auto* src_nullable_column = down_cast<const NullableColumn*>(src_column.get());
const auto& src_data_column = src_nullable_column->data_column_ref();
const auto& src_null_column = src_nullable_column->null_column_ref();
const auto src_null_data = src_null_column.immutable_data();
auto* dest_nullable_column = down_cast<NullableColumn*>(dest_column.get());
auto* dest_data_column = dest_nullable_column->mutable_data_column();
auto* dest_null_column = dest_nullable_column->mutable_null_column();
if (src_column->has_null()) {
dest_null_column->get_data().assign(src_null_column.get_data().begin(),
src_null_column.get_data().end());
dest_null_column->get_data().assign(src_null_data.begin(), src_null_data.end());
} else {
dest_null_column->get_data().resize(chunk_size, 0);
}
@ -1295,13 +1304,15 @@ private:
const auto& key_element_column = down_cast<ArrayColumn*>(key_data.get())->elements();
const auto& key_offsets_column = down_cast<ArrayColumn*>(key_data.get())->offsets();
const auto key_offsets = key_offsets_column.immutable_data();
const auto& src_elements_column = down_cast<const ArrayColumn&>(src_array_column).elements();
const auto& src_offsets_column = down_cast<const ArrayColumn&>(src_array_column).offsets();
const auto src_offsets = src_offsets_column.immutable_data();
auto* dest_elements_column = down_cast<ArrayColumn*>(dest_array_column)->elements_column().get();
auto* dest_offsets_column = down_cast<ArrayColumn*>(dest_array_column)->offsets_column().get();
dest_offsets_column->get_data() = src_offsets_column.get_data();
dest_offsets_column->get_data().assign(src_offsets.begin(), src_offsets.end());
size_t chunk_size = src_array_column.size();
// key_element_column's size may not be equal to src_element_column's, so we should align their sort index for
@ -1310,25 +1321,28 @@ private:
src_sort_index.reserve(src_elements_column.size());
ArraySort<LT>::_init_sort_index(&key_sort_index, key_element_column.size());
// element column is nullable
const auto* src_null_map_data = src_null_map == nullptr ? nullptr : src_null_map->immutable_data().data();
const auto* key_null_map_data = key_null_map == nullptr ? nullptr : key_null_map->immutable_data().data();
if (key_element_column.has_null()) {
const auto& key_data_column = down_cast<const NullableColumn&>(key_element_column).data_column_ref();
const auto& null_column = down_cast<const NullableColumn&>(key_element_column).null_column_ref();
for (size_t i = 0; i < chunk_size; i++) {
if ((src_null_map == nullptr || !src_null_map->get_data()[i]) &&
(key_null_map == nullptr || !key_null_map->get_data()[i])) {
if (src_offsets_column.get_data()[i + 1] - src_offsets_column.get_data()[i] !=
key_offsets_column.get_data()[i + 1] - key_offsets_column.get_data()[i]) {
if ((src_null_map_data == nullptr || !src_null_map_data[i]) &&
(key_null_map_data == nullptr || !key_null_map_data[i])) {
if (src_offsets[i + 1] - src_offsets[i] != key_offsets[i + 1] - key_offsets[i]) {
throw std::runtime_error("Input arrays' size are not equal in array_sortby.");
}
ArraySort<LT>::_sort_nullable_item(&key_sort_index, key_data_column, null_column,
key_offsets_column, i);
auto delta = key_offsets_column.get_data()[i] - src_offsets_column.get_data()[i];
for (auto id = key_offsets_column.get_data()[i]; id < key_offsets_column.get_data()[i + 1]; ++id) {
auto delta = key_offsets[i] - src_offsets[i];
for (auto id = key_offsets[i]; id < key_offsets[i + 1]; ++id) {
src_sort_index.push_back(key_sort_index[id] - delta);
}
} else {
for (auto id = src_offsets_column.get_data()[i]; id < src_offsets_column.get_data()[i + 1]; ++id) {
for (auto id = src_offsets[i]; id < src_offsets[i + 1]; ++id) {
src_sort_index.push_back(id);
}
}
@ -1337,19 +1351,18 @@ private:
const auto& key_data_column = down_cast<const NullableColumn&>(key_element_column).data_column_ref();
for (size_t i = 0; i < chunk_size; i++) {
if ((src_null_map == nullptr || !src_null_map->get_data()[i]) &&
(key_null_map == nullptr || !key_null_map->get_data()[i])) {
if (src_offsets_column.get_data()[i + 1] - src_offsets_column.get_data()[i] !=
key_offsets_column.get_data()[i + 1] - key_offsets_column.get_data()[i]) {
if ((src_null_map_data == nullptr || !src_null_map_data[i]) &&
(key_null_map_data == nullptr || !key_null_map_data[i])) {
if (src_offsets[i + 1] - src_offsets[i] != key_offsets[i + 1] - key_offsets[i]) {
throw std::runtime_error("Input arrays' size are not equal in array_sortby.");
}
ArraySort<LT>::_sort_item(&key_sort_index, key_data_column, key_offsets_column, i);
auto delta = key_offsets_column.get_data()[i] - src_offsets_column.get_data()[i];
for (auto id = key_offsets_column.get_data()[i]; id < key_offsets_column.get_data()[i + 1]; ++id) {
auto delta = key_offsets[i] - src_offsets[i];
for (auto id = key_offsets[i]; id < key_offsets[i + 1]; ++id) {
src_sort_index.push_back(key_sort_index[id] - delta);
}
} else {
for (auto id = src_offsets_column.get_data()[i]; id < src_offsets_column.get_data()[i + 1]; ++id) {
for (auto id = src_offsets[i]; id < src_offsets[i + 1]; ++id) {
src_sort_index.push_back(id);
}
}
@ -1368,11 +1381,11 @@ public:
Column* result_col, NullColumn* null_cols) {
const RunTimeCppType<TYPE_NULL>* elements_nulls = nullptr;
if constexpr (HasNull) {
elements_nulls = elements_null_col->get_data().data();
elements_nulls = elements_null_col->immutable_data().data();
}
const auto& elements_data = GetContainer<ElementType>::get_data(elements);
auto* offsets_ptr = offsets->get_data().data();
const auto* offsets_ptr = offsets->immutable_data().data();
auto* null_ptr = null_cols->get_data().data();
const size_t rows = offsets->size() - 1;
@ -1456,11 +1469,11 @@ public:
Column* result_col, NullColumn* null_cols) {
const RunTimeCppType<TYPE_NULL>* elements_nulls = nullptr;
if constexpr (HasNull) {
elements_nulls = elements_null_col->get_data().data();
elements_nulls = elements_null_col->immutable_data().data();
}
auto* elements_data = down_cast<const RunTimeColumnType<ElementType>*>(elements)->get_data().data();
auto* offsets_ptr = offsets->get_data().data();
auto* elements_data = down_cast<const RunTimeColumnType<ElementType>*>(elements)->immutable_data().data();
const auto* offsets_ptr = offsets->immutable_data().data();
auto* null_ptr = null_cols->get_data().data();
const int64_t rows = offsets->size() - 1;
@ -1652,7 +1665,7 @@ public:
nulls = NullColumn::static_pointer_cast(nullable_column->null_column()->clone());
} else {
ColumnHelper::or_two_filters(num_rows, nulls->get_data().data(),
nullable_column->null_column()->get_data().data());
nullable_column->null_column()->immutable_data().data());
}
}
}
@ -1876,7 +1889,7 @@ private:
const CppType* elements_data = reinterpret_cast<const CppType*>(elements_column->raw_data());
const NullColumn::ValueType* null_data = null_column->raw_data();
const UInt32Column::ValueType* offsets_data = offsets_column->get_data().data();
const UInt32Column::ValueType* offsets_data = offsets_column->immutable_data().data();
// column may be null
size_t offset = offsets_data[0];
size_t array_size = offsets_data[1] - offset;
@ -1942,10 +1955,10 @@ private:
const auto& elements = down_cast<const NullableColumn*>(elements_column.get())->data_column();
const CppType* elements_data = reinterpret_cast<const CppType*>(elements->raw_data());
const NullColumn::ValueType* elements_null_data =
down_cast<const NullableColumn*>(elements_column.get())->null_column()->get_data().data();
down_cast<const NullableColumn*>(elements_column.get())->immutable_null_column_data().data();
const auto& offsets_column = down_cast<const ArrayColumn*>(arrays.get())->offsets_column();
const auto& offsets_data = offsets_column->get_data();
const auto offsets_data = offsets_column->immutable_data();
const CppType* targets_data = reinterpret_cast<const CppType*>(targets->raw_data());
@ -2053,7 +2066,7 @@ public:
const auto& [offsets_column, elements_column, null_column] = ColumnHelper::unpack_array_column(array_column);
const CppType* elements_data = reinterpret_cast<const CppType*>(elements_column->raw_data());
const NullColumn::ValueType* null_data = null_column->raw_data();
const UInt32Column::ValueType* offsets_data = offsets_column->get_data().data();
const UInt32Column::ValueType* offsets_data = offsets_column->immutable_data().data();
size_t offset = offsets_data[0];
size_t array_size = offsets_data[1] - offset;
@ -2073,7 +2086,7 @@ public:
const CppType* target_elements_data = reinterpret_cast<const CppType*>(target_elements_column->raw_data());
const NullColumn::ValueType* target_elements_null_data = target_null_column->raw_data();
const UInt32Column::ValueType* target_offsets_data = target_offsets_column->get_data().data();
const UInt32Column::ValueType* target_offsets_data = target_offsets_column->immutable_data().data();
size_t target_offset = target_offsets_data[0];
size_t target_array_size = target_offsets_data[1] - offset;
@ -2326,14 +2339,14 @@ private:
const auto& [left_offsets_column, left_elements_column, left_elements_null_column] =
ColumnHelper::unpack_array_column(left_arrays);
const CppType* left_elements_data = reinterpret_cast<const CppType*>(left_elements_column->raw_data());
const NullColumn::ValueType* left_elements_null_data = left_elements_null_column->get_data().data();
const auto* left_offsets_data = left_offsets_column->get_data().data();
const NullColumn::ValueType* left_elements_null_data = left_elements_null_column->immutable_data().data();
const auto* left_offsets_data = left_offsets_column->immutable_data().data();
const auto& [right_offsets_column, right_elements_column, right_elements_null_column] =
ColumnHelper::unpack_array_column(right_arrays);
const CppType* right_elements_data = reinterpret_cast<const CppType*>(right_elements_column->raw_data());
const NullColumn::ValueType* right_elements_null_data = right_elements_null_column->get_data().data();
const auto* right_offsets_data = right_offsets_column->get_data().data();
const NullColumn::ValueType* right_elements_null_data = right_elements_null_column->immutable_data().data();
const auto* right_offsets_data = right_offsets_column->immutable_data().data();
size_t num_rows = (is_const_left && is_const_right) ? 1 : std::max(left_arrays->size(), right_arrays->size());

View File

@ -125,7 +125,7 @@ StatusOr<ColumnPtr> ArrayMapExpr::evaluate_lambda_expr(ExprContext* context, Chu
}
UInt32Column::Ptr aligned_offsets = nullptr;
size_t null_rows = result_null_column ? SIMD::count_nonzero(result_null_column->get_data()) : 0;
size_t null_rows = result_null_column ? SIMD::count_nonzero(result_null_column->immutable_data()) : 0;
std::vector<SlotId> arguments_ids;
int argument_num = lambda_func->get_lambda_arguments_ids(&arguments_ids);
@ -158,8 +158,8 @@ StatusOr<ColumnPtr> ArrayMapExpr::evaluate_lambda_expr(ExprContext* context, Chu
}
} else {
if (result_null_column != nullptr) {
data_column->empty_null_in_complex_column(result_null_column->get_data(),
array_column->offsets().get_data());
data_column->empty_null_in_complex_column(result_null_column->immutable_data(),
array_column->offsets().immutable_data());
}
elements_column = down_cast<const ArrayColumn*>(data_column.get())->elements_column();
}

View File

@ -24,6 +24,7 @@
#include "exprs/function_helper.h"
#include "simd/simd.h"
#include "typeinfo"
#include "types/logical_type.h"
namespace starrocks {
@ -61,9 +62,15 @@ public:
for (int i = 0; i < s; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(r1[i], r2[i]);
}
} else if constexpr (lt_is_object_family<LType> || lt_is_object_family<RType>) {
const auto data1 = ColumnHelper::cast_to_raw<LType>(v1)->immutable_data();
const auto data2 = ColumnHelper::cast_to_raw<RType>(v2)->immutable_data();
for (int i = 0; i < s; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(data1[i], data2[i]);
}
} else {
auto* data1 = ColumnHelper::cast_to_raw<LType>(v1)->get_data().data();
auto* data2 = ColumnHelper::cast_to_raw<RType>(v2)->get_data().data();
auto* data1 = ColumnHelper::cast_to_raw<LType>(v1)->immutable_data().data();
auto* data2 = ColumnHelper::cast_to_raw<RType>(v2)->immutable_data().data();
for (int i = 0; i < s; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(data1[i], data2[i]);
}
@ -90,9 +97,15 @@ public:
for (int i = 0; i < size; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(data1, r2[i]);
}
} else if constexpr (lt_is_object_family<LType> || lt_is_object_family<RType>) {
const auto data1 = ColumnHelper::cast_to_raw<LType>(v1)->immutable_data()[0];
const auto data2 = ColumnHelper::cast_to_raw<RType>(v2)->immutable_data();
for (int i = 0; i < size; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(data1, data2[i]);
}
} else {
auto data1 = ColumnHelper::cast_to_raw<LType>(v1)->get_data()[0];
auto* data2 = ColumnHelper::cast_to_raw<RType>(v2)->get_data().data();
const auto data1 = ColumnHelper::cast_to_raw<LType>(v1)->immutable_data()[0];
const auto* data2 = ColumnHelper::cast_to_raw<RType>(v2)->immutable_data().data();
for (int i = 0; i < size; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(data1, data2[i]);
}
@ -120,9 +133,15 @@ public:
for (int i = 0; i < size; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(r1[i], data2);
}
} else if constexpr (lt_is_object_family<LType> || lt_is_object_family<RType>) {
const auto data1 = ColumnHelper::cast_to_raw<LType>(v1)->immutable_data();
const auto data2 = ColumnHelper::cast_to_raw<RType>(v2)->immutable_data()[0];
for (int i = 0; i < size; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(data1[i], data2);
}
} else {
auto* data1 = ColumnHelper::cast_to_raw<LType>(v1)->get_data().data();
auto data2 = ColumnHelper::cast_to_raw<RType>(v2)->get_data()[0];
const auto* data1 = ColumnHelper::cast_to_raw<LType>(v1)->immutable_data().data();
auto data2 = ColumnHelper::cast_to_raw<RType>(v2)->immutable_data()[0];
for (int i = 0; i < size; ++i) {
data3[i] = OP::template apply<LCppType, RCppType, ResultCppType>(data1[i], data2);
}
@ -147,8 +166,8 @@ public:
auto& r2 = ColumnHelper::cast_to_raw<RType>(v2)->get_proxy_data();
r3[0] = OP::template apply<LCppType, RCppType, ResultCppType>(r1[0], r2[0]);
} else {
auto& r1 = ColumnHelper::cast_to_raw<LType>(v1)->get_data();
auto& r2 = ColumnHelper::cast_to_raw<RType>(v2)->get_data();
auto& r1 = ColumnHelper::cast_to_raw<LType>(v1)->immutable_data();
auto& r2 = ColumnHelper::cast_to_raw<RType>(v2)->immutable_data();
r3[0] = OP::template apply<LCppType, RCppType, ResultCppType>(r1[0], r2[0]);
}
@ -271,7 +290,7 @@ public:
// DO NOT overwrite null flag if it is already set
null_flags->get_data()[i] |=
NULL_OP::template apply<RunTimeCppType<ResultType>, RunTimeCppType<TYPE_BOOLEAN>>(
real_data->get_data()[i]);
real_data->immutable_data()[i]);
}
if (data->is_nullable()) {
@ -397,11 +416,11 @@ public:
template <LogicalType LType, LogicalType RType, LogicalType ResultType>
static ColumnPtr vector_vector(const ColumnPtr& lv, const NullColumnPtr& ln, const ColumnPtr& rv,
const NullColumnPtr& rn) {
auto* lvd = ColumnHelper::cast_to_raw<LType>(lv)->get_data().data();
auto* rvd = ColumnHelper::cast_to_raw<RType>(rv)->get_data().data();
auto* lvd = ColumnHelper::cast_to_raw<LType>(lv)->immutable_data().data();
auto* rvd = ColumnHelper::cast_to_raw<RType>(rv)->immutable_data().data();
auto* lnd = ln->get_data().data();
auto* rnd = rn->get_data().data();
auto* lnd = ln->immutable_data().data();
auto* rnd = rn->immutable_data().data();
int size = std::min(lv->size(), rv->size());
@ -430,11 +449,11 @@ public:
template <LogicalType LType, LogicalType RType, LogicalType ResultType>
static ColumnPtr const_vector(const ColumnPtr& lv, const NullColumnPtr& ln, const ColumnPtr& rv,
const NullColumnPtr& rn) {
auto* lvd = ColumnHelper::cast_to_raw<LType>(lv)->get_data().data();
auto* rvd = ColumnHelper::cast_to_raw<RType>(rv)->get_data().data();
const auto* lvd = ColumnHelper::cast_to_raw<LType>(lv)->immutable_data().data();
const auto* rvd = ColumnHelper::cast_to_raw<RType>(rv)->immutable_data().data();
auto* lnd = ln->get_data().data();
auto* rnd = rn->get_data().data();
const auto* lnd = ln->immutable_data().data();
const auto* rnd = rn->immutable_data().data();
int size = rv->size();
@ -463,11 +482,11 @@ public:
template <LogicalType LType, LogicalType RType, LogicalType ResultType>
static ColumnPtr vector_const(const ColumnPtr& lv, const NullColumnPtr& ln, const ColumnPtr& rv,
const NullColumnPtr& rn) {
auto* lvd = ColumnHelper::cast_to_raw<LType>(lv)->get_data().data();
auto* rvd = ColumnHelper::cast_to_raw<RType>(rv)->get_data().data();
const auto* lvd = ColumnHelper::cast_to_raw<LType>(lv)->immutable_data().data();
const auto* rvd = ColumnHelper::cast_to_raw<RType>(rv)->immutable_data().data();
auto* lnd = ln->get_data().data();
auto* rnd = rn->get_data().data();
const auto* lnd = ln->immutable_data().data();
const auto* rnd = rn->immutable_data().data();
int size = lv->size();

View File

@ -425,15 +425,15 @@ StatusOr<ColumnPtr> BitmapFunctions::array_to_bitmap(FunctionContext* context, c
: nullptr;
const auto* array_column = down_cast<const ArrayColumn*>(data_column);
auto element_container =
array_column->elements_column()->is_nullable()
? down_cast<const RunTimeColumnType<TYPE>*>(
down_cast<const NullableColumn*>(array_column->elements_column().get())
->data_column()
.get())
->get_data()
: down_cast<const RunTimeColumnType<TYPE>*>(array_column->elements_column().get())->get_data();
const auto& offsets = array_column->offsets_column()->get_data();
auto element_container = array_column->elements_column()->is_nullable()
? down_cast<const RunTimeColumnType<TYPE>*>(
down_cast<const NullableColumn*>(array_column->elements_column().get())
->data_column()
.get())
->immutable_data()
: down_cast<const RunTimeColumnType<TYPE>*>(array_column->elements_column().get())
->immutable_data();
const auto offsets = array_column->offsets_column()->immutable_data();
auto element_null_data =
array_column->elements_column()->is_nullable()

View File

@ -142,7 +142,7 @@ struct DecimalNonDecimalCast<overflow_mode, DecimalType, NonDecimalType, Decimal
static inline ColumnPtr decimal_from(const ColumnPtr& column, int precision, int scale) {
const auto num_rows = column->size();
typename DecimalColumnType::MutablePtr result = DecimalColumnType::create(precision, scale, num_rows);
const auto data = &ColumnHelper::cast_to_raw<NonDecimalType>(column.get())->get_data().front();
const auto data = &ColumnHelper::cast_to_raw<NonDecimalType>(column.get())->immutable_data().front();
auto result_data = &ColumnHelper::cast_to_raw<DecimalType>(result.get())->get_data().front();
NullColumn::MutablePtr null_column;
NullColumn::ValueType* nulls = nullptr;

View File

@ -49,7 +49,7 @@ void FunctionHelper::union_produce_nullable_column(const ColumnPtr& v1, const Co
auto* result = (*produce_null_column)->get_data().data();
if (v1->has_null()) {
auto* null1 = down_cast<const NullableColumn*>(v1.get())->null_column()->get_data().data();
auto* null1 = down_cast<const NullableColumn*>(v1.get())->null_column()->immutable_data().data();
int size = v1->size();
for (int i = 0; i < size; ++i) {
@ -58,7 +58,7 @@ void FunctionHelper::union_produce_nullable_column(const ColumnPtr& v1, const Co
}
if (v2->has_null()) {
auto* null2 = down_cast<const NullableColumn*>(v2.get())->null_column()->get_data().data();
auto* null2 = down_cast<const NullableColumn*>(v2.get())->null_column()->immutable_data().data();
int size = v2->size();
for (int i = 0; i < size; ++i) {
@ -71,7 +71,7 @@ void FunctionHelper::union_produce_nullable_column(const ColumnPtr& v1, NullColu
auto* result = (*produce_null_column)->get_data().data();
if (v1->has_null()) {
auto* null1 = down_cast<const NullableColumn*>(v1.get())->null_column()->get_data().data();
const auto* null1 = down_cast<const NullableColumn*>(v1.get())->null_column()->immutable_data().data();
int size = v1->size();
for (int i = 0; i < size; ++i) {
@ -129,8 +129,8 @@ MFV_DEFAULT(void union_null_column_impl(uint8_t* dest, const uint8_t* v1, const
NullColumn::MutablePtr FunctionHelper::union_null_column(const NullColumnPtr& v1, const NullColumnPtr& v2) {
// union null column
auto null1_begin = (uint8_t*)v1->get_data().data();
auto null2_begin = (uint8_t*)v2->get_data().data();
auto null1_begin = (uint8_t*)v1->immutable_data().data();
auto null2_begin = (uint8_t*)v2->immutable_data().data();
const size_t row_num = v1->size();
NullColumn::MutablePtr null_result = NullColumn::create();

View File

@ -17,6 +17,7 @@
#include "column/column_helper.h"
#include "column/const_column.h"
#include "column/type_traits.h"
#include "column/vectorized_fwd.h"
#include "gutil/casts.h"
namespace starrocks {
@ -43,15 +44,7 @@ public:
* @param col, row_num, data
*/
template <typename ToColumnType, typename CppType>
static void get_data_of_column(const Column* col, size_t row_num, CppType& data) {
if (col->is_constant()) {
auto const_col = down_cast<const ConstColumn*>(col);
col = const_col->data_column().get();
row_num = 0;
}
const auto* column = down_cast<const ToColumnType*>(col);
data = column->get_data()[row_num];
}
static inline void get_data_of_column(const Column* col, size_t row_num, CppType& data);
/**
* if ptr is ConstColumn, return data column
@ -87,6 +80,28 @@ public:
static ColumnPtr merge_column_and_null_column(ColumnPtr&& column, NullColumnPtr&& null_column);
};
template <typename ToColumnType, typename CppType>
inline void FunctionHelper::get_data_of_column(const Column* col, size_t row_num, CppType& data) {
if (col->is_constant()) {
auto const_col = down_cast<const ConstColumn*>(col);
col = const_col->data_column().get();
row_num = 0;
}
const auto* column = down_cast<const ToColumnType*>(col);
data = column->immutable_data()[row_num];
}
/**
 * Specialization of get_data_of_column for BinaryColumn / Slice.
 *
 * Same constant-column handling as the generic version (a constant column is
 * unwrapped and read at row 0), but the element is obtained with
 * BinaryColumn::get_slice() rather than by indexing immutable_data().
 *
 * @param col      column to read from; down-cast to BinaryColumn (after unwrapping a ConstColumn)
 * @param row_num  row index to read; only used when `col` is not constant
 * @param data     output, assigned the slice returned by get_slice()
 */
template <>
inline void FunctionHelper::get_data_of_column<BinaryColumn, Slice>(const Column* col, size_t row_num, Slice& data) {
    const Column* source = col;
    size_t index = row_num;
    if (col->is_constant()) {
        // A constant column keeps its single value in the wrapped data column.
        source = down_cast<const ConstColumn*>(col)->data_column().get();
        index = 0;
    }
    data = down_cast<const BinaryColumn*>(source)->get_slice(index);
}
#define DEFINE_VECTORIZED_FN(NAME) static StatusOr<ColumnPtr> NAME(FunctionContext* context, const Columns& columns)
#define DEFINE_VECTORIZED_FN_TEMPLATE(NAME) \

View File

@ -74,7 +74,7 @@ StatusOr<ColumnPtr> MapApplyExpr::evaluate_checked(ExprContext* context, Chunk*
data_column = nullable->data_column();
// empty null map with non-empty elements
data_column->empty_null_in_complex_column(
nullable->null_column()->get_data(),
nullable->null_column()->immutable_data(),
down_cast<MapColumn*>(data_column.get())->offsets_column()->get_data());
if (input_null_map) {
input_null_map = FunctionHelper::union_null_column(nullable->null_column(),

Some files were not shown because too many files have changed in this diff Show More