// This file is licensed under the Elastic License 2.0. Copyright 2021 StarRocks Limited. #pragma once #include "column/fixed_length_column.h" #include "common/logging.h" namespace starrocks::vectorized { using NullData = FixedLengthColumn::Container; using NullColumn = FixedLengthColumn; using NullColumnPtr = FixedLengthColumn::Ptr; using NullColumns = std::vector; using NullValueType = NullColumn::ValueType; static constexpr NullValueType DATUM_NULL = NullValueType(1); static constexpr NullValueType DATUM_NOT_NULL = NullValueType(0); class NullableColumn final : public ColumnFactory { friend class ColumnFactory; public: NullableColumn(MutableColumnPtr&& data_column, MutableColumnPtr&& null_column); NullableColumn(ColumnPtr data_column, NullColumnPtr null_column); // Copy constructor NullableColumn(const NullableColumn& rhs) : _data_column(rhs._data_column->clone_shared()), _null_column(std::static_pointer_cast(rhs._null_column->clone_shared())), _has_null(rhs._has_null) {} // Move constructor NullableColumn(NullableColumn&& rhs) : _data_column(std::move(rhs._data_column)), _null_column(std::move(rhs._null_column)), _has_null(rhs._has_null) {} // Copy assignment NullableColumn& operator=(const NullableColumn& rhs) { NullableColumn tmp(rhs); this->swap_column(tmp); return *this; } // Move assignment NullableColumn& operator=(NullableColumn&& rhs) noexcept { NullableColumn tmp(std::move(rhs)); this->swap_column(tmp); return *this; } ~NullableColumn() override = default; bool has_null() const override { return _has_null; } void set_has_null(bool has_null) { _has_null = _has_null | has_null; } void update_has_null() { const NullColumn::Container& v = _null_column->get_data(); const auto* p = v.data(); _has_null = (p != nullptr) && (nullptr != memchr(p, 1, v.size() * sizeof(v[0]))); } bool is_nullable() const override { return true; } bool is_null(size_t index) const override { DCHECK_EQ(_null_column->size(), _data_column->size()); return _has_null && immutable_null_column_data()[index]; } bool low_cardinality() const override { return false; } const uint8_t* raw_data() const override { return _data_column->raw_data(); } uint8_t* mutable_raw_data() override { return reinterpret_cast(_data_column->mutable_raw_data()); } size_t size() const override { DCHECK_EQ(_data_column->size(), _null_column->size()); return _data_column->size(); } size_t type_size() const override { return _data_column->type_size() + _null_column->type_size(); } size_t byte_size() const override { return byte_size(0, size()); } size_t byte_size(size_t from, size_t size) const override { DCHECK_LE(from + size, this->size()) << "Range error"; return _data_column->byte_size(from, size) + _null_column->Column::byte_size(from, size); } size_t byte_size(size_t idx) const override { return _data_column->byte_size(idx) + sizeof(bool); } void reserve(size_t n) override { _data_column->reserve(n); _null_column->reserve(n); } void resize(size_t n) override { _data_column->resize(n); _null_column->resize(n); } void assign(size_t n, size_t idx) override { _data_column->assign(n, idx); _null_column->assign(n, idx); } void remove_first_n_values(size_t count) override { _data_column->remove_first_n_values(count); _null_column->remove_first_n_values(count); } void append_datum(const Datum& datum) override; void append(const Column& src, size_t offset, size_t count) override; void append_selective(const Column& src, const uint32_t* indexes, uint32_t from, uint32_t size) override; void append_value_multiple_times(const Column& src, uint32_t index, uint32_t size) override; bool append_nulls(size_t count) override; bool append_strings(const std::vector& strs) override; bool append_strings_overflow(const std::vector& strs, size_t max_length) override; bool append_continuous_strings(const std::vector& strs) override; size_t append_numbers(const void* buff, size_t length) override; void append_value_multiple_times(const void* value, size_t count) override; void append_default() override { append_nulls(1); } void append_default_not_null_value() { _data_column->append_default(); _null_column->append(0); } void append_default(size_t count) override { append_nulls(count); } uint32_t max_one_element_serialize_size() const override { return sizeof(bool) + _data_column->max_one_element_serialize_size(); } uint32_t serialize(size_t idx, uint8_t* pos) override; uint32_t serialize_default(uint8_t* pos) override; void serialize_batch(uint8_t* dst, Buffer& slice_sizes, size_t chunk_size, uint32_t max_one_row_size) override; const uint8_t* deserialize_and_append(const uint8_t* pos) override; void deserialize_and_append_batch(std::vector& srcs, size_t batch_size) override; uint32_t serialize_size(size_t idx) const override { if (_null_column->get_data()[idx]) { return sizeof(uint8_t); } return sizeof(uint8_t) + _data_column->serialize_size(idx); } size_t serialize_size() const override { return _data_column->serialize_size() + _null_column->serialize_size(); } uint8_t* serialize_column(uint8_t* dst) override; const uint8_t* deserialize_column(const uint8_t* src) override; MutableColumnPtr clone_empty() const override { return create_mutable(_data_column->clone_empty(), _null_column->clone_empty()); } size_t serialize_batch_at_interval(uint8_t* dst, size_t byte_offset, size_t byte_interval, size_t start, size_t count) override; size_t filter_range(const Column::Filter& filter, size_t from, size_t to) override; int compare_at(size_t left, size_t right, const Column& rhs, int nan_direction_hint) const override; void fvn_hash(uint32_t* hash, uint16_t from, uint16_t to) const override; void crc32_hash(uint32_t* hash, uint16_t from, uint16_t to) const override; void put_mysql_row_buffer(MysqlRowBuffer* buf, size_t idx) const override; std::string get_name() const override { return "nullable-" + _data_column->get_name(); } NullData& null_column_data() { return _null_column->get_data(); } const NullData& immutable_null_column_data() const { return _null_column->get_data(); } Column* mutable_data_column() { return _data_column.get(); } NullColumn* mutable_null_column() { return _null_column.get(); } const Column& data_column_ref() const { return *_data_column; } const ColumnPtr& data_column() const { return _data_column; } ColumnPtr& data_column() { return _data_column; } const NullColumnPtr& null_column() const { return _null_column; } Datum get(size_t n) const override { if (_has_null && _null_column->get_data()[n]) { return Datum(); } else { return _data_column->get(n); } } bool set_null(size_t idx) override { null_column_data()[idx] = 1; _has_null = true; return true; } size_t memory_usage() const override { return _data_column->memory_usage() + _null_column->memory_usage() + sizeof(bool); } size_t shrink_memory_usage() const override { return _data_column->shrink_memory_usage() + _null_column->shrink_memory_usage() + sizeof(bool); } size_t container_memory_usage() const override { return _data_column->container_memory_usage() + _null_column->container_memory_usage(); } size_t element_memory_usage(size_t from, size_t size) const override { DCHECK_LE(from + size, this->size()) << "Range error"; return _data_column->element_memory_usage(from, size) + _null_column->element_memory_usage(from, size); } void swap_column(Column& rhs) override { auto& r = down_cast(rhs); _data_column->swap_column(*r._data_column); _null_column->swap_column(*r._null_column); std::swap(_delete_state, r._delete_state); std::swap(_has_null, r._has_null); } void reset_column() override { Column::reset_column(); _data_column->reset_column(); _null_column->reset_column(); _has_null = false; } std::string debug_item(uint32_t idx) const override { DCHECK(_null_column->size() == _data_column->size()); std::stringstream ss; if (_null_column->get_data()[idx]) { ss << "NULL"; } else { ss << _data_column->debug_item(idx); } return ss.str(); } std::string debug_string() const override { DCHECK(_null_column->size() == _data_column->size()); std::stringstream ss; ss << "["; int size = _data_column->size(); for (int i = 0; i < size - 1; ++i) { ss << debug_item(i) << ", "; } if (size > 0) { ss << debug_item(size - 1); } ss << "]"; return ss.str(); } private: ColumnPtr _data_column; NullColumnPtr _null_column; bool _has_null; }; } // namespace starrocks::vectorized