// Copyright 2021-present StarRocks, Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once // arrow dependencies #include #include #include #include #include #include #include // project dependencies #include "column_converter.h" // starrocks dependencies #include "column/array_column.h" #include "column/column.h" #include "column/field.h" #include "column/map_column.h" #include "column/struct_column.h" #include "types/logical_type.h" namespace starrocks::lake::format { template ::Type>::value || arrow::is_struct_type::Type>::value>> class NestedConverter : public ColumnConverter { using ArrorType = typename arrow::TypeIdTraits::Type; using ArrowArrayType = typename arrow::TypeTraits::ArrayType; using SrColumnType = RunTimeColumnType; public: NestedConverter(const std::shared_ptr arrow_type, const std::shared_ptr sr_field, const arrow::MemoryPool* pool) : ColumnConverter(arrow_type, sr_field, pool) {} arrow::Status toSrColumn(const std::shared_ptr array, MutableColumnPtr& column) override { if (!column->is_nullable() && array->null_count() > 0) { return arrow::Status::Invalid("Column ", column->get_name(), " is non-nullable, but there are some null data in array."); } // copy data column const auto& nested_array = arrow::internal::checked_pointer_cast(array); ARROW_ASSIGN_OR_RAISE(arrow::ArrayVector arrow_children_arrays, get_children_arrays(nested_array)); auto data_column = SrColumnType::static_pointer_cast(get_data_column(column.get())); ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns, get_children_columns(data_column.get())); if (arrow_children_arrays.size() != sr_sub_columns.size()) { return arrow::Status::Invalid("Can't convert nested array, the array children size(", arrow_children_arrays.size(), ") is not same as starrocks sub-column size (", sr_sub_columns.size(), ")"); } if (_children.size() < arrow_children_arrays.size()) { return arrow::Status::Invalid("Converter size (", _children.size(), ") is less than arrow array size(", arrow_children_arrays.size(), ")"); } // copy data column for (size_t idx = 0; idx < arrow_children_arrays.size(); ++idx) { auto mutable_sr_column = sr_sub_columns[idx]->as_mutable_ptr(); ARROW_RETURN_NOT_OK(_children[idx]->toSrColumn(arrow_children_arrays[idx], mutable_sr_column)); } // for print sr sub column; ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns2, get_children_columns(data_column.get())); // copy null bitmap if (column->is_nullable()) { size_t num_rows = array->length(); auto nullable = down_cast(column.get()); auto null_column = down_cast(nullable->null_column().get()); null_column->resize(num_rows); for (size_t i = 0; i < num_rows; ++i) { nullable->null_column_data()[i] = array->IsNull(i); } nullable->set_has_null(true); } return arrow::Status::OK(); } arrow::Result> toArrowArray(const ColumnPtr& column) override { // convert data column,include list:offsets, values, map: offsets, keys, values, struct: children columns. const auto data_column = SrColumnType::static_pointer_cast(get_data_column(column.get())); ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns, get_children_columns(data_column.get())); std::vector> arrays; arrays.resize(sr_sub_columns.size()); if (_children.size() < arrays.size()) { return arrow::Status::Invalid("Converter size (", _children.size(), ") is less than arrow array size(", arrays.size(), ")"); } // convert children data column for (size_t idx = 0; idx < arrays.size(); ++idx) { ARROW_ASSIGN_OR_RAISE(arrays[idx], _children[idx]->toArrowArray(sr_sub_columns[idx])); } // convert null bitmap std::shared_ptr null_bitmap; if (column->is_nullable()) { auto nullable = down_cast(column.get()); auto& null_bytes = nullable->immutable_null_column_data(); ARROW_ASSIGN_OR_RAISE(null_bitmap, convert_null_bitmap(null_bytes)); } return make_nested_array(arrays, null_bitmap); } private: template || std::is_same_v || std::is_same_v>> arrow::Result get_children_arrays(const std::shared_ptr array) { if constexpr (std::is_same_v) { arrow::ArrayVector all_arrays = {array->offsets(), array->values()}; return all_arrays; } else if constexpr (std::is_same_v) { arrow::ArrayVector all_arrays = {array->offsets(), array->keys(), array->items()}; return all_arrays; } else if constexpr (std::is_same_v) { return array->fields(); } else { static_assert(true, "Unsupported type"); } } template || std::is_same_v || std::is_same_v>> arrow::Result get_children_columns(const SrColumnClass* data_column) { if constexpr (std::is_same_v) { Columns all_sub_columns = {data_column->offsets_column(), data_column->elements_column()}; return all_sub_columns; } else if constexpr (std::is_same_v) { Columns all_sub_columns = {data_column->offsets_column(), data_column->keys_column(), data_column->values_column()}; return all_sub_columns; } else if constexpr (std::is_same_v) { return data_column->fields(); } else { static_assert(true, "Unsupported type"); } } template || std::is_same_v || std::is_same_v>> arrow::Result> make_nested_array(const arrow::ArrayVector& arrays, const std::shared_ptr null_bitmap) { if constexpr (std::is_same_v) { const auto& offsets = *arrays[0]; const auto& values = *arrays[1]; return arrow::ListArray::FromArrays(_arrow_type, offsets, values, const_cast(_pool), null_bitmap); } else if constexpr (std::is_same_v) { // array[0] is offset, array[1] is key, array[2] is value using OffsetArrayType = arrow::TypeTraits::OffsetArrayType; const auto& typed_offsets = arrow::internal::checked_pointer_cast(arrays[0]); return std::make_shared(_arrow_type, arrays[0]->length() - 1, typed_offsets->values(), arrays[1], arrays[2], null_bitmap); } else if constexpr (std::is_same_v) { return arrow::StructArray::Make(arrays, _arrow_type->fields(), null_bitmap); } else { static_assert(true, "Unsupported type"); } } }; } // namespace starrocks::lake::format