diff --git a/format-sdk/build.sh b/format-sdk/build.sh old mode 100644 new mode 100755 diff --git a/format-sdk/src/main/cpp/CMakeLists.txt b/format-sdk/src/main/cpp/CMakeLists.txt index 6bb14c00132..6acdf2ae0d0 100644 --- a/format-sdk/src/main/cpp/CMakeLists.txt +++ b/format-sdk/src/main/cpp/CMakeLists.txt @@ -59,7 +59,7 @@ include_directories( set(JAVA_HOME ${THIRDPARTY_DIR}/open_jdk/) add_library(jvm SHARED IMPORTED) -FILE(GLOB_RECURSE LIB_JVM ${JAVA_HOME}/jre/lib/*/libjvm.so) +FILE(GLOB_RECURSE LIB_JVM ${JAVA_HOME}/lib/*/libjvm.so) set_target_properties(jvm PROPERTIES IMPORTED_LOCATION ${LIB_JVM}) include_directories(${JAVA_HOME}/include) include_directories(${JAVA_HOME}/include/linux) diff --git a/format-sdk/src/main/cpp/convert/binary_converter.h b/format-sdk/src/main/cpp/convert/binary_converter.h index f2ab7d4abda..6bfe2dba8bc 100644 --- a/format-sdk/src/main/cpp/convert/binary_converter.h +++ b/format-sdk/src/main/cpp/convert/binary_converter.h @@ -49,7 +49,7 @@ public: const arrow::MemoryPool* pool) : ColumnConverter(arrow_type, sr_field, pool){}; - arrow::Status toSrColumn(const std::shared_ptr array, ColumnPtr& column) override { + arrow::Status toSrColumn(const std::shared_ptr array, MutableColumnPtr& column) override { if (!column->is_nullable() && array->null_count() > 0) { return arrow::Status::Invalid("Column ", column->get_name(), " is non-nullable, but there are some null data in array."); @@ -101,7 +101,7 @@ public: return arrow::Status::OK(); } - arrow::Result> toArrowArray(const std::shared_ptr& column) override { + arrow::Result> toArrowArray(const ColumnPtr& column) override { using ArrowBuilderType = typename arrow::TypeTraits::BuilderType; std::unique_ptr builder = diff --git a/format-sdk/src/main/cpp/convert/column_converter.cpp b/format-sdk/src/main/cpp/convert/column_converter.cpp index 81bc2ca22c9..a6bec53423a 100644 --- a/format-sdk/src/main/cpp/convert/column_converter.cpp +++ b/format-sdk/src/main/cpp/convert/column_converter.cpp @@ -199,18 +199,18 @@ arrow::Result> ColumnConverter::convert_null_bitm return null_bitmap; } -ColumnPtr ColumnConverter::get_data_column(const ColumnPtr& column) { - if (column->is_nullable()) { - auto* nullable_column = down_cast(column.get()); +ColumnPtr ColumnConverter::get_data_column(const Column* column_ptr) { + if (column_ptr->is_nullable()) { + auto* nullable_column = down_cast(column_ptr); return nullable_column->data_column(); } - if (column->is_constant()) { - auto* const_column = down_cast(column.get()); + if (column_ptr->is_constant()) { + auto* const_column = down_cast(column_ptr); return const_column->data_column(); } - return column; + return column_ptr->get_ptr(); } } // namespace starrocks::lake::format \ No newline at end of file diff --git a/format-sdk/src/main/cpp/convert/column_converter.h b/format-sdk/src/main/cpp/convert/column_converter.h index 62ba6d599a3..4c16eb1cfab 100644 --- a/format-sdk/src/main/cpp/convert/column_converter.h +++ b/format-sdk/src/main/cpp/convert/column_converter.h @@ -56,17 +56,17 @@ public: /** * Convert arrow array to starrocks column. */ - virtual arrow::Status toSrColumn(std::shared_ptr array, ColumnPtr& column) = 0; + virtual arrow::Status toSrColumn(std::shared_ptr array, MutableColumnPtr& column) = 0; /** * Convert starrocks column to arrow array. */ - virtual arrow::Result> toArrowArray(const std::shared_ptr& column) = 0; + virtual arrow::Result> toArrowArray(const ColumnPtr& column) = 0; protected: arrow::Result> convert_null_bitmap(const Buffer& null_bytes); - static ColumnPtr get_data_column(const ColumnPtr& column); + static ColumnPtr get_data_column(const Column* column_ptr); protected: const std::shared_ptr _arrow_type; diff --git a/format-sdk/src/main/cpp/convert/nested_converter.h b/format-sdk/src/main/cpp/convert/nested_converter.h index fad43ac489c..d6934cd2730 100644 --- a/format-sdk/src/main/cpp/convert/nested_converter.h +++ b/format-sdk/src/main/cpp/convert/nested_converter.h @@ -51,7 +51,7 @@ public: const arrow::MemoryPool* pool) : ColumnConverter(arrow_type, sr_field, pool) {} - arrow::Status toSrColumn(const std::shared_ptr array, ColumnPtr& column) override { + arrow::Status toSrColumn(const std::shared_ptr array, MutableColumnPtr& column) override { if (!column->is_nullable() && array->null_count() > 0) { return arrow::Status::Invalid("Column ", column->get_name(), " is non-nullable, but there are some null data in array."); @@ -61,8 +61,9 @@ public: const auto& nested_array = arrow::internal::checked_pointer_cast(array); ARROW_ASSIGN_OR_RAISE(arrow::ArrayVector arrow_children_arrays, get_children_arrays(nested_array)); - const auto data_column = arrow::internal::checked_pointer_cast(get_data_column(column)); - ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns, get_children_columns(data_column)); + auto data_column = SrColumnType::static_pointer_cast(get_data_column(column.get())); + ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns, + get_children_columns(data_column.get())); if (arrow_children_arrays.size() != sr_sub_columns.size()) { return arrow::Status::Invalid("Can't convert nested array, the array children size(", @@ -76,10 +77,12 @@ public: // copy data column for (size_t idx = 0; idx < arrow_children_arrays.size(); ++idx) { - ARROW_RETURN_NOT_OK(_children[idx]->toSrColumn(arrow_children_arrays[idx], sr_sub_columns[idx])); + auto mutable_sr_column = sr_sub_columns[idx]->as_mutable_ptr(); + ARROW_RETURN_NOT_OK(_children[idx]->toSrColumn(arrow_children_arrays[idx], mutable_sr_column)); } // for print sr sub column; - ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns2, get_children_columns(data_column)); + ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns2, + get_children_columns(data_column.get())); // copy null bitmap if (column->is_nullable()) { @@ -95,10 +98,11 @@ public: return arrow::Status::OK(); } - arrow::Result> toArrowArray(const std::shared_ptr& column) override { + arrow::Result> toArrowArray(const ColumnPtr& column) override { // convert data column,include list:offsets, values, map: offsets, keys, values, struct: children columns. - const auto data_column = arrow::internal::checked_pointer_cast(get_data_column(column)); - ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns, get_children_columns(data_column)); + const auto data_column = SrColumnType::static_pointer_cast(get_data_column(column.get())); + ARROW_ASSIGN_OR_RAISE(std::vector sr_sub_columns, + get_children_columns(data_column.get())); std::vector> arrays; arrays.resize(sr_sub_columns.size()); @@ -115,7 +119,7 @@ public: // convert null bitmap std::shared_ptr null_bitmap; if (column->is_nullable()) { - auto nullable = down_cast(column.get()); + auto nullable = down_cast(column.get()); auto& null_bytes = nullable->immutable_null_column_data(); ARROW_ASSIGN_OR_RAISE(null_bitmap, convert_null_bitmap(null_bytes)); } @@ -144,7 +148,8 @@ private: template || std::is_same_v || std::is_same_v>> - arrow::Result get_children_columns(const std::shared_ptr data_column) { + + arrow::Result get_children_columns(const SrColumnClass* data_column) { if constexpr (std::is_same_v) { Columns all_sub_columns = {data_column->offsets_column(), data_column->elements_column()}; return all_sub_columns; diff --git a/format-sdk/src/main/cpp/convert/primitive_converter.h b/format-sdk/src/main/cpp/convert/primitive_converter.h index 2a9183d8679..98e04f7ef6c 100644 --- a/format-sdk/src/main/cpp/convert/primitive_converter.h +++ b/format-sdk/src/main/cpp/convert/primitive_converter.h @@ -68,7 +68,7 @@ public: } }; - arrow::Status toSrColumn(const std::shared_ptr array, ColumnPtr& column) override { + arrow::Status toSrColumn(const std::shared_ptr array, MutableColumnPtr& column) override { if (!column->is_nullable() && array->null_count() > 0) { return arrow::Status::Invalid("Column ", column->get_name(), " is non-nullable, but there are some null data in array."); @@ -79,7 +79,7 @@ public: // copy data column const auto& real_arrow_type = arrow::internal::checked_pointer_cast(_arrow_type); const auto& real_array = arrow::internal::checked_pointer_cast(array); - const auto data_column = arrow::internal::checked_pointer_cast(get_data_column(column)); + auto data_column = SrColumnType::dynamic_pointer_cast(get_data_column(column.get())); if constexpr (SR_TYPE == TYPE_DATE || SR_TYPE == TYPE_DATETIME) { for (size_t i = 0; i < num_rows; ++i) { SrCppType value; @@ -154,7 +154,7 @@ public: return arrow::Status::OK(); } - arrow::Result> toArrowArray(const std::shared_ptr& column) override { + arrow::Result> toArrowArray(const ColumnPtr& column) override { using ArrowBuilderType = typename arrow::TypeTraits::BuilderType; const auto& real_arrow_type = arrow::internal::checked_pointer_cast(_arrow_type); diff --git a/format-sdk/src/main/cpp/convert/starrocks_arrow_converter.cpp b/format-sdk/src/main/cpp/convert/starrocks_arrow_converter.cpp index 5d458222f9f..86d19cbc6b4 100644 --- a/format-sdk/src/main/cpp/convert/starrocks_arrow_converter.cpp +++ b/format-sdk/src/main/cpp/convert/starrocks_arrow_converter.cpp @@ -69,7 +69,8 @@ public: auto chunk = ChunkHelper::new_chunk(*_sr_schema, recordBatch->num_rows()); for (size_t idx = 0; idx < column_size; ++idx) { - ARROW_RETURN_NOT_OK(_converters[idx]->toSrColumn(recordBatch->column(idx), chunk->columns()[idx])); + auto mutable_column = chunk->columns()[idx]->as_mutable_ptr(); + ARROW_RETURN_NOT_OK(_converters[idx]->toSrColumn(recordBatch->column(idx), mutable_column)); } return chunk; diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 8a20ee1beea..76d48174219 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -1382,6 +1382,8 @@ build_icu() { # Use a subshell to prevent LD_LIBRARY_PATH from affecting the external environment ( export LD_LIBRARY_PATH=${STARROCKS_GCC_HOME}/lib:${STARROCKS_GCC_HOME}/lib64:${LD_LIBRARY_PATH:-} + export CFLAGS="-O3 -fno-omit-frame-pointer -fPIC" + export CXXFLAGS="-O3 -fno-omit-frame-pointer -fPIC" ./runConfigureICU Linux --prefix=$TP_INSTALL_DIR --enable-static --disable-shared make -j$PARALLEL make install