[Refactor] Refactor scalar function registration to speed up compilation (#61358)

Signed-off-by: stdpain <drfeng08@gmail.com>
This commit is contained in:
stdpain 2025-07-29 09:45:15 +08:00 committed by GitHub
parent d46937ed5c
commit 70a7f618d5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
39 changed files with 499 additions and 335 deletions

View File

@ -854,7 +854,7 @@ set(STARROCKS_LINK_LIBS
Column
Connector
Exec
Exprs
-Wl,--whole-archive Exprs -Wl,--no-whole-archive
FileSystem
Formats
Gutil

View File

@ -105,6 +105,8 @@ set(EXPR_FILES
dictionary_get_expr.cpp
ngram.cpp
match_expr.cpp
bit_functions.cpp
hash_functions.cpp
gin_functions.cpp
)

View File

@ -1763,3 +1763,5 @@ StatusOr<ColumnPtr> ArrayFunctions::array_flatten(FunctionContext* ctx, const Co
return result;
}
} // namespace starrocks
#include "gen_cpp/opcode/ArrayFunctions.inc"

View File

@ -17,13 +17,10 @@
#include "column/binary_column.h"
#include "column/column_builder.h"
#include "column/column_helper.h"
#include "column/column_viewer.h"
#include "column/nullable_column.h"
#include "exprs/base64.h"
#include "exprs/encryption_functions.h"
#include "exprs/function_helper.h"
#include "exprs/string_functions.h"
#include "gutil/strings/escaping.h"
namespace starrocks {
@ -147,3 +144,5 @@ StatusOr<ColumnPtr> BinaryFunctions::iceberg_truncate_binary(FunctionContext* co
}
} // namespace starrocks
#include "gen_cpp/opcode/BinaryFunctions.inc"

View File

@ -14,8 +14,6 @@
#pragma once
#include "column/column_builder.h"
#include "column/column_viewer.h"
#include "exprs/function_context.h"
#include "exprs/function_helper.h"

View File

@ -0,0 +1,102 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "exprs/bit_functions.h"
#include "exprs/binary_function.h"
#include "exprs/unary_function.h"
namespace starrocks {
// Per-element kernels for the two-operand bit functions. Each kernel is referenced
// further down in this file through VectorizedStrictBinaryFunction<...Impl>.

// lhs & rhs
DEFINE_BINARY_FUNCTION_WITH_IMPL(bitAndImpl, lhs, rhs) {
    return lhs & rhs;
}

// lhs | rhs
DEFINE_BINARY_FUNCTION_WITH_IMPL(bitOrImpl, lhs, rhs) {
    return lhs | rhs;
}

// lhs ^ rhs
DEFINE_BINARY_FUNCTION_WITH_IMPL(bitXorImpl, lhs, rhs) {
    return lhs ^ rhs;
}

// lhs << rhs
DEFINE_BINARY_FUNCTION_WITH_IMPL(bitShiftLeftImpl, lhs, rhs) {
    return lhs << rhs;
}

// lhs >> rhs, applied to the operand's own type (no unsigned reinterpretation here).
DEFINE_BINARY_FUNCTION_WITH_IMPL(bitShiftRightImpl, lhs, rhs) {
    return lhs >> rhs;
}
// Kernel for the logical right shift: the left operand is reinterpreted as the
// unsigned integer of the same width before shifting.
DEFINE_BINARY_FUNCTION_WITH_IMPL(bitShiftRightLogicalImpl, v, shift) {
    // Map every signed integer width handled here to its unsigned counterpart.
    // Any other operand type maps to itself, i.e. it is shifted unchanged.
    using ShiftOperand = std::conditional_t<
            std::is_same_v<LType, int8_t>, uint8_t,
            std::conditional_t<
                    std::is_same_v<LType, int16_t>, uint16_t,
                    std::conditional_t<
                            std::is_same_v<LType, int32_t>, uint32_t,
                            std::conditional_t<
                                    std::is_same_v<LType, int64_t>, uint64_t,
                                    std::conditional_t<std::is_same_v<LType, __int128_t>, uint128_t, LType>>>>>;
    return static_cast<ShiftOperand>(v) >> shift;
}
// Kernel for bitNot: yields the bitwise complement (~) of its operand.
// Referenced later in this file through VectorizedStrictUnaryFunction<bitNotImpl>.
DEFINE_UNARY_FN_WITH_IMPL(bitNotImpl, v) {
return ~v;
}
// Bitwise AND of argument 0 and argument 1, evaluated with the bitAndImpl kernel.
template <LogicalType Type>
StatusOr<ColumnPtr> BitFunctions::bitAnd(FunctionContext* context, const Columns& columns) {
    using Kernel = VectorizedStrictBinaryFunction<bitAndImpl>;
    const auto& lhs = VECTORIZED_FN_ARGS(0);
    const auto& rhs = VECTORIZED_FN_ARGS(1);
    return Kernel::evaluate<Type>(lhs, rhs);
}
// Bitwise OR of argument 0 and argument 1, evaluated with the bitOrImpl kernel.
template <LogicalType Type>
StatusOr<ColumnPtr> BitFunctions::bitOr(FunctionContext* context, const Columns& columns) {
    using Kernel = VectorizedStrictBinaryFunction<bitOrImpl>;
    const auto& lhs = VECTORIZED_FN_ARGS(0);
    const auto& rhs = VECTORIZED_FN_ARGS(1);
    return Kernel::evaluate<Type>(lhs, rhs);
}
// Bitwise XOR of argument 0 and argument 1, evaluated with the bitXorImpl kernel.
template <LogicalType Type>
StatusOr<ColumnPtr> BitFunctions::bitXor(FunctionContext* context, const Columns& columns) {
    using Kernel = VectorizedStrictBinaryFunction<bitXorImpl>;
    const auto& lhs = VECTORIZED_FN_ARGS(0);
    const auto& rhs = VECTORIZED_FN_ARGS(1);
    return Kernel::evaluate<Type>(lhs, rhs);
}
// Left shift: argument 0 is the value, argument 1 the shift amount (bitShiftLeftImpl kernel).
template <LogicalType Type>
StatusOr<ColumnPtr> BitFunctions::bitShiftLeft(FunctionContext* context, const Columns& columns) {
    using Kernel = VectorizedStrictBinaryFunction<bitShiftLeftImpl>;
    const auto& value = VECTORIZED_FN_ARGS(0);
    const auto& amount = VECTORIZED_FN_ARGS(1);
    return Kernel::evaluate<Type>(value, amount);
}
// Right shift: argument 0 is the value, argument 1 the shift amount (bitShiftRightImpl kernel).
template <LogicalType Type>
StatusOr<ColumnPtr> BitFunctions::bitShiftRight(FunctionContext* context, const Columns& columns) {
    using Kernel = VectorizedStrictBinaryFunction<bitShiftRightImpl>;
    const auto& value = VECTORIZED_FN_ARGS(0);
    const auto& amount = VECTORIZED_FN_ARGS(1);
    return Kernel::evaluate<Type>(value, amount);
}
// Logical right shift: argument 0 is the value, argument 1 the shift amount
// (bitShiftRightLogicalImpl kernel, which shifts the value as an unsigned integer).
template <LogicalType Type>
StatusOr<ColumnPtr> BitFunctions::bitShiftRightLogical(FunctionContext* context, const Columns& columns) {
    using Kernel = VectorizedStrictBinaryFunction<bitShiftRightLogicalImpl>;
    const auto& value = VECTORIZED_FN_ARGS(0);
    const auto& amount = VECTORIZED_FN_ARGS(1);
    return Kernel::evaluate<Type>(value, amount);
}
// Bitwise complement of argument 0, evaluated with the bitNotImpl kernel.
template <LogicalType Type>
StatusOr<ColumnPtr> BitFunctions::bitNot(FunctionContext* context, const Columns& columns) {
    using Kernel = VectorizedStrictUnaryFunction<bitNotImpl>;
    const auto& operand = VECTORIZED_FN_ARGS(0);
    return Kernel::evaluate<Type>(operand);
}
} // namespace starrocks
#include "gen_cpp/opcode/BitFunctions.inc"

View File

@ -14,44 +14,11 @@
#pragma once
#include "column/column.h"
#include "exprs/binary_function.h"
#include "exprs/unary_function.h"
#include "exprs/function_helper.h"
#include "types/logical_type.h"
namespace starrocks {
#define VECTORIZED_BIT_BINARY_IMPL(NAME, OP) \
DEFINE_BINARY_FUNCTION_WITH_IMPL(NAME##Impl, l, r) { return l OP r; }
VECTORIZED_BIT_BINARY_IMPL(bitAnd, &);
VECTORIZED_BIT_BINARY_IMPL(bitOr, |);
VECTORIZED_BIT_BINARY_IMPL(bitXor, ^);
VECTORIZED_BIT_BINARY_IMPL(bitShiftLeft, <<);
VECTORIZED_BIT_BINARY_IMPL(bitShiftRight, >>);
#undef VECTORIZED_BIT_BINARY_IMPL
DEFINE_BINARY_FUNCTION_WITH_IMPL(bitShiftRightLogicalImpl, v, shift) {
if constexpr (std::is_same_v<LType, int8_t>) {
return uint8_t(v) >> shift;
} else if constexpr (std::is_same_v<LType, int16_t>) {
return uint16_t(v) >> shift;
} else if constexpr (std::is_same_v<LType, int32_t>) {
return uint32_t(v) >> shift;
} else if constexpr (std::is_same_v<LType, int64_t>) {
return uint64_t(v) >> shift;
} else if constexpr (std::is_same_v<LType, __int128_t>) {
return uint128_t(v) >> shift;
} else {
return v >> shift;
}
}
DEFINE_UNARY_FN_WITH_IMPL(bitNotImpl, v) {
return ~v;
}
class BitFunctions {
public:
/**
@ -60,11 +27,7 @@ public:
* @return: TypeColumn
*/
template <LogicalType Type>
DEFINE_VECTORIZED_FN(bitAnd) {
auto l = VECTORIZED_FN_ARGS(0);
auto r = VECTORIZED_FN_ARGS(1);
return VectorizedStrictBinaryFunction<bitAndImpl>::evaluate<Type>(l, r);
}
DEFINE_VECTORIZED_FN(bitAnd);
/**
* @tparam : TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT, TYPE_LARGEINT
@ -72,11 +35,7 @@ public:
* @return: TypeColumn
*/
template <LogicalType Type>
DEFINE_VECTORIZED_FN(bitOr) {
auto l = VECTORIZED_FN_ARGS(0);
auto r = VECTORIZED_FN_ARGS(1);
return VectorizedStrictBinaryFunction<bitOrImpl>::evaluate<Type>(l, r);
}
DEFINE_VECTORIZED_FN(bitOr);
/**
* @tparam : TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT, TYPE_LARGEINT
@ -84,11 +43,7 @@ public:
* @return: TypeColumn
*/
template <LogicalType Type>
DEFINE_VECTORIZED_FN(bitXor) {
auto l = VECTORIZED_FN_ARGS(0);
auto r = VECTORIZED_FN_ARGS(1);
return VectorizedStrictBinaryFunction<bitXorImpl>::evaluate<Type>(l, r);
}
DEFINE_VECTORIZED_FN(bitXor);
/**
* @tparam : TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT, TYPE_LARGEINT
@ -96,11 +51,7 @@ public:
* @return: TypeColumn
*/
template <LogicalType Type>
DEFINE_VECTORIZED_FN(bitShiftLeft) {
auto l = VECTORIZED_FN_ARGS(0);
auto r = VECTORIZED_FN_ARGS(1);
return VectorizedStrictBinaryFunction<bitShiftLeftImpl>::evaluate<Type>(l, r);
}
DEFINE_VECTORIZED_FN(bitShiftLeft);
/**
* @tparam : TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT, TYPE_LARGEINT
@ -108,11 +59,7 @@ public:
* @return: TypeColumn
*/
template <LogicalType Type>
DEFINE_VECTORIZED_FN(bitShiftRight) {
auto l = VECTORIZED_FN_ARGS(0);
auto r = VECTORIZED_FN_ARGS(1);
return VectorizedStrictBinaryFunction<bitShiftRightImpl>::evaluate<Type>(l, r);
}
DEFINE_VECTORIZED_FN(bitShiftRight);
/**
* @tparam : TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT, TYPE_LARGEINT
@ -120,11 +67,7 @@ public:
* @return: TypeColumn
*/
template <LogicalType Type>
DEFINE_VECTORIZED_FN(bitShiftRightLogical) {
auto l = VECTORIZED_FN_ARGS(0);
auto r = VECTORIZED_FN_ARGS(1);
return VectorizedStrictBinaryFunction<bitShiftRightLogicalImpl>::evaluate<Type>(l, r);
}
DEFINE_VECTORIZED_FN(bitShiftRightLogical);
/**
* @tparam : TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT, TYPE_LARGEINT
@ -132,9 +75,6 @@ public:
* @return: TypeColumn
*/
template <LogicalType Type>
DEFINE_VECTORIZED_FN(bitNot) {
auto v = VECTORIZED_FN_ARGS(0);
return VectorizedStrictUnaryFunction<bitNotImpl>::evaluate<Type>(v);
}
DEFINE_VECTORIZED_FN(bitNot);
};
} // namespace starrocks

View File

@ -754,3 +754,5 @@ StatusOr<ColumnPtr> BitmapFunctions::bitmap_from_binary(FunctionContext* context
}
} // namespace starrocks
#include "gen_cpp/opcode/BitmapFunctions.inc"

View File

@ -14,7 +14,6 @@
#pragma once
#include "column/column.h"
#include "exprs/function_helper.h"
namespace starrocks {

View File

@ -45,16 +45,16 @@ struct FunctionDescriptor {
bool exception_safe_, bool check_overflow_)
: name(std::move(nm)),
args_nums(args),
scalar_function(sf),
prepare_function(pf),
close_function(cf),
scalar_function(std::move(sf)),
prepare_function(std::move(pf)),
close_function(std::move(cf)),
exception_safe(exception_safe_),
check_overflow(check_overflow_) {}
FunctionDescriptor(std::string nm, uint8_t args, ScalarFunction sf, bool exception_safe_, bool check_overflow_)
: name(std::move(nm)),
args_nums(args),
scalar_function(sf),
scalar_function(std::move(sf)),
prepare_function(nullptr),
close_function(nullptr),
exception_safe(exception_safe_),
@ -66,14 +66,22 @@ class BuiltinFunctions {
public:
static const FunctionDescriptor* find_builtin_function(uint64_t id) {
if (auto iter = _fn_tables.find(id); iter != _fn_tables.end()) {
if (auto iter = fn_tables().find(id); iter != fn_tables().end()) {
return &iter->second;
}
return nullptr;
};
template <class... Args>
static void emplace_builtin_function(uint64_t id, Args&&... args) {
fn_tables().emplace(id, FunctionDescriptor(std::forward<Args>(args)...));
}
private:
static FunctionTables _fn_tables;
static FunctionTables& fn_tables() {
static FunctionTables fn_tables;
return fn_tables;
}
};
} // namespace starrocks

View File

@ -14,14 +14,12 @@
#include "exprs/encryption_functions.h"
#include "column/column_builder.h"
#include "column/column_helper.h"
#include "column/column_viewer.h"
#include "common/status.h"
#include "exprs/base64.h"
#include "exprs/expr.h"
#include "util/aes_util.h"
#include "util/debug_util.h"
#include "util/integer_util.h"
#include "util/md5.h"
#include "util/sha.h"
@ -420,3 +418,5 @@ Status EncryptionFunctions::sha2_close(FunctionContext* context, FunctionContext
}
} // namespace starrocks
#include "gen_cpp/opcode/EncryptionFunctions.inc"

View File

@ -14,9 +14,6 @@
#pragma once
#include <rapidjson/document.h>
#include "column/column_builder.h"
#include "exprs/builtin_functions.h"
#include "exprs/function_helper.h"

View File

@ -15,7 +15,6 @@
#include "exprs/es_functions.h"
#include "column/column_builder.h"
#include "column/column_viewer.h"
namespace starrocks {
@ -30,3 +29,5 @@ StatusOr<ColumnPtr> ESFunctions::match(FunctionContext* context, const Columns&
}
} // namespace starrocks
#include "gen_cpp/opcode/ESFunctions.inc"

View File

@ -435,3 +435,5 @@ Status GeoFunctions::st_polygon_prepare(FunctionContext* ctx, FunctionContext::F
}
} // namespace starrocks
#include "gen_cpp/opcode/GeoFunctions.inc"

View File

@ -14,7 +14,6 @@
#pragma once
#include "column/vectorized_fwd.h"
#include "common/status.h"
#include "exprs/function_context.h"
#include "exprs/function_helper.h"

View File

@ -22,7 +22,6 @@
#include "column/array_column.h"
#include "column/column_viewer.h"
#include "column/datum.h"
#include "util/faststring.h"
namespace starrocks {
@ -114,4 +113,5 @@ StatusOr<ColumnPtr> GinFunctions::tokenize(FunctionContext* context, const starr
return NullableColumn::create(result_array, null_array);
}
} // namespace starrocks
} // namespace starrocks
#include "gen_cpp/opcode/GinFunctions.inc"

View File

@ -30,3 +30,5 @@ StatusOr<ColumnPtr> GroupingSetsFunctions::grouping(FunctionContext* context, co
}
} // namespace starrocks
#include "gen_cpp/opcode/GroupingSetsFunctions.inc"

View File

@ -14,11 +14,7 @@
#pragma once
#include "column/vectorized_fwd.h"
#include "common/status.h"
#include "exprs/function_context.h"
#include "exprs/function_helper.h"
#include "geo/geo_common.h"
namespace starrocks {

View File

@ -0,0 +1,181 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "exprs/hash_functions.h"
#include "column/column_builder.h"
#include "column/column_viewer.h"
#include "exprs/function_context.h"
#include "util/xxh3.h"
namespace starrocks {
// Computes a chained 32-bit murmur3 hash over all argument columns, row by row.
//
// Every argument is read through a TYPE_VARCHAR viewer. For each row the hash starts
// at HashUtil::MURMUR3_32_SEED and the result of hashing one argument is used as the
// seed for the next one. As soon as one argument is NULL in a row, the remaining
// arguments are skipped and the row is appended as NULL.
//
// @param context unused by this function
// @param columns argument columns; columns[0] is read unconditionally, so at least
//                one column is expected
// @return a TYPE_INT column built with ColumnHelper::is_all_const(columns) as the
//         build flag
StatusOr<ColumnPtr> HashFunctions::murmur_hash3_32(FunctionContext* context, const starrocks::Columns& columns) {
    std::vector<ColumnViewer<TYPE_VARCHAR>> viewers;
    viewers.reserve(columns.size());
    for (const auto& column : columns) {
        viewers.emplace_back(column);
    }

    const size_t size = columns[0]->size();
    ColumnBuilder<TYPE_INT> builder(size);

    // The row counter is size_t so that it is compared against `size` without a
    // signed/unsigned conversion.
    for (size_t row = 0; row < size; ++row) {
        uint32_t seed = HashUtil::MURMUR3_32_SEED;
        bool has_null = false;
        for (const auto& viewer : viewers) {
            if (viewer.is_null(row)) {
                has_null = true;
                break;
            }
            auto slice = viewer.value(row);
            seed = HashUtil::murmur_hash3_32(slice.data, slice.size, seed);
        }
        builder.append(seed, has_null);
    }

    return builder.build(ColumnHelper::is_all_const(columns));
}
// Computes a chained 64-bit XXH3 hash over all argument columns.
//
// Every argument is read through a TYPE_VARCHAR viewer. Processing is column-major:
// a per-row running hash (initialised to HashUtil::XXHASH3_64_SEED) is fed as the seed
// when hashing the next argument of the same row. Once a row has seen a NULL argument
// it is marked NULL and no further arguments are hashed for it.
//
// @param context unused by this function
// @param columns argument columns; columns[0] is read unconditionally, so at least
//                one column is expected
// @return a TYPE_BIGINT column built with ColumnHelper::is_all_const(columns) as the
//         build flag
StatusOr<ColumnPtr> HashFunctions::xx_hash3_64(FunctionContext* context, const starrocks::Columns& columns) {
    std::vector<ColumnViewer<TYPE_VARCHAR>> column_viewers;
    column_viewers.reserve(columns.size());
    for (const auto& column : columns) {
        column_viewers.emplace_back(column);
    }

    const uint64_t default_xxhash_seed = HashUtil::XXHASH3_64_SEED;
    const size_t row_size = columns[0]->size();

    std::vector<uint64_t> seeds_vec(row_size, default_xxhash_seed);
    std::vector<bool> is_null_vec(row_size, false);

    for (const auto& viewer : column_viewers) {
        for (size_t row = 0; row < row_size; ++row) {
            if (is_null_vec[row]) {
                continue;
            }
            if (viewer.is_null(row)) {
                is_null_vec[row] = true;
                continue;
            }
            auto slice = viewer.value(row);
            const uint64_t seed = seeds_vec[row];
            seeds_vec[row] = HashUtil::xx_hash3_64(slice.data, slice.size, seed);
        }
    }

    ColumnBuilder<TYPE_BIGINT> builder(row_size);
    // size_t counter, matching the type of row_size and of the loop above.
    for (size_t row = 0; row < row_size; ++row) {
        builder.append(seeds_vec[row], is_null_vec[row]);
    }

    return builder.build(ColumnHelper::is_all_const(columns));
}
// Computes a 128-bit XXH3 hash over all argument columns using the streaming API.
//
// One XXH3 state per row is reset with HashUtil::XXHASH3_64_SEED, then every non-NULL
// argument value of that row is fed into the state in argument order. Once a row has
// seen a NULL argument it is marked NULL and receives no further updates. The digest's
// high64/low64 halves are combined into a single int128_t value.
//
// @param context unused by this function
// @param columns argument columns; columns[0] is read unconditionally, so at least
//                one column is expected
// @return a TYPE_LARGEINT column built with ColumnHelper::is_all_const(columns) as the
//         build flag, or Status::InternalError if resetting or updating a state fails
StatusOr<ColumnPtr> HashFunctions::xx_hash3_128(FunctionContext* context, const starrocks::Columns& columns) {
    std::vector<ColumnViewer<TYPE_VARCHAR>> column_viewers;
    column_viewers.reserve(columns.size());
    for (const auto& column : columns) {
        column_viewers.emplace_back(column);
    }

    const size_t row_size = columns[0]->size();
    const uint64_t default_xxhash_seed = HashUtil::XXHASH3_64_SEED;

    std::vector<XXH3_state_t> states(row_size);
    std::vector<bool> is_null_vec(row_size, false);

    for (size_t i = 0; i < row_size; i++) {
        const XXH_errorcode code = XXH3_128bits_reset_withSeed(&(states[i]), default_xxhash_seed);
        if (code != XXH_OK) {
            return Status::InternalError("init xxh3 state failed");
        }
    }

    for (const auto& viewer : column_viewers) {
        for (size_t row = 0; row < row_size; ++row) {
            if (is_null_vec[row]) {
                continue;
            }
            if (viewer.is_null(row)) {
                is_null_vec[row] = true;
                continue;
            }
            auto slice = viewer.value(row);
            const XXH_errorcode code = XXH3_128bits_update(&(states[row]), slice.data, slice.size);
            if (code != XXH_OK) {
                return Status::InternalError("update xxh3 state failed");
            }
        }
    }

    ColumnBuilder<TYPE_LARGEINT> builder(row_size);
    // size_t counter, matching the type of row_size and of the loops above.
    for (size_t row = 0; row < row_size; ++row) {
        const XXH128_hash_t value = XXH3_128bits_digest(&states[row]);
        // high64 fills the upper 64 bits, low64 the lower 64 bits.
        const int128_t res = (static_cast<int128_t>(value.high64) << 64) | static_cast<uint64_t>(value.low64);
        builder.append(res, is_null_vec[row]);
    }

    return builder.build(ColumnHelper::is_all_const(columns));
}
// Computes a CRC32 hash for each row of the single argument column.
//
// Defined out of line without `inline`: this definition lives in a .cpp file, and an
// inline function would have to be defined in every translation unit that uses it.
// The other HashFunctions definitions in this file are likewise non-inline.
//
// Behaviour by input shape:
//  - only-null column: returned unchanged;
//  - constant column: the data column's single value is hashed once and a constant
//    TYPE_BIGINT column of the same row count is returned;
//  - otherwise: all rows are hashed via Column::crc32_hash, and rows flagged in the
//    null column are appended as NULL.
//
// @param context unused by this function
// @param columns exactly one argument column (checked with DCHECK_EQ)
// @return a TYPE_BIGINT column holding the 32-bit hash values
StatusOr<ColumnPtr> HashFunctions::crc32_hash(FunctionContext* context, const starrocks::Columns& columns) {
    DCHECK_EQ(columns.size(), 1);
    const auto& col = columns[0];
    const size_t row_size = col->size();

    if (col->only_null()) {
        return col;
    }

    if (col->is_constant()) {
        uint32_t hash_value = 0;
        auto const_column = ColumnHelper::as_raw_column<ConstColumn>(col);
        // Hash only the first (and only) value of the underlying data column.
        const_column->data_column()->crc32_hash(&hash_value, 0, 1);
        return ColumnHelper::create_const_column<TYPE_BIGINT>(hash_value, row_size);
    }

    std::vector<uint32_t> hash_values(row_size);
    col->crc32_hash(hash_values.data(), 0, row_size);

    ColumnBuilder<TYPE_BIGINT> builder(row_size);
    const bool is_nullable = col->is_nullable();
    const uint8_t* null_data = nullptr;
    if (is_nullable) {
        auto* null_column = ColumnHelper::as_raw_column<NullableColumn>(col);
        null_data = null_column->null_column()->get_data().data();
    }

    for (size_t row = 0; row < row_size; ++row) {
        if (is_nullable && null_data[row]) {
            builder.append_null();
        } else {
            builder.append(hash_values[row]);
        }
    }

    return builder.build(false);
}
} // namespace starrocks
#include "gen_cpp/opcode/HashFunctions.inc"

View File

@ -14,11 +14,7 @@
#pragma once
#include "column/column_builder.h"
#include "column/column_viewer.h"
#include "exprs/function_context.h"
#include "exprs/function_helper.h"
#include "util/xxh3.h"
namespace starrocks {
class HashFunctions {
@ -48,160 +44,4 @@ public:
DEFINE_VECTORIZED_FN(crc32_hash);
};
inline StatusOr<ColumnPtr> HashFunctions::murmur_hash3_32(FunctionContext* context, const starrocks::Columns& columns) {
std::vector<ColumnViewer<TYPE_VARCHAR>> viewers;
viewers.reserve(columns.size());
for (const auto& column : columns) {
viewers.emplace_back(column);
}
size_t size = columns[0]->size();
ColumnBuilder<TYPE_INT> builder(size);
for (int row = 0; row < size; ++row) {
uint32_t seed = HashUtil::MURMUR3_32_SEED;
bool has_null = false;
for (const auto& viewer : viewers) {
if (viewer.is_null(row)) {
has_null = true;
break;
}
auto slice = viewer.value(row);
seed = HashUtil::murmur_hash3_32(slice.data, slice.size, seed);
}
builder.append(seed, has_null);
}
return builder.build(ColumnHelper::is_all_const(columns));
}
inline StatusOr<ColumnPtr> HashFunctions::xx_hash3_64(FunctionContext* context, const starrocks::Columns& columns) {
std::vector<ColumnViewer<TYPE_VARCHAR>> column_viewers;
column_viewers.reserve(columns.size());
for (const auto& column : columns) {
column_viewers.emplace_back(column);
}
const uint64_t default_xxhash_seed = HashUtil::XXHASH3_64_SEED;
size_t row_size = columns[0]->size();
std::vector<uint64_t> seeds_vec(row_size, default_xxhash_seed);
std::vector<bool> is_null_vec(row_size, false);
for (const auto& viewer : column_viewers) {
for (size_t row = 0; row < row_size; ++row) {
if (is_null_vec[row]) {
continue;
}
if (viewer.is_null(row)) {
is_null_vec[row] = true;
continue;
}
auto slice = viewer.value(row);
uint64_t seed = seeds_vec[row];
seeds_vec[row] = HashUtil::xx_hash3_64(slice.data, slice.size, seed);
}
}
ColumnBuilder<TYPE_BIGINT> builder(row_size);
for (int row = 0; row < row_size; ++row) {
builder.append(seeds_vec[row], is_null_vec[row]);
}
return builder.build(ColumnHelper::is_all_const(columns));
}
inline StatusOr<ColumnPtr> HashFunctions::xx_hash3_128(FunctionContext* context, const starrocks::Columns& columns) {
std::vector<ColumnViewer<TYPE_VARCHAR>> column_viewers;
column_viewers.reserve(columns.size());
for (const auto& column : columns) {
column_viewers.emplace_back(column);
}
size_t row_size = columns[0]->size();
const uint64_t default_xxhash_seed = HashUtil::XXHASH3_64_SEED;
std::vector<XXH3_state_t> states(row_size);
std::vector<bool> is_null_vec(row_size, false);
for (size_t i = 0; i < row_size; i++) {
XXH_errorcode code = XXH3_128bits_reset_withSeed(&(states[i]), default_xxhash_seed);
if (code != XXH_OK) {
return Status::InternalError("init xxh3 state failed");
}
}
for (const auto& viewer : column_viewers) {
for (size_t row = 0; row < row_size; ++row) {
if (is_null_vec[row]) {
continue;
}
if (viewer.is_null(row)) {
is_null_vec[row] = true;
continue;
}
auto slice = viewer.value(row);
XXH_errorcode code = XXH3_128bits_update(&(states[row]), slice.data, slice.size);
if (code != XXH_OK) {
return Status::InternalError("update xxh3 state failed");
}
}
}
ColumnBuilder<TYPE_LARGEINT> builder(row_size);
for (int row = 0; row < row_size; ++row) {
XXH128_hash_t value = XXH3_128bits_digest(&states[row]);
int128_t res = ((int128_t)value.high64 << 64) | (uint64_t)value.low64;
builder.append(res, is_null_vec[row]);
}
return builder.build(ColumnHelper::is_all_const(columns));
}
inline StatusOr<ColumnPtr> HashFunctions::crc32_hash(FunctionContext* context, const starrocks::Columns& columns) {
DCHECK_EQ(columns.size(), 1);
const auto& col = columns[0];
const size_t row_size = col->size();
if (col->only_null()) {
return col;
}
if (col->is_constant()) {
uint32_t hash_value = 0;
auto const_column = ColumnHelper::as_raw_column<ConstColumn>(col);
const_column->data_column()->crc32_hash(&hash_value, 0, 1);
return ColumnHelper::create_const_column<TYPE_BIGINT>(hash_value, row_size);
}
std::vector<uint32_t> hash_values(row_size);
col->crc32_hash(hash_values.data(), 0, row_size);
ColumnBuilder<TYPE_BIGINT> builder(row_size);
const bool is_nullable = col->is_nullable();
const uint8_t* null_data = nullptr;
if (is_nullable) {
auto* null_column = ColumnHelper::as_raw_column<NullableColumn>(col);
null_data = null_column->null_column()->get_data().data();
}
for (size_t row = 0; row < row_size; ++row) {
if (is_nullable && null_data[row]) {
builder.append_null();
} else {
builder.append(hash_values[row]);
}
}
return builder.build(false);
}
} // namespace starrocks

View File

@ -112,3 +112,5 @@ StatusOr<ColumnPtr> HyperloglogFunctions::hll_deserialize(FunctionContext* conte
}
} // namespace starrocks
#include "gen_cpp/opcode/HyperloglogFunctions.inc"

View File

@ -1162,3 +1162,5 @@ StatusOr<ColumnPtr> JsonFunctions::to_json(FunctionContext* context, const Colum
}
} // namespace starrocks
#include "gen_cpp/opcode/JsonFunctions.inc"

View File

@ -20,13 +20,9 @@
#include <utility>
#include "column/column_builder.h"
#include "column/vectorized_fwd.h"
#include "common/compiler_util.h"
#include "common/status.h"
#include "exprs/function_context.h"
#include "exprs/function_helper.h"
#include "exprs/jsonpath.h"
#include "types/logical_type.h"
namespace starrocks {

View File

@ -685,3 +685,5 @@ void LikePredicate::remove_escape_character(std::string* search_string) {
}
} // namespace starrocks
#include "gen_cpp/opcode/LikePredicate.inc"

View File

@ -484,3 +484,5 @@ StatusOr<ColumnPtr> MapFunctions::map_concat(FunctionContext* context, const Col
}
} // namespace starrocks
#include "gen_cpp/opcode/MapFunctions.inc"

View File

@ -14,15 +14,8 @@
#pragma once
#include "column/array_column.h"
#include "column/column_builder.h"
#include "column/column_hash.h"
#include "column/column_viewer.h"
#include "column/map_column.h"
#include "exprs/function_context.h"
#include "exprs/function_helper.h"
#include "util/orlp/pdqsort.h"
#include "util/phmap/phmap.h"
namespace starrocks {

View File

@ -29,7 +29,6 @@
#include "exprs/function_helper.h"
#include "exprs/math_functions.h"
#include "util/murmur_hash3.h"
#include "util/time.h"
namespace starrocks {
@ -1342,3 +1341,5 @@ StatusOr<ColumnPtr> MathFunctions::l2_distance(FunctionContext* context, const C
template StatusOr<ColumnPtr> MathFunctions::l2_distance<TYPE_FLOAT>(FunctionContext* context, const Columns& columns);
} // namespace starrocks
#include "gen_cpp/opcode/MathFunctions.inc"

View File

@ -19,10 +19,8 @@
#include "column/column_viewer.h"
#include "column/vectorized_fwd.h"
#include "exprs/agg/percentile_cont.h"
#include "gutil/strings/substitute.h"
#include "types/logical_type.h"
#include "util/percentile_value.h"
#include "util/string_parser.hpp"
namespace starrocks {
@ -97,3 +95,5 @@ struct LCPercentileExtracter {
};
} // namespace starrocks
#include "gen_cpp/opcode/PercentileFunctions.inc"

View File

@ -14,7 +14,6 @@
#pragma once
#include "column/column.h"
#include "exprs/function_helper.h"
namespace starrocks {

View File

@ -4849,3 +4849,5 @@ StatusOr<ColumnPtr> StringFunctions::crc32(FunctionContext* context, const Colum
return VectorizedStrictUnaryFunction<crc32Impl>::evaluate<TYPE_VARCHAR, TYPE_BIGINT>(columns[0]);
}
} // namespace starrocks
#include "gen_cpp/opcode/StringFunctions.inc"

View File

@ -51,3 +51,5 @@ StatusOr<ColumnPtr> StructFunctions::named_struct(FunctionContext* context, cons
return new_struct(context, cols);
}
} // namespace starrocks
#include "gen_cpp/opcode/StructFunctions.inc"

View File

@ -19,7 +19,6 @@
#include <libdivide.h>
#include <algorithm>
#include <mutex>
#include <string_view>
#include <unordered_map>
@ -3891,3 +3890,5 @@ StatusOr<ColumnPtr> TimeFunctions::time_format(FunctionContext* context, const s
}
} // namespace starrocks
#include "gen_cpp/opcode/TimeFunctions.inc"

View File

@ -21,7 +21,6 @@
#include "exprs/function_helper.h"
#include "runtime/datetime_value.h"
#include "types/logical_type.h"
#include "util/timezone_hsscan.h"
namespace starrocks {
// TODO:

View File

@ -27,9 +27,7 @@
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <mutex>
#include <random>
#include <thread>
#include "column/column_builder.h"
#include "column/column_viewer.h"
@ -45,10 +43,8 @@
#include "util/cidr.h"
#include "util/monotime.h"
#include "util/network_util.h"
#include "util/thread.h"
#include "util/thrift_rpc_helper.h"
#include "util/time.h"
#include "util/uid_util.h"
namespace starrocks {
@ -371,3 +367,5 @@ StatusOr<ColumnPtr> UtilityFunctions::equiwidth_bucket(FunctionContext* context,
}
} // namespace starrocks
#include "gen_cpp/opcode/UtilityFunctions.inc"

View File

@ -70,7 +70,6 @@ set(SRC_FILES
#$${GEN_CPP_DIR}/opcode/functions.cc
#$${GEN_CPP_DIR}/opcode/vector-functions.cc
#$${GEN_CPP_DIR}/opcode/opcode-registry-init.cc
${GEN_CPP_DIR}/opcode/builtin_functions.cpp
${GEN_CPP_DIR}/RuntimeFilter_types.cpp
${GEN_CPP_DIR}/version.cpp
${GEN_CPP_DIR}/CloudConfiguration_types.cpp

View File

@ -43,7 +43,6 @@ if ((NOT ${MAKE_TEST} STREQUAL "ON") AND (NOT BUILD_FORMAT_LIB))
target_link_libraries(starrocks_be
${STARROCKS_LINK_LIBS}
)
install(DIRECTORY DESTINATION ${OUTPUT_DIR}/lib/)
install(TARGETS starrocks_be

View File

@ -26,7 +26,7 @@ set(STARROCKS_LIBS
Column
Connector
Exec
Exprs
-Wl,--whole-archive Exprs -Wl,--no-whole-archive
FileSystem
Formats
Gutil

View File

@ -26,7 +26,27 @@ all: gen_version gen_functions
# generated vectorized engine function
GEN_FUNCTIONS_OUTPUT = ${FE_TARGET_DIR}/com/starrocks/builtins/VectorizedBuiltinFunctions.java \
${BUILD_DIR}/gen_cpp/opcode/builtin_functions.cpp
${BUILD_DIR}/gen_cpp/opcode/MathFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/StringFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/LikePredicate.inc \
${BUILD_DIR}/gen_cpp/opcode/BinaryFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/BitFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/TimeFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/ConditionFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/HyperloglogFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/BitmapFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/HashFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/GroupingSetsFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/StructFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/UtilityFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/JsonFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/EncryptionFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/ESFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/GeoFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/PercentileFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/ArrayFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/MapFunctions.inc \
${BUILD_DIR}/gen_cpp/opcode/GinFunctions.inc \
${GEN_FUNCTIONS_OUTPUT}: functions.py gen_functions.py
${PYTHON} ${CURDIR}/gen_functions.py --cpp ${BUILD_DIR}/gen_cpp --java ${FE_TARGET_DIR}

View File

@ -48,7 +48,8 @@ license_string = """
// common/function-registry/starrocks_builtins_functions.py.
"""
java_template = Template("""
java_template = Template(
"""
${license}
package com.starrocks.builtins;
@ -62,42 +63,17 @@ public class VectorizedBuiltinFunctions {
}
}
""")
"""
)
cpp_template = Template("""
${license}
#include "exprs/array_functions.h"
cpp_template = """
#include "exprs/builtin_functions.h"
#include "exprs/map_functions.h"
#include "exprs/struct_functions.h"
#include "exprs/math_functions.h"
#include "exprs/bit_functions.h"
#include "exprs/binary_functions.h"
#include "exprs/string_functions.h"
#include "exprs/time_functions.h"
#include "exprs/like_predicate.h"
#include "exprs/is_null_predicate.h"
#include "exprs/hyperloglog_functions.h"
#include "exprs/bitmap_functions.h"
#include "exprs/json_functions.h"
#include "exprs/hash_functions.h"
#include "exprs/encryption_functions.h"
#include "exprs/geo_functions.h"
#include "exprs/percentile_functions.h"
#include "exprs/grouping_sets_functions.h"
#include "exprs/es_functions.h"
#include "exprs/utility_functions.h"
#include "exprs/gin_functions.h"
namespace starrocks {
BuiltinFunctions::FunctionTables BuiltinFunctions::_fn_tables = {
${functions}
};
}
""")
namespace starrocks {{
void __attribute__((constructor)) {module}_initialize() {{
{content}
}}
}}
"""
function_list = list()
function_set = set()
@ -120,7 +96,11 @@ def add_function(fn_data):
entry["ret"] = fn_data[4]
entry["args"] = fn_data[5]
function_signature = "%s#%s#(%s)" % (entry["ret"], entry["name"], ", ".join(entry["args"]))
function_signature = "%s#%s#(%s)" % (
entry["ret"],
entry["name"],
", ".join(entry["args"]),
)
if function_signature in function_signature_set:
print("=================================================================")
@ -130,8 +110,12 @@ def add_function(fn_data):
function_signature_set.add(function_signature)
if "..." in fn_data[5]:
assert 2 <= len(fn_data[5]), "Invalid arguments in functions.py:\n\t" + repr(fn_data)
assert "..." == fn_data[5][-1], "variadic parameter must at the end:\n\t" + repr(fn_data)
assert 2 <= len(fn_data[5]), "Invalid arguments in functions.py:\n\t" + repr(
fn_data
)
assert (
"..." == fn_data[5][-1]
), "variadic parameter must at the end:\n\t" + repr(fn_data)
entry["args_nums"] = len(fn_data[5]) - 1
else:
@ -148,11 +132,14 @@ def add_function(fn_data):
def generate_fe(path):
fn_template = Template(
'functionSet.addVectorizedScalarBuiltin(${id}, "${name}", ${has_vargs}, Type.${ret}${args_types});')
'functionSet.addVectorizedScalarBuiltin(${id}, "${name}", ${has_vargs}, Type.${ret}${args_types});'
)
def gen_fe_fn(fnm):
fnm["args_types"] = ", " if len(fnm["args"]) > 0 else ""
fnm["args_types"] = fnm["args_types"] + ", ".join(["Type." + i for i in fnm["args"] if i != "..."])
fnm["args_types"] = fnm["args_types"] + ", ".join(
["Type." + i for i in fnm["args"] if i != "..."]
)
fnm["has_vargs"] = "true" if "..." in fnm["args"] else "false"
return fn_template.substitute(fnm)
@ -172,12 +159,24 @@ def generate_cpp(path):
res = ""
if "prepare" in fnm:
res = '{%d, {"%s", %d, %s, %s, %s, %s, %s' % (
fnm["id"], fnm["name"], fnm["args_nums"], fnm["fn"], fnm["prepare"], fnm["close"],
fnm["exception_safe"], fnm["check_overflow"])
fnm["id"],
fnm["name"],
fnm["args_nums"],
fnm["fn"],
fnm["prepare"],
fnm["close"],
fnm["exception_safe"],
fnm["check_overflow"],
)
else:
res = '{%d, {"%s", %d, %s, %s, %s' % (
fnm["id"], fnm["name"], fnm["args_nums"], fnm["fn"], fnm["exception_safe"],
fnm["check_overflow"])
fnm["id"],
fnm["name"],
fnm["args_nums"],
fnm["fn"],
fnm["exception_safe"],
fnm["check_overflow"],
)
return res + "}}"
@ -185,25 +184,99 @@ def generate_cpp(path):
value["license"] = license_string
value["functions"] = ", \n ".join([gen_be_fn(i) for i in function_list])
content = cpp_template.substitute(value)
modules = [
"MathFunctions",
"StringFunctions",
"LikePredicate",
"BinaryFunctions",
"BitFunctions",
"TimeFunctions",
"ConditionFunctions",
"HyperloglogFunctions",
"BitmapFunctions",
"HashFunctions",
"GroupingSetsFunctions",
"StructFunctions",
"UtilityFunctions",
"JsonFunctions",
"EncryptionFunctions",
"ESFunctions",
"GeoFunctions",
"PercentileFunctions",
"ArrayFunctions",
"MapFunctions",
"GinFunctions",
]
with open(path, mode="w+") as f:
f.write(content)
modules_contents = dict()
for module in modules:
modules_contents[module] = ""
for fnm in function_list:
target = "Unknown"
if fnm["fn"] == "nullptr":
continue
for module in modules:
if module in fnm["fn"]:
target = module
break
if target == "Unknown":
print("fnm:" + fnm["fn"] + str(fnm))
if "prepare" in fnm:
modules_contents[target] = modules_contents[
target
] + '\tBuiltinFunctions::emplace_builtin_function(static_cast<uint64_t>(%d), "%s", %d, %s, %s, %s, %s, %s);\n' % (
fnm["id"],
fnm["name"],
fnm["args_nums"],
fnm["fn"],
fnm["prepare"],
fnm["close"],
fnm["exception_safe"],
fnm["check_overflow"],
)
else:
modules_contents[target] = modules_contents[
target
] + '\tBuiltinFunctions::emplace_builtin_function(static_cast<uint64_t>(%d), "%s", %d, %s, %s, %s);\n' % (
fnm["id"],
fnm["name"],
fnm["args_nums"],
fnm["fn"],
fnm["exception_safe"],
fnm["check_overflow"],
)
for module in modules:
with open(path + module + ".inc", mode="w+") as f:
content = cpp_template.format(
module=module, content=modules_contents[module]
)
f.write(content)
if __name__ == '__main__':
if __name__ == "__main__":
FE_PATH = "../../fe/fe-core/target/generated-sources/build"
BE_PATH = "../build/gen_cpp"
parser = argparse.ArgumentParser()
parser.add_argument("--cpp", dest='cpp_path', default=BE_PATH, help="Path of generated cpp file", type=str)
parser.add_argument("--java", dest='java_path', default=FE_PATH, help="Path of generated java file", type=str)
parser.add_argument(
"--cpp",
dest="cpp_path",
default=BE_PATH,
help="Path of generated cpp file",
type=str,
)
parser.add_argument(
"--java",
dest="java_path",
default=FE_PATH,
help="Path of generated java file",
type=str,
)
args = parser.parse_args()
# Read the function metadata inputs
for function in functions.vectorized_functions:
add_function(function)
be_functions_dir = args.cpp_path + "/opcode"
if not os.path.exists(be_functions_dir):
os.makedirs(be_functions_dir)
@ -212,5 +285,9 @@ if __name__ == '__main__':
if not os.path.exists(fe_functions_dir):
os.makedirs(fe_functions_dir)
# Read the function metadata inputs
for function in functions.vectorized_functions:
add_function(function)
generate_fe(fe_functions_dir + "/VectorizedBuiltinFunctions.java")
generate_cpp(be_functions_dir + "/builtin_functions.cpp")
generate_cpp(be_functions_dir + "/")