Merge pull request #28778 from fzhedu/groupConcat

[Feature] Group concat support order by and distinct
2023-08-24 10:13:30 +08:00 · 2023-08-24 10:13:30 +08:00 · 34b655cd3d
parent 1b32651454 1bc6b03f00
commit 34b655cd3d
32 changed files with 2942 additions and 114 deletions
--- a/be/src/exec/aggregator.cpp
+++ b/be/src/exec/aggregator.cpp
@ -129,13 +129,16 @@ void AggregatorParams::init() {

            bool is_input_nullable = has_outer_join_child || desc.nodes[0].has_nullable_child;
            agg_fn_types[i] = {return_type, serde_type, arg_typedescs, is_input_nullable, desc.nodes[0].is_nullable};
-            if (fn.name.function_name == "array_agg") {
+            if (fn.name.function_name == "array_agg" || fn.name.function_name == "group_concat") {
                // set order by info
                if (fn.aggregate_fn.__isset.is_asc_order && fn.aggregate_fn.__isset.nulls_first &&
                    !fn.aggregate_fn.is_asc_order.empty()) {
                    agg_fn_types[i].is_asc_order = fn.aggregate_fn.is_asc_order;
                    agg_fn_types[i].nulls_first = fn.aggregate_fn.nulls_first;
                }
+                if (fn.aggregate_fn.__isset.is_distinct) {
+                    agg_fn_types[i].is_distinct = fn.aggregate_fn.is_distinct;
+                }
            }
        }
    }
@ -459,7 +462,11 @@ Status Aggregator::prepare(RuntimeState* state, ObjectPool* pool, RuntimeProfile
    for (int i = 0; i < _agg_fn_ctxs.size(); ++i) {
        _agg_fn_ctxs[i] = FunctionContext::create_context(
                state, _mem_pool.get(), AnyValUtil::column_type_to_type_desc(_agg_fn_types[i].result_type),
-                _agg_fn_types[i].arg_typedescs, _agg_fn_types[i].is_asc_order, _agg_fn_types[i].nulls_first);
+                _agg_fn_types[i].arg_typedescs, _agg_fn_types[i].is_distinct, _agg_fn_types[i].is_asc_order,
+                _agg_fn_types[i].nulls_first);
+        if (state->query_options().__isset.group_concat_max_len) {
+            _agg_fn_ctxs[i]->set_group_concat_max_len(state->query_options().group_concat_max_len);
+        }
        state->obj_pool()->add(_agg_fn_ctxs[i]);
    }

@ -868,7 +875,6 @@ Status Aggregator::output_chunk_by_streaming(Chunk* input_chunk, ChunkPtr* chunk
        DCHECK(!_group_by_columns.empty());

        RETURN_IF_ERROR(evaluate_agg_fn_exprs(input_chunk));
-
        const auto num_rows = _group_by_columns[0]->size();
        Columns agg_result_column = _create_agg_result_columns(num_rows, true);
        for (size_t i = 0; i < _agg_fn_ctxs.size(); i++) {
--- a/be/src/exec/aggregator.h
+++ b/be/src/exec/aggregator.h
@ -134,6 +134,8 @@ struct AggFunctionTypes {
    // hold order-by info
    std::vector<bool> is_asc_order;
    std::vector<bool> nulls_first;
+
+    bool is_distinct = false;
 };

 struct ColumnType {
--- a/be/src/exprs/agg/factory/aggregate_factory.cpp
+++ b/be/src/exprs/agg/factory/aggregate_factory.cpp
@ -134,6 +134,12 @@ static const AggregateFunction* get_function(const std::string& name, LogicalTyp
        }
    }

+    if (func_version > 6) {
+        if (name == "group_concat") {
+            func_name = "group_concat2";
+        }
+    }
+
    if (binary_type == TFunctionBinaryType::BUILTIN) {
        auto func = AggregateFuncResolver::instance()->get_aggregate_info(func_name, arg_type, return_type,
                                                                          is_window_function, is_null);
--- a/be/src/exprs/agg/factory/aggregate_factory.hpp
+++ b/be/src/exprs/agg/factory/aggregate_factory.hpp
@ -114,6 +114,10 @@ public:
        return std::make_shared<ArrayAggAggregateFunctionV2>();
    }

+    // Factory for GroupConcatAggregateFunctionV2; each call returns a freshly allocated, shared instance.
+    static AggregateFunctionPtr MakeGroupConcatAggregateFunctionV2() {
+        return std::make_shared<GroupConcatAggregateFunctionV2>();
+    }
+
    template <LogicalType LT>
    static auto MakeMaxAggregateFunction();

--- a/be/src/exprs/agg/factory/aggregate_resolver_others.cpp
+++ b/be/src/exprs/agg/factory/aggregate_resolver_others.cpp
@ -75,6 +75,7 @@ void AggregateFuncResolver::register_others() {

    add_general_mapping<AnyValueSemiState>("any_value", false, AggregateFactory::MakeAnyValueSemiAggregateFunction());
    add_general_mapping_notnull("array_agg2", false, AggregateFactory::MakeArrayAggAggregateFunctionV2());
+    add_general_mapping_notnull("group_concat2", false, AggregateFactory::MakeGroupConcatAggregateFunctionV2());
 }

 } // namespace starrocks
--- a/be/src/exprs/agg/group_concat.h
+++ b/be/src/exprs/agg/group_concat.h
@ -16,10 +16,17 @@

 #include <cmath>

+#include "column/array_column.h"
+#include "column/binary_column.h"
 #include "column/column_helper.h"
+#include "column/struct_column.h"
 #include "column/type_traits.h"
+#include "exec/sorting/sorting.h"
 #include "exprs/agg/aggregate.h"
+#include "exprs/function_context.h"
 #include "gutil/casts.h"
+#include "runtime/runtime_state.h"
+#include "util/utf8.h"

 namespace starrocks {
 template <LogicalType LT, typename = guard::Guard>
@ -292,4 +299,421 @@ public:
    std::string get_name() const override { return "group concat"; }
 };

+// Per-group buffer of group_concat2: every argument is accumulated into its own column.
+// The intermediate result built from these columns is struct{array[col0], array[col1], array[col2]... array[coln]},
+// and the final result is the ordered string "col0col1...colnSEPcol0col1...coln...".
+struct GroupConcatAggregateStateV2 {
+    // Appends rows [offset, offset + count) of `column` to the index-th buffer (rows without null elements).
+    void update(FunctionContext* ctx, const Column& column, size_t index, size_t offset, size_t count) {
+        auto& target = (*data_columns)[index];
+        target->append(column, offset, count);
+    }
+
+    // Appends `count` nulls to the index-th buffer; order-by items may be null.
+    void update_nulls(FunctionContext* ctx, size_t index, size_t count) {
+        auto& target = (*data_columns)[index];
+        target->append_nulls(count);
+    }
+
+    // Releases the trailing order-by columns, keeping the output columns and the separator column.
+    void release_order_by_columns() const {
+        if (data_columns != nullptr) {
+            // shrinking the vector drops the references to every column after the separator column
+            data_columns->resize(output_col_num + 1);
+        }
+    }
+
+    // Dropping the vector releases this state's reference to every buffered column.
+    ~GroupConcatAggregateStateV2() { data_columns.reset(); }
+
+    // A pointer rather than an inline vector, so the state keeps a fixed size whatever the argument count is.
+    // For group_concat(a, b order by c, d) the columns a,b,',',c,d are stored in this order; rows with a null in the
+    // output columns a or b are rejected before they reach this buffer.
+    std::unique_ptr<Columns> data_columns = nullptr;
+    // number of output columns, excluding the separator column
+    int output_col_num = 0;
+};
+
+// group_concat concatenates non-null values from a group, and outputs null if the group is empty.
+// TODO(fzh) we can further optimize group_concat in the following 3 ways:
+// 1. reuse columns for the order-by clause: group_concat(a order by 1) can avoid replacing '1' with a in the plan
+// and just keep a;
+// 2. convert output columns to string in the finalize phase instead of adding cast functions in the plan, which
+// leads to redundant columns in intermediate results. For example, group_concat(a,b order by 1,2) is rewritten to
+// group_concat(cast(a to string), cast(b to string) order by a, b), which keeps 4 columns although only 2 columns
+// are needed in intermediate results.
+// 3. refactor the order-by and distinct logic into a combinator to clean up the code.
+class GroupConcatAggregateFunctionV2
+        : public AggregateFunctionBatchHelper<GroupConcatAggregateStateV2, GroupConcatAggregateFunctionV2> {
+public:
+    // Builds the per-group buffers: one nullable column per argument.
+    // For group_concat(a, b order by c, d) the arguments are a,b,',',c,d.
+    // When the arguments are invalid, an error is recorded on ctx and state.data_columns stays null, so callers can
+    // detect the failure instead of indexing into a half-built buffer.
+    void create_impl(FunctionContext* ctx, GroupConcatAggregateStateV2& state) const {
+        auto num = ctx->get_num_args();
+        auto order_by_num = ctx->get_nulls_first().size();
+        state.output_col_num = num - order_by_num - 1; // excluding separator column
+        if (UNLIKELY(state.output_col_num <= 0)) {
+            ctx->set_error("group_concat output column should not be empty", false);
+            return;
+        }
+        for (auto i = 0; i < state.output_col_num; ++i) {
+            if (UNLIKELY(!is_string_type(ctx->get_arg_type(i)->type))) {
+                ctx->set_error(std::string(std::to_string(i) + "-th input of group_concat is not string type, but is " +
+                                           type_to_string(ctx->get_arg_type(i)->type))
+                                       .c_str(),
+                               false);
+                return;
+            }
+        }
+        // only install the buffers once every argument has been validated
+        state.data_columns = std::make_unique<Columns>();
+        for (auto i = 0; i < num; ++i) {
+            state.data_columns->emplace_back(ctx->create_column(*ctx->get_arg_type(i), true));
+        }
+        DCHECK(ctx->get_is_asc_order().size() == ctx->get_nulls_first().size());
+    }
+
+    // Empties every buffered column but keeps the buffers themselves for reuse.
+    void reset(FunctionContext* ctx, const Columns& args, AggDataPtr __restrict state) const override {
+        auto& state_impl = this->data(state);
+        if (state_impl.data_columns != nullptr) {
+            for (auto& col : *state_impl.data_columns) {
+                col->resize(0);
+            }
+        }
+    }
+
+    // Appends row `row_num` of the input columns to the state.
+    // Rows with a null in any output column are rejected, but non-output columns (separator, order-by) may be null.
+    void update(FunctionContext* ctx, const Column** columns, AggDataPtr __restrict state,
+                size_t row_num) const override {
+        auto& state_impl = this->data(state);
+        if (UNLIKELY(!_ensure_state(ctx, state_impl))) {
+            return;
+        }
+        for (auto i = 0; i < state_impl.output_col_num; ++i) {
+            if (columns[i]->is_nullable() && columns[i]->is_null(row_num)) {
+                return;
+            }
+        }
+
+        auto num = ctx->get_num_args();
+        for (auto i = 0; i < num; ++i) {
+            // a non-output column is null
+            if (i >= state_impl.output_col_num && (columns[i]->is_nullable() && columns[i]->is_null(row_num))) {
+                state_impl.update_nulls(ctx, i, 1);
+                continue;
+            }
+            auto* data_col = columns[i];
+            auto tmp_row_num = row_num;
+            if (columns[i]->is_constant()) {
+                // just copy the first const value.
+                data_col = down_cast<const ConstColumn*>(columns[i])->data_column().get();
+                tmp_row_num = 0;
+            }
+            state_impl.update(ctx, *data_col, i, tmp_row_num, 1);
+        }
+    }
+
+    // Appends the first `chunk_size` rows to a single state; nothing is appended when an output column is all null.
+    void update_batch_single_state(FunctionContext* ctx, size_t chunk_size, const Column** columns,
+                                   AggDataPtr __restrict state) const override {
+        auto& state_impl = this->data(state);
+        if (UNLIKELY(!_ensure_state(ctx, state_impl))) {
+            return;
+        }
+        for (auto i = 0; i < state_impl.output_col_num; ++i) {
+            if (columns[i]->only_null()) {
+                return;
+            }
+        }
+        for (size_t i = 0; i < chunk_size; ++i) {
+            update(ctx, columns, state, i);
+        }
+    }
+
+    // Appends rows [frame_start, frame_end) to a single state; nothing is appended when an output column is all null.
+    void update_batch_single_state_with_frame(FunctionContext* ctx, AggDataPtr __restrict state, const Column** columns,
+                                              int64_t peer_group_start, int64_t peer_group_end, int64_t frame_start,
+                                              int64_t frame_end) const override {
+        auto& state_impl = this->data(state);
+        if (UNLIKELY(!_ensure_state(ctx, state_impl))) {
+            return;
+        }
+        for (auto i = 0; i < state_impl.output_col_num; ++i) {
+            if (columns[i]->only_null()) {
+                return;
+            }
+        }
+        for (size_t i = frame_start; i < frame_end; ++i) {
+            update(ctx, columns, state, i);
+        }
+    }
+
+    // Merges row `row_num` of a serialized struct column into the state.
+    // The struct and its arrays may be null, but the array elements of output columns should not be null.
+    void merge(FunctionContext* ctx, const Column* column, AggDataPtr __restrict state, size_t row_num) const override {
+        if (UNLIKELY(row_num >= column->size())) {
+            ctx->set_error(std::string(get_name() + " merge() row id overflow").c_str(), false);
+            return;
+        }
+        // input struct is null
+        if (column->is_nullable() && column->is_null(row_num)) {
+            return;
+        }
+        auto& input_columns = down_cast<const StructColumn*>(ColumnHelper::get_data_column(column))->fields();
+        auto& state_impl = this->data(state);
+        if (UNLIKELY(!_ensure_state(ctx, state_impl))) {
+            return;
+        }
+        // the array of an output column is null
+        for (auto i = 0; i < state_impl.output_col_num; i++) {
+            if (input_columns[i]->is_null(row_num)) {
+                return;
+            }
+        }
+        for (auto i = 0; i < input_columns.size(); ++i) {
+            auto array_column = down_cast<const ArrayColumn*>(ColumnHelper::get_data_column(input_columns[i].get()));
+            auto& offsets = array_column->offsets().get_data();
+            state_impl.update(ctx, array_column->elements(), i, offsets[row_num],
+                              offsets[row_num + 1] - offsets[row_num]);
+        }
+    }
+
+    // TODO: if any output column is nullable, the result and intermediate result should be nullable
+    // Serializes the state as one row of a nullable struct whose fields are (nullable but never null) arrays.
+    // If the state holds no rows, a default (null) row is appended to the nullable struct;
+    // otherwise each buffered column becomes one array.
+    // nullable struct {nullable array[nullable elements]...}: the struct may be null, but the arrays and the array
+    // elements from output columns wouldn't be null.
+    void serialize_to_column(FunctionContext* ctx, ConstAggDataPtr __restrict state, Column* to) const override {
+        auto& state_impl = this->data(state);
+        if (state_impl.data_columns == nullptr || (*state_impl.data_columns)[0]->size() == 0) {
+            to->append_default();
+            return;
+        }
+        auto& columns = down_cast<StructColumn*>(ColumnHelper::get_data_column(to))->fields_column();
+        if (to->is_nullable()) {
+            down_cast<NullableColumn*>(to)->null_column_data().emplace_back(0);
+        }
+        for (auto i = 0; i < columns.size(); ++i) {
+            auto elem_size = (*state_impl.data_columns)[i]->size();
+            auto array_col = down_cast<ArrayColumn*>(ColumnHelper::get_data_column(columns[i].get()));
+            if (columns[i]->is_nullable()) {
+                down_cast<NullableColumn*>(columns[i].get())->null_column_data().emplace_back(0);
+            }
+            array_col->elements_column()->append(
+                    *ColumnHelper::unpack_and_duplicate_const_column(elem_size, (*state_impl.data_columns)[i]), 0,
+                    elem_size);
+            auto& offsets = array_col->offsets_column()->get_data();
+            offsets.push_back(offsets.back() + elem_size);
+        }
+    }
+
+    // Converts every input row into one row of the nullable struct, keeping chunk_size rows.
+    // If any output column is null at row i, the i-th struct row and the i-th array of every field are marked null
+    // and no element is appended; otherwise every field gets a single-element array holding src[x][i].
+    // nullable struct {nullable array[nullable elements]...}: the struct and the arrays may be null, but the array
+    // elements from output columns wouldn't be null.
+    void convert_to_serialize_format(FunctionContext* ctx, const Columns& src, size_t chunk_size,
+                                     ColumnPtr* dst) const override {
+        // bind by reference: the field list and the null vectors are only read/updated in place, never copied
+        auto& columns = down_cast<StructColumn*>(ColumnHelper::get_data_column(dst->get()))->fields_column();
+        if (UNLIKELY(src.size() != columns.size())) {
+            ctx->set_error(std::string(get_name() + " to-serialized column num " + std::to_string(src.size()) +
+                                       " != expected " + std::to_string(columns.size()))
+                                   .c_str(),
+                           false);
+            return;
+        }
+        // get null info from output columns
+        auto output_col_num = ctx->get_num_args() - ctx->get_nulls_first().size() - 1;
+        NullColumnPtr nulls = NullColumn::create(chunk_size, false);
+        auto& null_data = nulls->get_data();
+        for (int j = 0; j < output_col_num; ++j) {
+            if (src[j]->only_null()) {
+                for (int i = 0; i < chunk_size; ++i) {
+                    null_data[i] = true;
+                }
+                break;
+            }
+            if (src[j]->is_constant()) {
+                continue;
+            }
+            if (src[j]->is_nullable()) {
+                const auto& null_col = down_cast<NullableColumn*>(src[j].get())->null_column_data();
+                for (int i = 0; i < chunk_size; ++i) {
+                    null_data[i] |= null_col[i];
+                }
+            }
+        }
+        if (dst->get()->is_nullable()) {
+            auto nullable_col = down_cast<NullableColumn*>(dst->get());
+            for (size_t i = 0; i < chunk_size; i++) {
+                nullable_col->null_column_data().emplace_back(null_data[i]);
+            }
+            nullable_col->update_has_null();
+        }
+        // if i-th row is null, set nullable_array[x][i] = null, otherwise, set array[x][i]=src[x][i]
+        std::vector<ArrayColumn*> arrays(columns.size());
+        std::vector<NullData*> array_nulls(columns.size());
+        std::vector<std::vector<uint32_t>*> array_offsets(columns.size());
+        std::vector<NullableColumn*> nullable_arrays(columns.size());
+        auto old_size = columns[0]->size();
+        for (auto j = 0; j < columns.size(); ++j) {
+            nullable_arrays[j] = down_cast<NullableColumn*>(columns[j].get());
+            arrays[j] = down_cast<ArrayColumn*>(nullable_arrays[j]->data_column().get());
+            arrays[j]->reserve(old_size + chunk_size);
+            array_nulls[j] = &(nullable_arrays[j]->null_column_data());
+            array_nulls[j]->resize(old_size + chunk_size);
+            array_offsets[j] = &(arrays[j]->offsets_column()->get_data());
+        }
+        for (auto i = 0; i < chunk_size; i++) {
+            if (null_data[i]) {
+                for (auto j = 0; j < columns.size(); ++j) {
+                    (*array_nulls[j])[i + old_size] = 1;
+                    array_offsets[j]->push_back(array_offsets[j]->back());
+                }
+            } else {
+                for (auto j = 0; j < columns.size(); ++j) {
+                    (*array_nulls[j])[i + old_size] = 0;
+                    arrays[j]->elements_column()->append_datum(src[j]->get(i));
+                    array_offsets[j]->push_back(array_offsets[j]->back() + 1);
+                }
+            }
+        }
+        for (auto j = 0; j < columns.size(); ++j) {
+            nullable_arrays[j]->update_has_null();
+        }
+    }
+
+    // Appends exactly one row to `to`: the concatenated string, or a default row for an empty state or on failure.
+    // For group_concat(a, b order by c, d) it outputs a,b,',' ordered by c and d, omitting the last separator ','.
+    // Note: when DISTINCT is specified, the output columns and order-by columns are put in the group-by clause, so
+    // the output columns still have to be de-duplicated here after ordering.
+    // The result is truncated to group_concat_max_len bytes, cutting at a UTF-8 character boundary.
+    void finalize_to_column(FunctionContext* ctx, ConstAggDataPtr __restrict state, Column* to) const override {
+        // Records the error and emits the row owed for this group. This is tied to failures of this call only, so
+        // an error already present on ctx cannot cause a second row to be appended after a successful finalize.
+        auto fail = [ctx, to](const std::string& msg) {
+            ctx->set_error(msg.c_str(), false);
+            to->append_default();
+        };
+        if (UNLIKELY(!(ColumnHelper::get_data_column(to)->is_binary()))) {
+            fail("The output column of " + get_name() + " finalize_to_column() is not string, but is " +
+                 to->get_name());
+            return; // must not fall through: the code below casts `to` to a binary column
+        }
+        auto& state_impl = this->data(state);
+        if (state_impl.data_columns == nullptr) {
+            to->append_default();
+            return;
+        }
+        const size_t elem_size = (*state_impl.data_columns)[0]->size();
+        if (elem_size == 0) {
+            to->append_default();
+            return;
+        }
+        auto output_col_num = state_impl.output_col_num + 1; // include sep
+        Columns outputs(output_col_num);
+        for (auto i = 0; i < output_col_num; ++i) {
+            outputs[i] = (*state_impl.data_columns)[i];
+        }
+        // order by
+        if (!ctx->get_is_asc_order().empty()) {
+            for (auto i = 0; i < output_col_num; ++i) {
+                outputs[i] = (*state_impl.data_columns)[i]->clone_empty();
+            }
+            Permutation perm;
+            Columns order_by_columns;
+            SortDescs sort_desc(ctx->get_is_asc_order(), ctx->get_nulls_first());
+            order_by_columns.assign(state_impl.data_columns->begin() + output_col_num, state_impl.data_columns->end());
+            Status st = sort_and_tie_columns(ctx->state()->cancelled_ref(), order_by_columns, sort_desc, &perm);
+            // release order-by columns early
+            order_by_columns.clear();
+            state_impl.release_order_by_columns();
+            if (UNLIKELY(ctx->state()->cancelled_ref())) {
+                fail("group_concat detects cancelled.");
+                return;
+            }
+            if (UNLIKELY(!st.ok())) {
+                fail(st.to_string());
+                return;
+            }
+            for (auto i = 0; i < output_col_num; ++i) {
+                materialize_column_by_permutation(outputs[i].get(), {(*state_impl.data_columns)[i]}, perm);
+            }
+        }
+        // further remove duplicated values: keep the last occurrence of each value, so that the last row is never
+        // dropped and its trailing separator can be identified and omitted.
+        // TODO(fzh) optimize it later
+        std::vector<bool> duplicated(outputs[0]->size(), false);
+        if (ctx->get_is_distinct()) {
+            for (size_t row_id = 0; row_id < elem_size; row_id++) {
+                bool is_duplicated = false;
+                for (size_t next_id = row_id + 1; next_id < elem_size; next_id++) {
+                    bool tmp_duplicated = true;
+                    for (auto col_id = 0; col_id < output_col_num - 1; col_id++) { // exclude sep
+                        if (!outputs[col_id]->equals(next_id, *outputs[col_id], row_id)) {
+                            tmp_duplicated = false;
+                            break;
+                        }
+                    }
+                    if (tmp_duplicated) {
+                        is_duplicated = true;
+                        break;
+                    }
+                }
+                duplicated[row_id] = is_duplicated;
+            }
+        }
+        // copy col_0, col_1 ... col_n row by row
+        auto* string = down_cast<BinaryColumn*>(ColumnHelper::get_data_column(to));
+        size_t length = 0;
+        std::vector<BinaryColumn*> binary_cols(output_col_num);
+        for (auto i = 0; i < output_col_num; ++i) {
+            auto tmp = ColumnHelper::get_data_column(outputs[i].get());
+            binary_cols[i] = down_cast<BinaryColumn*>(tmp);
+            length += binary_cols[i]->get_bytes().size();
+        }
+
+        Bytes& bytes = string->get_bytes();
+        const size_t start = bytes.size();
+        const size_t max_len = ctx->get_group_concat_max_len();
+        const size_t limit = start + max_len;
+        // at most min(length, max_len) bytes are written below, so there is no need to grow by the full length
+        bytes.resize(start + (length < max_len ? length : max_len));
+        size_t offset = start;
+        bool overflow = false;
+        for (size_t j = 0; j < elem_size && !overflow; ++j) {
+            if (duplicated[j]) {
+                continue;
+            }
+            for (auto i = 0; i < output_col_num && !overflow; ++i) {
+                if (j + 1 == elem_size && i + 1 == output_col_num) { // ignore the last separator
+                    continue;
+                }
+                // ask the nullable wrapper: the unwrapped binary column holds no null flags of its own
+                if (UNLIKELY(i + 1 < output_col_num && outputs[i]->is_null(j))) {
+                    bytes.resize(start); // drop the partial result so `to` stays consistent
+                    fail("group_concat mustn't output null");
+                    return;
+                }
+                auto str = binary_cols[i]->get_slice(j);
+                if (offset + str.get_size() <= limit) {
+                    memcpy(bytes.data() + offset, str.get_data(), str.get_size());
+                    offset += str.get_size();
+                    overflow = offset == limit;
+                } else { // make the last utf8 character valid
+                    std::vector<size_t> index;
+                    get_utf8_index(str, &index);
+                    size_t end = 0;
+                    for (auto id : index) {
+                        if (offset + id > limit) {
+                            break;
+                        }
+                        end = id;
+                    }
+                    memcpy(bytes.data() + offset, str.get_data(), end);
+                    offset += end;
+                    overflow = true;
+                }
+            }
+        }
+        bytes.resize(offset);
+        string->get_offset().emplace_back(offset);
+        // the null flag is published last, once the row is known to be complete
+        if (to->is_nullable()) {
+            down_cast<NullableColumn*>(to)->null_column_data().emplace_back(0);
+        }
+    }
+
+    std::string get_name() const override { return "group_concat2"; }
+
+private:
+    // Lazily builds the per-group buffers.
+    // Returns false when they could not be built; create_impl() has then already recorded the error on ctx.
+    bool _ensure_state(FunctionContext* ctx, GroupConcatAggregateStateV2& state) const {
+        if (state.data_columns == nullptr) {
+            create_impl(ctx, state);
+        }
+        return state.data_columns != nullptr;
+    }
+};
+
 } // namespace starrocks
--- a/be/src/exprs/agg/nullable_aggregate.h
+++ b/be/src/exprs/agg/nullable_aggregate.h
@ -26,6 +26,7 @@
 #include "column/nullable_column.h"
 #include "exprs/agg/maxmin.h"
 #include "exprs/function_context.h"
+#include "exprs/function_helper.h"
 #include "simd/simd.h"

 namespace starrocks {
@ -746,6 +747,9 @@ public:
        const Column* data_columns[column_size];

        for (size_t i = 0; i < column_size; i++) {
+            if (columns[i]->only_null()) {
+                return;
+            }
            if (columns[i]->is_nullable()) {
                if (columns[i]->is_null(row_num)) {
                    // If at least one column has a null value in the current row,
@ -772,6 +776,11 @@ public:
    void update_batch_selectively(FunctionContext* ctx, size_t chunk_size, size_t state_offset, const Column** columns,
                                  AggDataPtr* states, const std::vector<uint8_t>& selection) const override {
        auto column_size = ctx->get_num_args();
+        for (size_t i = 0; i < column_size; i++) {
+            if (columns[i]->only_null()) {
+                return;
+            }
+        }
        // This container stores the columns we really pass to the nested function.
        const Column* data_columns[column_size];

@ -863,7 +872,8 @@ public:
            if (i->is_nullable()) {
                has_nullable_column = true;

-                const auto* nullable_column = down_cast<const NullableColumn*>(i.get());
+                const auto* nullable_column = down_cast<const NullableColumn*>(
+                        ColumnHelper::unpack_and_duplicate_const_column(i->size(), i).get());
                data_columns.emplace_back(nullable_column->data_column());
                if (i->has_null()) {
                    dst_nullable_column->set_has_null(true);
--- a/be/src/exprs/function_context.cpp
+++ b/be/src/exprs/function_context.cpp
@ -43,7 +43,7 @@ FunctionContext* FunctionContext::create_context(RuntimeState* state, MemPool* p
 FunctionContext* FunctionContext::create_context(RuntimeState* state, MemPool* pool,
                                                 const FunctionContext::TypeDesc& return_type,
                                                 const std::vector<FunctionContext::TypeDesc>& arg_types,
-                                                 const std::vector<bool>& is_asc_order,
+                                                 bool is_distinct, const std::vector<bool>& is_asc_order,
                                                 const std::vector<bool>& nulls_first) {
    auto* ctx = new FunctionContext();
    ctx->_state = state;
@ -51,6 +51,7 @@ FunctionContext* FunctionContext::create_context(RuntimeState* state, MemPool* p
    ctx->_return_type = return_type;
    ctx->_arg_types = arg_types;
    ctx->_jvm_udaf_ctxs = std::make_unique<JavaUDAFContext>();
+    ctx->_is_distinct = is_distinct;
    ctx->_is_asc_order = is_asc_order;
    ctx->_nulls_first = nulls_first;
    return ctx;
--- a/be/src/exprs/function_context.h
+++ b/be/src/exprs/function_context.h
@ -82,7 +82,7 @@ public:

    static FunctionContext* create_context(RuntimeState* state, MemPool* pool,
                                           const FunctionContext::TypeDesc& return_type,
-                                           const std::vector<FunctionContext::TypeDesc>& arg_types,
+                                           const std::vector<FunctionContext::TypeDesc>& arg_types, bool is_distinct,
                                           const std::vector<bool>& isAscOrder, const std::vector<bool>& nullsFirst);

    ~FunctionContext();
@ -119,10 +119,12 @@ public:

    std::vector<bool> get_is_asc_order() { return _is_asc_order; }
    std::vector<bool> get_nulls_first() { return _nulls_first; }
+    bool get_is_distinct() { return _is_distinct; }
    // for tests
    void set_is_asc_order(const std::vector<bool>& order) { _is_asc_order = order; }
    void set_nulls_first(const std::vector<bool>& nulls) { _nulls_first = nulls; }
    void set_runtime_state(RuntimeState* const state) { _state = state; }
+    void set_is_distinct(bool is_distinct) { _is_distinct = is_distinct; }

    // Returns _constant_columns size
    int get_num_constant_columns() const;
@ -165,6 +167,10 @@ public:

    JavaUDAFContext* udaf_ctxs() { return _jvm_udaf_ctxs.get(); }

+    ssize_t get_group_concat_max_len() { return group_concat_max_len; }
+    // min value is 4, default is 1024
+    void set_group_concat_max_len(ssize_t len) { group_concat_max_len = len < 4 ? 4 : len; }
+
 private:
    friend class ExprContext;

@ -205,6 +211,8 @@ private:

    std::vector<bool> _is_asc_order;
    std::vector<bool> _nulls_first;
+    bool _is_distinct = false;
+    ssize_t group_concat_max_len = 1024;
 };

 } // namespace starrocks
--- a/be/test/exprs/agg/aggregate_test.cpp
+++ b/be/test/exprs/agg/aggregate_test.cpp
@ -2122,4 +2122,291 @@ TEST_F(AggregateTest, test_array_agg) {
    }
 }

+// Exercises the "group_concat2" aggregate end to end (update, serialize, convert-to-serialize
+// and finalize) for nullable, non-nullable and constant inputs, plus a cancelled query.
+TEST_F(AggregateTest, test_group_concatV2) {
+    // Arguments of the aggregate under test: (value VARCHAR, separator VARCHAR, order-by key INT).
+    std::vector<FunctionContext::TypeDesc> arg_types = {
+            AnyValUtil::column_type_to_type_desc(TypeDescriptor::from_logical_type(TYPE_VARCHAR)),
+            AnyValUtil::column_type_to_type_desc(TypeDescriptor::from_logical_type(TYPE_VARCHAR)),
+            AnyValUtil::column_type_to_type_desc(TypeDescriptor::from_logical_type(TYPE_INT))};
+
+    auto return_type = AnyValUtil::column_type_to_type_desc(TypeDescriptor::from_logical_type(TYPE_VARCHAR));
+    std::unique_ptr<RuntimeState> runtime_state = std::make_unique<RuntimeState>();
+    std::unique_ptr<FunctionContext> local_ctx(FunctionContext::create_test_context(std::move(arg_types), return_type));
+    // A single order-by key: descending, NULLs first.
+    std::vector<bool> is_asc_order{false};
+    std::vector<bool> nulls_first{true};
+    local_ctx->set_is_asc_order(is_asc_order);
+    local_ctx->set_nulls_first(nulls_first);
+    local_ctx->set_runtime_state(runtime_state.get());
+
+    // Builds the serialized (intermediate) type used by every scenario below:
+    // a struct with one array field per argument, named "vchar", "sep" and "int".
+    auto make_serde_type = []() {
+        TypeDescriptor type_array_char;
+        type_array_char.type = LogicalType::TYPE_ARRAY;
+        type_array_char.children.emplace_back(TypeDescriptor(LogicalType::TYPE_VARCHAR));
+
+        TypeDescriptor type_array_int;
+        type_array_int.type = LogicalType::TYPE_ARRAY;
+        type_array_int.children.emplace_back(TypeDescriptor(LogicalType::TYPE_INT));
+
+        TypeDescriptor type_struct_char_int;
+        type_struct_char_int.type = LogicalType::TYPE_STRUCT;
+        type_struct_char_int.children.emplace_back(type_array_char);
+        type_struct_char_int.children.emplace_back(type_array_char);
+        type_struct_char_int.children.emplace_back(type_array_int);
+        type_struct_char_int.field_names.emplace_back("vchar");
+        type_struct_char_int.field_names.emplace_back("sep");
+        type_struct_char_int.field_names.emplace_back("int");
+        return type_struct_char_int;
+    };
+
+    const AggregateFunction* gc_func = get_aggregate_function("group_concat2", TYPE_BIGINT, TYPE_VARCHAR, true);
+    auto state = ManagedAggrState::create(local_ctx.get(), gc_func);
+
+    // nullable columns input
+    {
+        auto char_type = TypeDescriptor::create_varchar_type(30);
+        auto char_column = ColumnHelper::create_column(char_type, true);
+        char_column->append_datum(Datum());
+        char_column->append_datum("bcd");
+        char_column->append_datum("cdrdfe");
+        char_column->append_datum(Datum());
+        char_column->append_datum("esfg");
+
+        auto sep_column = ColumnHelper::create_const_column<TYPE_VARCHAR>(",", 5);
+
+        auto int_type = TypeDescriptor::from_logical_type(LogicalType::TYPE_INT);
+        auto int_column = ColumnHelper::create_column(int_type, true);
+        int_column->append_datum(Datum());
+        int_column->append_datum(9);
+        int_column->append_datum(Datum());
+        int_column->append_datum(7);
+        int_column->append_datum(6);
+
+        std::vector<ColumnPtr> columns{char_column, sep_column, int_column};
+        std::vector<const Column*> raw_columns{char_column.get(), sep_column.get(), int_column.get()};
+
+        // test update: rows with a NULL value are expected to be dropped, while a NULL
+        // order-by key is kept
+        gc_func->update_batch_single_state(local_ctx.get(), int_column->size(), raw_columns.data(), state->state());
+        auto agg_state = (GroupConcatAggregateStateV2*)(state->state());
+        ASSERT_EQ(agg_state->data_columns->size(), 3);
+        // data_columns in state are nullable
+        ASSERT_EQ((*agg_state->data_columns)[0]->debug_string(), "['bcd', 'cdrdfe', 'esfg']");
+        ASSERT_EQ((*agg_state->data_columns)[1]->debug_string(), "[',', ',', ',']");
+        ASSERT_EQ((*agg_state->data_columns)[2]->debug_string(), "[9, NULL, 6]");
+
+        auto res_struct_col = ColumnHelper::create_column(make_serde_type(), true);
+        gc_func->serialize_to_column(local_ctx.get(), state->state(), res_struct_col.get());
+        ASSERT_EQ(res_struct_col->debug_string(), "[{vchar:['bcd','cdrdfe','esfg'],sep:[',',',',','],int:[9,NULL,6]}]");
+
+        res_struct_col->resize(0);
+        gc_func->convert_to_serialize_format(local_ctx.get(), columns, int_column->size(), &res_struct_col);
+        ASSERT_EQ(res_struct_col->debug_string(),
+                  "[NULL, {vchar:['bcd'],sep:[','],int:[9]}, {vchar:['cdrdfe'],sep:[','],int:[NULL]}, "
+                  "NULL, {vchar:['esfg'],sep:[','],int:[6]}]");
+
+        // descending with NULLs first: the NULL key row leads, then keys 9 and 6
+        auto res_col = ColumnHelper::create_column(char_type, false);
+        gc_func->finalize_to_column(local_ctx.get(), state->state(), res_col.get());
+        ASSERT_EQ(res_col->debug_string(), "['cdrdfe,bcd,esfg']");
+    }
+    // not nullable columns input
+    gc_func = get_aggregate_function("group_concat2", TYPE_BIGINT, TYPE_VARCHAR, false);
+    state = ManagedAggrState::create(local_ctx.get(), gc_func);
+    {
+        auto char_type = TypeDescriptor::create_varchar_type(30);
+        auto char_column = ColumnHelper::create_column(char_type, false);
+        char_column->append_datum("");
+        char_column->append_datum("bcd");
+        char_column->append_datum("cdrdfe");
+        char_column->append_datum("Datum()");
+        char_column->append_datum("esfg");
+
+        auto sep_column = ColumnHelper::create_const_column<TYPE_VARCHAR>(",", 5);
+
+        auto int_type = TypeDescriptor::from_logical_type(LogicalType::TYPE_INT);
+        auto int_column = ColumnHelper::create_column(int_type, false);
+        int_column->append_datum(2);
+        int_column->append_datum(9);
+        int_column->append_datum(5);
+        int_column->append_datum(7);
+        int_column->append_datum(6);
+
+        std::vector<ColumnPtr> columns{char_column, sep_column, int_column};
+        std::vector<const Column*> raw_columns{char_column.get(), sep_column.get(), int_column.get()};
+
+        // test update
+        gc_func->update_batch_single_state(local_ctx.get(), int_column->size(), raw_columns.data(), state->state());
+        auto agg_state = (GroupConcatAggregateStateV2*)(state->state());
+        ASSERT_EQ(agg_state->data_columns->size(), 3);
+        // no row is dropped here, so the state's columns must print exactly like the inputs
+        ASSERT_EQ((*agg_state->data_columns)[0]->debug_string(), char_column->debug_string());
+        ASSERT_EQ((*agg_state->data_columns)[2]->debug_string(), int_column->debug_string());
+
+        auto res_struct_col = ColumnHelper::create_column(make_serde_type(), true);
+        gc_func->serialize_to_column(local_ctx.get(), state->state(), res_struct_col.get());
+        ASSERT_EQ(res_struct_col->debug_string(),
+                  "[{vchar:['','bcd','cdrdfe','Datum()','esfg'],sep:[',',',',',',',',','],int:[2,9,5,7,6]}]");
+
+        res_struct_col->resize(0);
+        gc_func->convert_to_serialize_format(local_ctx.get(), columns, int_column->size(), &res_struct_col);
+        ASSERT_EQ(res_struct_col->debug_string(),
+                  "[{vchar:[''],sep:[','],int:[2]}, {vchar:['bcd'],sep:[','],int:[9]}, "
+                  "{vchar:['cdrdfe'],sep:[','],int:[5]}, {vchar:['Datum()'],sep:[','],int:[7]}, "
+                  "{vchar:['esfg'],sep:[','],int:[6]}]");
+        // keys sorted descending: 9, 7, 6, 5, 2 (the empty string is the last element)
+        auto res_col = ColumnHelper::create_column(char_type, false);
+        gc_func->finalize_to_column(local_ctx.get(), state->state(), res_col.get());
+        ASSERT_EQ(res_col->debug_string(), "['bcd,Datum(),esfg,cdrdfe,']");
+    }
+
+    gc_func = get_aggregate_function("group_concat2", TYPE_BIGINT, TYPE_VARCHAR, true);
+    state = ManagedAggrState::create(local_ctx.get(), gc_func);
+    // const NULL value column + const separator and order-by key columns
+    {
+        auto char_column = ColumnHelper::create_const_null_column(2);
+        auto int_column = ColumnHelper::create_const_column<TYPE_INT>(3, 2);
+        auto sep_column = ColumnHelper::create_const_column<TYPE_VARCHAR>(",", 2);
+
+        std::vector<ColumnPtr> columns{char_column, sep_column, int_column};
+        std::vector<const Column*> raw_columns{char_column.get(), sep_column.get(), int_column.get()};
+
+        // test update
+        gc_func->update_batch_single_state(local_ctx.get(), int_column->size(), raw_columns.data(), state->state());
+        auto agg_state = (GroupConcatAggregateStateV2*)(state->state());
+
+        ASSERT_EQ(agg_state->data_columns->size(), 3);
+
+        auto res_struct_col = ColumnHelper::create_column(make_serde_type(), true);
+        gc_func->serialize_to_column(local_ctx.get(), state->state(), res_struct_col.get());
+        ASSERT_EQ(res_struct_col->size(), 1); // empty also need output
+
+        res_struct_col->resize(0);
+        gc_func->convert_to_serialize_format(local_ctx.get(), columns, int_column->size(), &res_struct_col);
+        ASSERT_EQ(res_struct_col->debug_string(), "[NULL, NULL]");
+
+        auto res_col = ColumnHelper::create_column(TypeDescriptor(LogicalType::TYPE_VARCHAR), true);
+        gc_func->finalize_to_column(local_ctx.get(), state->state(), res_col.get());
+        ASSERT_EQ(res_col->debug_string(), "[NULL]");
+    }
+
+    gc_func = get_aggregate_function("group_concat2", TYPE_BIGINT, TYPE_VARCHAR, true);
+    state = ManagedAggrState::create(local_ctx.get(), gc_func);
+
+    // nullable columns input with cancelled
+    {
+        auto char_type = TypeDescriptor::create_varchar_type(30);
+        auto char_column = ColumnHelper::create_column(char_type, true);
+        char_column->append_datum(Datum());
+        char_column->append_datum("bcd");
+        char_column->append_datum("cdrdfe");
+        char_column->append_datum(Datum());
+        char_column->append_datum("esfg");
+
+        auto sep_column = ColumnHelper::create_const_column<TYPE_VARCHAR>(",", 5);
+
+        auto int_type = TypeDescriptor::from_logical_type(LogicalType::TYPE_INT);
+        auto int_column = ColumnHelper::create_column(int_type, true);
+        int_column->append_datum(Datum());
+        int_column->append_datum(9);
+        int_column->append_datum(Datum());
+        int_column->append_datum(7);
+        int_column->append_datum(6);
+
+        std::vector<ColumnPtr> columns{char_column, sep_column, int_column};
+        std::vector<const Column*> raw_columns{char_column.get(), sep_column.get(), int_column.get()};
+
+        // test update
+        gc_func->update_batch_single_state(local_ctx.get(), int_column->size(), raw_columns.data(), state->state());
+        auto agg_state = (GroupConcatAggregateStateV2*)(state->state());
+        ASSERT_EQ(agg_state->data_columns->size(), 3);
+        // data_columns in state are nullable
+        ASSERT_EQ((*agg_state->data_columns)[0]->debug_string(), "['bcd', 'cdrdfe', 'esfg']");
+        ASSERT_EQ((*agg_state->data_columns)[1]->debug_string(), "[',', ',', ',']");
+        ASSERT_EQ((*agg_state->data_columns)[2]->debug_string(), "[9, NULL, 6]");
+
+        auto res_struct_col = ColumnHelper::create_column(make_serde_type(), true);
+        gc_func->serialize_to_column(local_ctx.get(), state->state(), res_struct_col.get());
+        ASSERT_EQ(res_struct_col->debug_string(), "[{vchar:['bcd','cdrdfe','esfg'],sep:[',',',',','],int:[9,NULL,6]}]");
+
+        res_struct_col->resize(0);
+        gc_func->convert_to_serialize_format(local_ctx.get(), columns, int_column->size(), &res_struct_col);
+        ASSERT_EQ(res_struct_col->debug_string(),
+                  "[NULL, {vchar:['bcd'],sep:[','],int:[9]}, {vchar:['cdrdfe'],sep:[','],int:[NULL]}, "
+                  "NULL, {vchar:['esfg'],sep:[','],int:[6]}]");
+
+        // finalizing after the query was cancelled must leave an error on the context
+        auto res_col = ColumnHelper::create_column(char_type, false);
+        local_ctx->state()->set_is_cancelled(true);
+        gc_func->finalize_to_column(local_ctx.get(), state->state(), res_col.get());
+        ASSERT_TRUE(local_ctx->has_error());
+    }
+}
+
 } // namespace starrocks
--- a/docs/sql-reference/sql-functions/string-functions/group_concat.md
+++ b/docs/sql-reference/sql-functions/string-functions/group_concat.md
@ -2,70 +2,105 @@

 ## Description

-This is an aggregate function similar to sum(). group_concat concatenates non-null values into one string, with the second argument `sep` being the separator. The second argument can also be omitted. This function usually needs to be used along with `group by`.
-
-> Please note that strings may not be concatenated in sequence because it uses distributed computing.
+group_concat concatenates the non-null values of a group into one string, joined by a separator. The separator is `,` by default if not specified.

 ## Syntax

-```Haskell
-VARCHAR group_concat(VARCHAR str[, VARCHAR sep])
+```SQL
+VARCHAR GROUP_CONCAT([DISTINCT] expr [,expr ...]
+             [ORDER BY {unsigned_integer | col_name | expr}
+                 [ASC | DESC] [,col_name ...]]
+             [SEPARATOR str_val])
 ```

 ## Parameters

- `str`: the values to concatenate. It must evaluate to VARCHAR.
- `sep`: the separator, optional. If it is not specified, `, ` (a comma and a space) is used by default.
+- `expr`: the values to concatenate; null values are ignored. Each value must be castable to VARCHAR. You can optionally specify `DISTINCT` to eliminate duplicate values. When more than one `expr` is given, they are concatenated directly; use `concat()` or `concat_ws()` to specify formats.
+- Order-by items can be unsigned integers (identifying an `expr` by position, starting from 1), column names, or normal expressions. To sort in reverse order, add the `DESC` (descending) keyword after the item you are sorting by in the ORDER BY clause. The default is ascending order; this can be specified explicitly with the `ASC` keyword.
+- `str_val`: the optional separator used to concatenate non-null values from different rows. If it is not specified, `,` (a comma) is used by default.

 ## Return value

-Returns a VARCHAR value.
+Returns a string value for each group, but returns NULL if there are no non-NULL values.
+
+Set `group_concat_max_len` to limit the length of the output string of each group. Its default value is 1024 and its minimum value is 4.

 ## Examples

 ```sql
-CREATE TABLE IF NOT EXISTS group_concat (
-    id        tinyint(4)      NULL,
-    value   varchar(65533)  NULL
+CREATE TABLE `ss` (
+  `id` int(11) NULL COMMENT "",
+  `name` varchar(255) NULL COMMENT "",
+  `subject` varchar(255) NULL COMMENT "",
+  `score` int(11) NULL COMMENT ""
 ) ENGINE=OLAP
-DISTRIBUTED BY HASH(id);
+DUPLICATE KEY(`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS 4
+PROPERTIES (
+"replication_num" = "1"
+);

-INSERT INTO group_concat VALUES
-(1,'fruit'),
-(2,'drinks'),
-(3,null),
-(4,'fruit'),
-(5,'meat'),
-(6,'seafood');
-
-select * from group_concat order by id;
-+------+---------+
-| id   | value   |
-+------+---------+
-|    1 | fruit   |
-|    2 | drinks  |
-|    3 | NULL    |
-|    4 | fruit   |
-|    5 | meat    |
-|    6 | seafood |
+insert into ss values (1,"Tom","English",90);
+insert into ss values (1,"Tom","Math",80);
+insert into ss values (2,"Tom","English",NULL);
+insert into ss values (2,"Tom",NULL,NULL);
+insert into ss values (3,"May",NULL,NULL);
+insert into ss values (3,"Ti","English",98);
+insert into ss values (4,NULL,NULL,NULL);
+insert into ss values (NULL,"Ti","Phy",98);
 ```

 ```sql
-select group_concat(value) from group_concat;
-+-------------------------------------+
-| group_concat(value)                 |
-+-------------------------------------+
-| meat, fruit, seafood, fruit, drinks |
-+-------------------------------------+
+select id, group_concat(distinct name,subject order by score) as res from ss group by id order by id;
++------+--------------------+
+| id   | res                |
++------+--------------------+
+| NULL | TiPhy              |
+|    1 | TomMath,TomEnglish |
+|    2 | NULL               |
+|    3 | TiEnglish          |
+|    4 | NULL               |
++------+--------------------+

-MySQL > select group_concat(value, " ") from group_concat;
-+---------------------------------+
-| group_concat(value, ' ')        |
-+---------------------------------+
-| fruit meat fruit drinks seafood |
-+---------------------------------+
+select id, group_concat(distinct concat(name,'-',subject) order by score) as res from ss group by id order by id;
++------+----------------------+
+| id   | res                  |
++------+----------------------+
+| NULL | Ti-Phy               |
+|    1 | Tom-Math,Tom-English |
+|    2 | NULL                 |
+|    3 | Ti-English           |
+|    4 | NULL                 |
++------+----------------------+
+    
+select group_concat(name) as res from ss;
++---------------------------+
+| res                       |
++---------------------------+
+| Tom,Tom,Ti,Tom,Tom,May,Ti |
++---------------------------+
+
+select group_concat(distinct name) as res from ss where id < 0;
++------+
+| res  |
++------+
+| NULL |
++------+
+ 
+set group_concat_max_len = 6;
+
+select id, group_concat(distinct name,subject order by score) as res from ss group by id order by id;
++------+--------+
+| id   | res    |
++------+--------+
+| NULL | TiPhy  |
+|    1 | TomMat |
+|    2 | NULL   |
+|    3 | TiEngl |
+|    4 | NULL   |
++------+--------+
 ```

 ## keyword

-GROUP_CONCAT,GROUP,CONCAT
+GROUP_CONCAT,CONCAT,ARRAY_AGG
--- a/fe/fe-core/src/main/java/com/starrocks/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/com/starrocks/analysis/FunctionCallExpr.java
@ -240,14 +240,11 @@ public class FunctionCallExpr extends Expr {
        if (fnParams.isDistinct()) {
            sb.append("DISTINCT ");
        }
-        if (fnParams.getOrderByElements() == null) {
-            sb.append(Joiner.on(", ").join(childrenToSql())).append(")");
-        } else {
-            sb.append(Joiner.on(", ").join(firstNChildrenToSql(
-                    children.size() - fnParams.getOrderByElements().size())));
+        sb.append(Joiner.on(", ").join(firstNChildrenToSql(children.size() - fnParams.getOrderByElemNum())));
+        if (fnParams.getOrderByElements() != null) {
            sb.append(fnParams.getOrderByStringToSql());
-            sb.append(')');
        }
+        sb.append(')');
        return sb.toString();
    }

--- a/fe/fe-core/src/main/java/com/starrocks/analysis/FunctionParams.java
+++ b/fe/fe-core/src/main/java/com/starrocks/analysis/FunctionParams.java
@ -98,6 +98,10 @@ public class FunctionParams implements Writable {
        return orderByElements == null ? null : orderByElements.isEmpty() ? null : orderByElements;
    }

+    /**
+     * Counts the ORDER BY elements attached to these function parameters.
+     *
+     * @return the size of the order-by list, or 0 when no list is present
+     */
+    public int getOrderByElemNum() {
+        if (orderByElements == null) {
+            return 0;
+        }
+        return orderByElements.size();
+    }
+
    public String getOrderByStringToSql() {
        if (orderByElements != null && !orderByElements.isEmpty()) {
            StringBuilder sb = new StringBuilder();
--- a/fe/fe-core/src/main/java/com/starrocks/catalog/AggregateFunction.java
+++ b/fe/fe-core/src/main/java/com/starrocks/catalog/AggregateFunction.java
@ -117,6 +117,12 @@ public class AggregateFunction extends Function {
    // True if "NULLS FIRST", false if "NULLS LAST", null if not specified.
    private List<Boolean> nullsFirst;

+    private boolean isDistinct = false; // DISTINCT flag; copied into the thrift aggregate fn via setIs_distinct
+
+    public void setIsDistinct(boolean isDistinct) { // plain setter, no validation
+        this.isDistinct = isDistinct;
+    }
+
    // only used for serialization
    protected AggregateFunction() {
    }
@ -200,6 +206,9 @@ public class AggregateFunction extends Function {
        isAggregateFn = other.isAggregateFn;
        returnsNonNullOnEmpty = other.returnsNonNullOnEmpty;
        symbolName = other.symbolName;
+        isAscOrder = other.isAscOrder;
+        nullsFirst = other.nullsFirst;
+        isDistinct = other.isDistinct;
    }

    public String getSymbolName() {
@ -337,6 +346,8 @@ public class AggregateFunction extends Function {
        if (nullsFirst != null && !nullsFirst.isEmpty()) {
            aggFn.setNulls_first(nullsFirst);
        }
+        aggFn.setIs_distinct(isDistinct);
+
        aggFn.setSymbol(getSymbolName());
        fn.setAggregate_fn(aggFn);
        return fn;
--- a/fe/fe-core/src/main/java/com/starrocks/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/com/starrocks/catalog/FunctionSet.java
@ -826,6 +826,10 @@ public class FunctionSet {
                Lists.newArrayList(Type.ANY_ELEMENT), Type.ANY_ARRAY, Type.ANY_STRUCT, true,
                true, false, false));

+        addBuiltin(AggregateFunction.createBuiltin(GROUP_CONCAT,
+                Lists.newArrayList(Type.ANY_ELEMENT), Type.VARCHAR, Type.ANY_STRUCT, true,
+                false, false, false));
+
        for (Type t : Type.getSupportedTypes()) {
            if (t.isFunctionType()) {
                continue;
@ -971,14 +975,6 @@ public class FunctionSet {
        addBuiltin(AggregateFunction.createBuiltin(RETENTION, Lists.newArrayList(Type.ARRAY_BOOLEAN),
                Type.ARRAY_BOOLEAN, Type.BIGINT, false, false, false));

-        // Group_concat(string)
-        addBuiltin(AggregateFunction.createBuiltin(GROUP_CONCAT,
-                Lists.newArrayList(Type.VARCHAR), Type.VARCHAR, Type.VARCHAR,
-                false, false, false));
-        // Group_concat(string, string)
-        addBuiltin(AggregateFunction.createBuiltin(GROUP_CONCAT,
-                Lists.newArrayList(Type.VARCHAR, Type.VARCHAR), Type.VARCHAR, Type.VARCHAR,
-                false, false, false));

        // Type.DATE must before Type.DATATIME, because DATE could be considered as DATETIME.
        addBuiltin(AggregateFunction.createBuiltin(WINDOW_FUNNEL,
--- a/fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java
@ -1190,7 +1190,7 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
    private boolean quoteShowCreate = true; // Defined but unused now, for compatibility with MySQL

    @VariableMgr.VarAttr(name = GROUP_CONCAT_MAX_LEN)
-    private long groupConcatMaxLen = 65535;
+    private long groupConcatMaxLen = 1024;

    @VariableMgr.VarAttr(name = FULL_SORT_MAX_BUFFERED_ROWS, flag = VariableMgr.INVISIBLE)
    private long fullSortMaxBufferedRows = 1024000;
@ -2461,6 +2461,7 @@ public class SessionVariable implements Serializable, Writable, Cloneable {
        }

        tResult.setTransmission_encode_level(transmissionEncodeLevel);
+        tResult.setGroup_concat_max_len(groupConcatMaxLen);
        tResult.setRpc_http_min_size(rpcHttpMinSize);

        TCompressionType loadCompressionType =
--- a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AstToStringBuilder.java
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/AstToStringBuilder.java
@ -917,12 +917,25 @@ public class AstToStringBuilder {
                StringLiteral boundary = (StringLiteral) node.getChild(3);
                sb.append(", ").append(boundary.getValue());
                sb.append(")");
-            } else if (functionName.equalsIgnoreCase(FunctionSet.ARRAY_AGG)) {
-                sb.append(visit(node.getChild(0)));
+            } else if (functionName.equals(FunctionSet.ARRAY_AGG) || functionName.equals(FunctionSet.GROUP_CONCAT)) {
+                int end = 1;
+                if (functionName.equals(FunctionSet.GROUP_CONCAT)) {
+                    end = fnParams.exprs().size() - fnParams.getOrderByElemNum() - 1;
+                }
+                for (int i = 0; i < end; ++i) {
+                    if (i != 0) {
+                        sb.append(",");
+                    }
+                    sb.append(visit(node.getChild(i)));
+                }
                List<OrderByElement> sortClause = fnParams.getOrderByElements();
                if (sortClause != null) {
                    sb.append(" ORDER BY ").append(visitAstList(sortClause));
                }
+                if (functionName.equals(FunctionSet.GROUP_CONCAT) && end < node.getChildren().size() && end > 0) {
+                    sb.append(" SEPARATOR ");
+                    sb.append(visit(node.getChild(end)));
+                }
                sb.append(")");
            } else {
                List<String> p = node.getChildren().stream().map(this::visit).collect(Collectors.toList());
--- a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/ExpressionAnalyzer.java
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/ExpressionAnalyzer.java
@ -967,7 +967,7 @@ public class ExpressionAnalyzer {
                fn = fn.copy();
                fn.setArgsType(argumentTypes); // as accepting various types
                fn.setIsNullable(false);
-            } else if (fnName.equals(FunctionSet.ARRAY_AGG)) {
+            } else if (fnName.equals(FunctionSet.ARRAY_AGG) || fnName.equals(FunctionSet.GROUP_CONCAT)) {
                // move order by expr to node child, and extract is_asc and null_first information.
                fn = Expr.getBuiltinFunction(fnName, new Type[] {argumentTypes[0]},
                        Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
@ -984,6 +984,9 @@ public class ExpressionAnalyzer {
                Type[] argsTypes = new Type[argumentTypes.length];
                for (int i = 0; i < argumentTypes.length; ++i) {
                    argsTypes[i] = argumentTypes[i] == Type.NULL ? Type.BOOLEAN : argumentTypes[i];
+                    if (fnName.equals(FunctionSet.GROUP_CONCAT) && i < node.getChildren().size() - isAscOrder.size()) {
+                        argsTypes[i] = Type.VARCHAR;
+                    }
                }
                fn.setArgsType(argsTypes); // as accepting various types
                ArrayList<Type> structTypes = new ArrayList<>(argsTypes.length);
@ -993,7 +996,20 @@ public class ExpressionAnalyzer {
                ((AggregateFunction) fn).setIntermediateType(new StructType(structTypes));
                ((AggregateFunction) fn).setIsAscOrder(isAscOrder);
                ((AggregateFunction) fn).setNullsFirst(nullsFirst);
-                fn.setRetType(new ArrayType(argsTypes[0]));     // return null if scalar agg with empty input
+                if (fnName.equals(FunctionSet.ARRAY_AGG)) {
+                    fn.setRetType(new ArrayType(argsTypes[0]));     // return null if scalar agg with empty input
+                } else {
+                    boolean outputConst = true;
+                    for (int i = 0; i < node.getChildren().size() - isAscOrder.size() - 1; i++) {
+                        if (!node.getChild(i).isConstant()) {
+                            outputConst = false;
+                            break;
+                        }
+                    }
+                    ((AggregateFunction) fn).setIsDistinct(node.getParams().isDistinct() &&
+                            (!isAscOrder.isEmpty() || outputConst));
+                    fn.setRetType(Type.VARCHAR);
+                }
            } else if (FunctionSet.PERCENTILE_DISC.equals(fnName)) {
                argumentTypes[1] = Type.DOUBLE;
                fn = Expr.getBuiltinFunction(fnName, argumentTypes, Function.CompareMode.IS_IDENTICAL);
@ -1216,10 +1232,18 @@ public class ExpressionAnalyzer {
                                node.getChild(1).getType().toString() + "  can't cast to ARRAY<BOOL>");
                    }
                    break;
+                case FunctionSet.GROUP_CONCAT:
                case FunctionSet.ARRAY_AGG: {
-                    for (int i = 1; i < argumentTypes.length; ++i) {
-                        if (argumentTypes[i].isComplexType()) {
-                            throw new SemanticException("array_agg can't support order by nested types, " +
+                    if (node.getChildren().size() == 0) {
+                        throw new SemanticException(fnName + " should have at least one input", node.getPos());
+                    }
+                    int start = 1;
+                    if (fnName.equals(FunctionSet.GROUP_CONCAT)) {
+                        start = argumentTypes.length - node.getParams().getOrderByElemNum();
+                    }
+                    for (int i = start; i < argumentTypes.length; ++i) {
+                        if (argumentTypes[i].isComplexType() || argumentTypes[i].isJsonType()) {
+                            throw new SemanticException(fnName + " can't support order by nested types, " +
                                    "but " + i + "-th input is " + argumentTypes[i].toSql());
                        }
                    }
--- a/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/FunctionAnalyzer.java
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/analyzer/FunctionAnalyzer.java
@ -121,30 +121,18 @@ public class FunctionAnalyzer {
        }

        if (fnName.getFunction().equals(FunctionSet.GROUP_CONCAT)) {
-            if (functionCallExpr.getChildren().size() > 2 || functionCallExpr.getChildren().isEmpty()) {
+            // children are laid out as: value arguments, separator, ORDER BY expressions; once the
+            // ORDER BY expressions are discounted at least one value argument and the separator must remain
+            if (functionCallExpr.getChildren().size() - fnParams.getOrderByElemNum() < 2) {
                throw new SemanticException(
-                        "group_concat requires one or two parameters: " + functionCallExpr.toSql(),
+                        "group_concat requires at least one parameter: " + functionCallExpr.toSql(),
                        functionCallExpr.getPos());
            }

-            if (fnParams.isDistinct()) {
-                throw new SemanticException("group_concat does not support DISTINCT", functionCallExpr.getPos());
-            }
-
-            Expr arg0 = functionCallExpr.getChild(0);
-            if (!arg0.getType().isStringType() && !arg0.getType().isNull()) {
+            // the separator is the child right before the ORDER BY expressions; index it with the same
+            // child/order-by counts that were validated above so the lookup is guaranteed to be in range
+            int sepPos = functionCallExpr.getChildren().size() - fnParams.getOrderByElemNum() - 1;
+            Expr separator = functionCallExpr.getChild(sepPos);
+            if (!separator.getType().isStringType() && !separator.getType().isNull()) {
                throw new SemanticException(
-                        "group_concat requires first parameter to be of getType() STRING: " + functionCallExpr.toSql(),
-                        arg0.getPos());
-            }
-
-            if (functionCallExpr.getChildren().size() == 2) {
-                Expr arg1 = functionCallExpr.getChild(1);
-                if (!arg1.getType().isStringType() && !arg1.getType().isNull()) {
-                    throw new SemanticException(
-                            "group_concat requires second parameter to be of getType() STRING: " +
-                                    functionCallExpr.toSql(), arg1.getPos());
-                }
+                        "group_concat requires separator to be of type STRING: " +
+                                functionCallExpr.toSql(), separator.getPos());
            }
            return;
        }
--- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/AggType.java
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/operator/AggType.java
@ -42,6 +42,10 @@ public enum AggType {
        return this.equals(AggType.GLOBAL);
    }

+    /**
+     * @return true when this aggregation type is either {@code GLOBAL} or {@code DISTINCT_GLOBAL}
+     */
+    public boolean isAnyGlobal() {
+        return this == AggType.GLOBAL || this == AggType.DISTINCT_GLOBAL;
+    }
+
    public boolean isDistinctGlobal() {
        return this.equals(AggType.DISTINCT_GLOBAL);
    }
--- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/RewriteMultiDistinctByCTERule.java
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/RewriteMultiDistinctByCTERule.java
@ -279,11 +279,14 @@ public class RewriteMultiDistinctByCTERule extends TransformationRule {
        for (ColumnRefOperator distinctAggRef : distinctAggList) {
            CallOperator aggCallOperator = aggregate.getAggregations().get(distinctAggRef);
            if (aggCallOperator.getFnName().equalsIgnoreCase(FunctionSet.COUNT) ||
-                    aggCallOperator.getFnName().equalsIgnoreCase(FunctionSet.SUM)) {
+                    aggCallOperator.getFnName().equalsIgnoreCase(FunctionSet.SUM) ||
+                    aggCallOperator.getFnName().equalsIgnoreCase(FunctionSet.GROUP_CONCAT)) {
                allCteConsumes.offer(buildCountSumDistinctCTEConsume(distinctAggRef, aggCallOperator,
                        aggregate, cteProduce, factory));
                consumeAggCallMap.put(aggCallOperator, distinctAggRef);
                projectionMap.put(distinctAggRef, distinctAggRef);
+            } else if (!aggCallOperator.getFnName().equals(FunctionSet.AVG)) {
+                throw new UnsupportedOperationException(aggCallOperator.getFnName() + " does not distinct.");
            }
        }

--- a/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/SplitAggregateRule.java
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/optimizer/rule/transformation/SplitAggregateRule.java
@ -317,6 +317,8 @@ public class SplitAggregateRule extends TransformationRule {
                    fnCall.getFunction(), fnCall.getChild(0).getType());
            return new CallOperator(
                    FunctionSet.MULTI_DISTINCT_SUM, fnCall.getType(), fnCall.getChildren(), multiDistinctSumFn, false);
+        } else if (functionName.equals(FunctionSet.GROUP_CONCAT)) {
+            return fnCall;
        }
        return null;
    }
@ -599,8 +601,8 @@ public class SplitAggregateRule extends TransformationRule {
            ColumnRefOperator column = entry.getKey();
            CallOperator aggregation = entry.getValue();
            CallOperator callOperator;
+            Type intermediateType = getIntermediateType(aggregation);
            if (!aggregation.isDistinct()) {
-                Type intermediateType = getIntermediateType(aggregation);
                List<ScalarOperator> arguments =
                        Lists.newArrayList(new ColumnRefOperator(column.getId(), intermediateType, column.getName(),
                                aggregation.isNullable()));
@ -633,7 +635,8 @@ public class SplitAggregateRule extends TransformationRule {
                            Lists.newArrayList(newChildren), fn);
                }
                // Remove distinct
-                callOperator = new CallOperator(aggregation.getFnName(), aggregation.getType(),
+                callOperator = new CallOperator(aggregation.getFnName(), aggType.isAnyGlobal() ?
+                        aggregation.getType() : intermediateType,
                        aggregation.getChildren(), aggregation.getFunction());
            }
            newAggregationMap.put(column, callOperator);
--- a/fe/fe-core/src/main/java/com/starrocks/sql/parser/AstBuilder.java
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/parser/AstBuilder.java
@ -435,6 +435,7 @@ import java.util.TreeMap;
 import java.util.stream.Collectors;

 import static com.starrocks.sql.common.ErrorMsgProxy.PARSER_ERROR_MSG;
+import static java.lang.String.format;
 import static java.util.stream.Collectors.toList;

 public class AstBuilder extends StarRocksBaseVisitor<ParseNode> {
@ -5675,6 +5676,7 @@ public class AstBuilder extends StarRocksBaseVisitor<ParseNode> {
    public ParseNode visitAggregationFunctionCall(StarRocksParser.AggregationFunctionCallContext context) {
        NodePosition pos = createPos(context);
        String functionName;
+        boolean isGroupConcat = false;
        if (context.aggregationFunction().COUNT() != null) {
            functionName = FunctionSet.COUNT;
        } else if (context.aggregationFunction().AVG() != null) {
@ -5685,6 +5687,9 @@ public class AstBuilder extends StarRocksBaseVisitor<ParseNode> {
            functionName = FunctionSet.MIN;
        } else if (context.aggregationFunction().ARRAY_AGG() != null) {
            functionName = FunctionSet.ARRAY_AGG;
+        } else if (context.aggregationFunction().GROUP_CONCAT() != null) {
+            functionName = FunctionSet.GROUP_CONCAT;
+            isGroupConcat = true;
        } else {
            functionName = FunctionSet.MAX;
        }
@ -5706,11 +5711,36 @@ public class AstBuilder extends StarRocksBaseVisitor<ParseNode> {
        if (isDistinct && CollectionUtils.isEmpty(context.aggregationFunction().expression())) {
            throw new ParsingException(PARSER_ERROR_MSG.wrongNumOfArgs(functionName), pos);
        }
-
+        // Collect the call arguments; for group_concat the separator always ends up as the last one.
+        List<Expr> exprs = visit(context.aggregationFunction().expression(), Expr.class);
+        if (isGroupConcat && context.aggregationFunction().SEPARATOR() == null && !exprs.isEmpty()) {
+            // no explicit SEPARATOR clause: append the default "," separator
+            exprs.add(new StringLiteral(",", pos));
+        }
+        if (!orderByElements.isEmpty()) {
+            // ordinals may only point at value arguments, so group_concat's trailing separator is excluded
+            int ordinalLimit = isGroupConcat ? exprs.size() - 1 : exprs.size();
+            for (OrderByElement orderItem : orderByElements) {
+                if (!(orderItem.getExpr() instanceof IntLiteral)) {
+                    continue;
+                }
+                // an integer literal in ORDER BY is a 1-based reference to one of the arguments
+                long oneBased = ((IntLiteral) orderItem.getExpr()).getLongValue();
+                if (oneBased < 1 || oneBased > ordinalLimit) {
+                    throw new ParsingException(format("ORDER BY position %s is not in %s output list", oneBased,
+                            functionName), pos);
+                }
+                orderItem.setExpr(exprs.get((int) oneBased - 1));
+            }
+            // constant sort keys cannot influence the order, so they are dropped
+            orderByElements = orderByElements.stream()
+                    .filter(sortKey -> !sortKey.getExpr().isConstant())
+                    .collect(toList());
+        }
        FunctionCallExpr functionCallExpr = new FunctionCallExpr(functionName,
                context.aggregationFunction().ASTERISK_SYMBOL() == null ?
-                        new FunctionParams(isDistinct,
-                                visit(context.aggregationFunction().expression(), Expr.class), orderByElements) :
+                        new FunctionParams(isDistinct, exprs, orderByElements) :
                        FunctionParams.createStarParam(), pos);

        functionCallExpr = SyntaxSugars.parse(functionCallExpr);
--- a/fe/fe-core/src/main/java/com/starrocks/sql/parser/StarRocks.g4
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/parser/StarRocks.g4
@ -2192,6 +2192,7 @@ aggregationFunction
    | MIN '(' setQuantifier? expression ')'
    | SUM '(' setQuantifier? expression ')'
    | ARRAY_AGG '(' expression (ORDER BY sortItem (',' sortItem)*)? ')'
+    | GROUP_CONCAT '(' setQuantifier? expression (',' expression)* (ORDER BY sortItem (',' sortItem)*)? (SEPARATOR expression)? ')'
    ;

 userVariable
@ -2589,7 +2590,7 @@ nonReserved
    | ENABLE | END | ENGINE | ENGINES | ERRORS | EVENTS | EXECUTE | EXTERNAL | EXTRACT | EVERY | ENCLOSE | ESCAPE | EXPORT
    | FAILPOINT | FAILPOINTS | FIELDS | FILE | FILTER | FIRST | FLOOR | FOLLOWING | FORMAT | FN | FRONTEND | FRONTENDS | FOLLOWER | FREE
    | FUNCTIONS
-    | GLOBAL | GRANTS
+    | GLOBAL | GRANTS | GROUP_CONCAT
    | HASH | HISTOGRAM | HELP | HLL_UNION | HOST | HOUR | HUB
    | IDENTIFIED | IMAGE | IMPERSONATE | INACTIVE | INCREMENTAL | INDEXES | INSTALL | INTEGRATION | INTEGRATIONS | INTERMEDIATE
    | INTERVAL | ISOLATION
@ -2604,7 +2605,7 @@ nonReserved
    | REMOVE | REWRITE | RANDOM | RANK | RECOVER | REFRESH | REPAIR | REPEATABLE | REPLACE_IF_NOT_NULL | REPLICA | REPOSITORY
    | REPOSITORIES
    | RESOURCE | RESOURCES | RESTORE | RESUME | RETURNS | RETRY | REVERT | ROLE | ROLES | ROLLUP | ROLLBACK | ROUTINE | ROW
-    | SAMPLE | SCHEDULER | SECOND | SECURITY | SERIALIZABLE |SEMI | SESSION | SETS | SIGNED | SNAPSHOT | SQLBLACKLIST | START
+    | SAMPLE | SCHEDULER | SECOND | SECURITY | SEPARATOR | SERIALIZABLE |SEMI | SESSION | SETS | SIGNED | SNAPSHOT | SQLBLACKLIST | START
    | STREAM | SUM | STATUS | STOP | SKIP_HEADER | SWAP
    | STORAGE| STRING | STRUCT | STATS | SUBMIT | SUSPEND | SYNC | SYSTEM_TIME
    | TABLES | TABLET | TASK | TEMPORARY | TIMESTAMP | TIMESTAMPADD | TIMESTAMPDIFF | THAN | TIME | TIMES | TRANSACTION | TRACE
--- a/fe/fe-core/src/main/java/com/starrocks/sql/parser/StarRocksLex.g4
+++ b/fe/fe-core/src/main/java/com/starrocks/sql/parser/StarRocksLex.g4
@ -180,6 +180,7 @@ GROUP: 'GROUP';
 GROUPS: 'GROUPS';
 GROUPING: 'GROUPING';
 GROUPING_ID: 'GROUPING_ID';
+GROUP_CONCAT: 'GROUP_CONCAT';
 HASH: 'HASH';
 HAVING: 'HAVING';
 HELP: 'HELP';
@ -361,6 +362,7 @@ SECOND: 'SECOND';
 SECURITY: 'SECURITY';
 SELECT: 'SELECT';
 SEMI: 'SEMI';
+SEPARATOR: 'SEPARATOR';
 SERIALIZABLE: 'SERIALIZABLE';
 SESSION: 'SESSION';
 SET: 'SET';
--- a/fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeExprTest.java
+++ b/fe/fe-core/src/test/java/com/starrocks/sql/analyzer/AnalyzeExprTest.java
@ -318,7 +318,6 @@ public class AnalyzeExprTest {
        analyzeSuccess("select array_agg(v1 order by 1) from t0;");
        analyzeSuccess("select array_agg(null);");
        analyzeSuccess("select array_agg(v1 order by v1) from t0;");
-        analyzeSuccess("select array_agg(null order by 11);");
        analyzeSuccess("select array_agg(null order by 1,1);");
        analyzeSuccess("select array_agg(1 order by null,null);");
        analyzeSuccess("select array_agg(1 order by null,null,v1) from t0 group by v2;");
@ -332,7 +331,7 @@ public class AnalyzeExprTest {
        analyzeSuccess("select array_agg(case when c1='a' then struct(1,3) else struct(1,2) end order by c3) as arr1" +
                " from (select 'a' as c1, 1 as c2, 2 as c3)t");

-
+        analyzeFail("select array_agg(null order by 11);");
        analyzeFail("select array_agg(null order by);");
        analyzeFail("select array_agg(null,'a');");
        analyzeFail("select array_agg(1,1);");
@ -343,6 +342,44 @@ public class AnalyzeExprTest {
                " from (select 'a' as c1, 1 as c2, 2 as c3)t");
    }

+    @Test
+    public void testGroupConcat() { // group_concat statements: analyzeSuccess cases first, then analyzeFail cases
+        analyzeSuccess("select group_concat(v1 order by v2 desc), group_concat(distinct v1 order by v2) from t0;");
+        analyzeSuccess("select group_concat(v1 order by v2, v3 desc nulls last) from t0 group by v3;");
+        analyzeSuccess("select group_concat(null) from t0;");
+        analyzeSuccess("select group_concat(null order by null) from t0;");
+        analyzeSuccess("select group_concat(v1 order by null) from t0;");
+        analyzeSuccess("select group_concat(v1 order by 1) from t0;"); // ORDER BY ordinal within the argument list
+        analyzeSuccess("select group_concat(null);");
+        analyzeSuccess("select group_concat(v1 order by v1) from t0;");
+        analyzeSuccess("select group_concat(distinct null order by 1,1);");
+        analyzeSuccess("select group_concat(1 order by null,null);");
+        analyzeSuccess("select group_concat(1 order by null,null,v1) from t0 group by v2;");
+        analyzeSuccess("select group_concat(a order by b) from (select null as a, null as b " +
+                "union all select v1 as a, v3 as b from t0)A;");
+        analyzeSuccess("select group_concat(v1 order by v1), group_concat(v1),group_concat(v2) from t0;");
+        analyzeSuccess("select group_concat(null,'a');"); // several value arguments
+        analyzeSuccess("select group_concat(1,1);");
+        analyzeSuccess("select group_concat(1 order by 1 desc nulls first)");
+
+        analyzeFail("select group_concat(case when c1='a' then [1,3] else [1,2] end order by c3) as arr1 " +
+                "from (select 'a' as c1, 1 as c2, 2 as c3)t");
+        analyzeFail("select group_concat(case when c1='a' then map(1,3) else map(1,2) end order by c3) as arr1 " +
+                "from (select 'a' as c1, 1 as c2, 2 as c3)t");
+        analyzeFail("select group_concat(case when c1='a' then struct(1,3) else struct(1,2) end order by c3) as arr1" +
+                " from (select 'a' as c1, 1 as c2, 2 as c3)t");
+        analyzeFail("select group_concat(null order by 11);"); // ordinal 11 is outside the argument list
+        analyzeFail("select group_concat(null order by);");
+        analyzeFail("select group_concat()"); // no argument at all
+        analyzeFail("select group_concat(separator '')"); // SEPARATOR without any value argument
+        analyzeFail("select group_concat(case when c1='a' then struct(1,3) else map(1,2) end order by c3) as arr1 from " +
+                " (select 'a' as c1, 1 as c2, 2 as c3)t");
+        analyzeFail("select group_concat(case when c1='a' then [1,3] else map(1,2) end order by c3) as arr1" +
+                " from (select 'a' as c1, 1 as c2, 2 as c3)t");
+        analyzeFail("select group_concat(1 order by 1 nulls first desc)"); // direction written after NULLS FIRST
+        analyzeFail("select group_concat(name) over (partition by id) from ss"); // used with an OVER clause
+    }
+
    @Test
    public void testMapTypeConstructor() {
        analyzeSuccess("select map()");
--- a/fe/fe-core/src/test/java/com/starrocks/sql/plan/MetricTypeTest.java
+++ b/fe/fe-core/src/test/java/com/starrocks/sql/plan/MetricTypeTest.java
@ -32,7 +32,7 @@ public class MetricTypeTest extends PlanTestBase {
                        "PREAGGREGATION: OFF. Reason: Aggregate Operator not match: COUNT <--> BITMAP_UNION");

        starRocksAssert.query("select group_concat(id2) from test.bitmap_table;")
-                .analysisError("No matching function with signature: group_concat(bitmap).");
+                .analysisError("No matching function with signature: group_concat(bitmap");

        starRocksAssert.query("select sum(id2) from test.bitmap_table;").analysisError(
                "No matching function with signature: sum(bitmap).");
@ -61,7 +61,7 @@ public class MetricTypeTest extends PlanTestBase {
                "PREAGGREGATION: OFF. Reason: Aggregate Operator not match: COUNT <--> HLL_UNION");

        starRocksAssert.query("select group_concat(id2) from test.hll_table;")
-                .analysisError("No matching function with signature: group_concat(hll).");
+                .analysisError("No matching function with signature: group_concat(hll");

        starRocksAssert.query("select sum(id2) from test.hll_table;")
                .analysisError("No matching function with signature: sum(hll).");
--- a/gensrc/thrift/InternalService.thrift
+++ b/gensrc/thrift/InternalService.thrift
@ -49,7 +49,7 @@ include "RuntimeFilter.thrift"

 // constants for function version
 enum TFunctionVersion {
-    RUNTIME_FILTER_SERIALIZE_VERSION_2 = 6,
+    RUNTIME_FILTER_SERIALIZE_VERSION_2 = 7,
 }

 enum TQueryType {
@ -215,6 +215,7 @@ struct TQueryOptions {
  // used to judge whether the profile need to report to FE, only meaningful when enable_profile=true
  97: optional i64 load_profile_collect_second;

+  100: optional i64 group_concat_max_len = 1024;
  101: optional i64 runtime_profile_report_interval = 30;

  102: optional bool enable_collect_table_level_scan_stats;
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@ -309,6 +309,7 @@ struct TAggregateFunction {
  // Indicates, for each expr, if nulls should be listed first or last. This is
  // independent of is_asc_order.
  13: optional list<bool> nulls_first
+  14: optional bool is_distinct = false
 }

 struct TTableFunction {
--- a/test/sql/test_agg/R/test_distinct_agg
+++ b/test/sql/test_agg/R/test_distinct_agg
@ -145,42 +145,42 @@ select avg(distinct c1), count(c2) from skew_agg;
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'force_streaming',new_planner_agg_stage='3') */     (         ifnull(sum(murmur_hash3_32(week)), 0) + ifnull(sum(murmur_hash3_32(c0)), 0) + ifnull(sum(murmur_hash3_32(__col_0)), 0) + ifnull(sum(murmur_hash3_32(__col_1)), 0) + ifnull(sum(murmur_hash3_32(__col_2)), 0) + ifnull(sum(murmur_hash3_32(__col_3)), 0)     ) as fingerprint from     (         select             date_trunc('week', c4) as week,             c0, (array_length(array_agg(c1))) as __col_0,
 (sum(distinct length(c2))) as __col_1, (length(group_concat(cast(c3 as VARCHAR)))) as __col_2, (min(c2)) as __col_3         from             skew_agg         group by             date_trunc('week', c4),             c0     ) as t;
 -- result:
-5711937174881
+-5714598445053
 -- !result
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'force_streaming',new_planner_agg_stage='2') */     (         ifnull(sum(murmur_hash3_32(week)), 0) + ifnull(sum(murmur_hash3_32(c0)), 0) + ifnull(sum(murmur_hash3_32(__col_0)), 0) + ifnull(sum(murmur_hash3_32(__col_1)), 0) + ifnull(sum(murmur_hash3_32(__col_2)), 0) + ifnull(sum(murmur_hash3_32(__col_3)), 0)     ) as fingerprint from     (         select             date_trunc('week', c4) as week,             c0, (array_length(array_agg(c1))) as __col_0,
 (sum(distinct length(c2))) as __col_1, (length(group_concat(cast(c3 as VARCHAR)))) as __col_2, (min(c2)) as __col_3         from             skew_agg         group by             date_trunc('week', c4),             c0     ) as t;
 -- result:
-5711937174881
+-5714598445053
 -- !result
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'auto',new_planner_agg_stage='3') */     (         ifnull(sum(murmur_hash3_32(week)), 0) + ifnull(sum(murmur_hash3_32(c0)), 0) + ifnull(sum(murmur_hash3_32(__col_0)), 0) + ifnull(sum(murmur_hash3_32(__col_1)), 0) + ifnull(sum(murmur_hash3_32(__col_2)), 0) + ifnull(sum(murmur_hash3_32(__col_3)), 0)     ) as fingerprint from     (         select             date_trunc('week', c4) as week,             c0, (array_length(array_agg(c1))) as __col_0,
 (sum(distinct length(c2))) as __col_1, (length(group_concat(cast(c3 as VARCHAR)))) as __col_2, (min(c2)) as __col_3         from             skew_agg         group by             date_trunc('week', c4),             c0     ) as t;
 -- result:
-5711937174881
+-5714598445053
 -- !result
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'auto',new_planner_agg_stage='3') */     (         ifnull(sum(murmur_hash3_32(week)), 0) + ifnull(sum(murmur_hash3_32(c0)), 0) + ifnull(sum(murmur_hash3_32(__col_0)), 0) + ifnull(sum(murmur_hash3_32(__col_1)), 0) + ifnull(sum(murmur_hash3_32(__col_2)), 0) + ifnull(sum(murmur_hash3_32(__col_3)), 0)     ) as fingerprint from     (         select             date_trunc('week', c4) as week,             c0, (array_length(array_agg(c1))) as __col_0,
 (sum(distinct length(c2))) as __col_1, (length(group_concat(cast(c3 as VARCHAR)))) as __col_2, (min(c2)) as __col_3         from             skew_agg         group by             date_trunc('week', c4),             c0     ) as t;
 -- result:
-5711937174881
+-5714598445053
 -- !result
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'force_streaming',new_planner_agg_stage='2') */     (         ifnull(sum(murmur_hash3_32(week)), 0) + ifnull(sum(murmur_hash3_32(c0)), 0) + ifnull(sum(murmur_hash3_32(__col_0)), 0) + ifnull(sum(murmur_hash3_32(__col_1)), 0) + ifnull(sum(murmur_hash3_32(__col_2)), 0) + ifnull(sum(murmur_hash3_32(__col_3)), 0) + ifnull(sum(murmur_hash3_32(__col_4)), 0) + ifnull(sum(murmur_hash3_32(__col_5)), 0) + ifnull(sum(murmur_hash3_32(__col_6)), 0) + ifnull(sum(murmur_hash3_32(__col_7)), 0) + ifnull(sum(murmur_hash3_32(__col_8)), 0)     ) as fingerprint from     (         select             date_trunc('week', c2) as week,             c0, (array_length(array_agg(c1))) as __col_0, (sum(distinct length(c0))) as __col_1, (length(group_concat(cast(c1 as VARCHAR)))) as __col_2, (min(c3)) as __col_3, (left(variance(c1), 6)) as __col_4, (multi_distinct_count(length(c2))) as __col_5, (ndv(cast(c4 as BIGINT))) as __col_6, (hll_union_agg(hll_hash(c5))) as __col_7, (bitmap_union_int(cast(length(c1) as INT))) as __col_8         from
      skew_agg         group by             date_trunc('week', c2),             c0     ) as t;
 -- result:
-12028490117342
+-12031151387514
 -- !result
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'force_streaming',new_planner_agg_stage='3') */     (         ifnull(sum(murmur_hash3_32(week)), 0) + ifnull(sum(murmur_hash3_32(c0)), 0) + ifnull(sum(murmur_hash3_32(__col_0)), 0) + ifnull(sum(murmur_hash3_32(__col_1)), 0) + ifnull(sum(murmur_hash3_32(__col_2)), 0) + ifnull(sum(murmur_hash3_32(__col_3)), 0) + ifnull(sum(murmur_hash3_32(__col_4)), 0) + ifnull(sum(murmur_hash3_32(__col_5)), 0) + ifnull(sum(murmur_hash3_32(__col_6)), 0) + ifnull(sum(murmur_hash3_32(__col_7)), 0) + ifnull(sum(murmur_hash3_32(__col_8)), 0)     ) as fingerprint from     (         select             date_trunc('week', c2) as week,             c0, (array_length(array_agg(c1))) as __col_0, (sum(distinct length(c0))) as __col_1, (length(group_concat(cast(c1 as VARCHAR)))) as __col_2, (min(c3)) as __col_3, (left(variance(c1), 6)) as __col_4, (multi_distinct_count(length(c2))) as __col_5, (ndv(cast(c4 as BIGINT))) as __col_6, (hll_union_agg(hll_hash(c5))) as __col_7, (bitmap_union_int(cast(length(c1) as INT))) as __col_8         from
      skew_agg         group by             date_trunc('week', c2),             c0     ) as t;
 -- result:
-12028490117342
+-12031151387514
 -- !result
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'auto',new_planner_agg_stage='2') */     (         ifnull(sum(murmur_hash3_32(week)), 0) + ifnull(sum(murmur_hash3_32(c0)), 0) + ifnull(sum(murmur_hash3_32(__col_0)), 0) + ifnull(sum(murmur_hash3_32(__col_1)), 0) + ifnull(sum(murmur_hash3_32(__col_2)), 0) + ifnull(sum(murmur_hash3_32(__col_3)), 0) + ifnull(sum(murmur_hash3_32(__col_4)), 0) + ifnull(sum(murmur_hash3_32(__col_5)), 0) + ifnull(sum(murmur_hash3_32(__col_6)), 0) + ifnull(sum(murmur_hash3_32(__col_7)), 0) + ifnull(sum(murmur_hash3_32(__col_8)), 0)     ) as fingerprint from     (         select             date_trunc('week', c2) as week,             c0, (array_length(array_agg(c1))) as __col_0, (sum(distinct length(c0))) as __col_1, (length(group_concat(cast(c1 as VARCHAR)))) as __col_2, (min(c3)) as __col_3, (left(variance(c1), 6)) as __col_4, (multi_distinct_count(length(c2))) as __col_5, (ndv(cast(c4 as BIGINT))) as __col_6, (hll_union_agg(hll_hash(c5))) as __col_7, (bitmap_union_int(cast(length(c1) as INT))) as __col_8         from
      skew_agg         group by             date_trunc('week', c2),             c0     ) as t;
 -- result:
-12028490117342
+-12031151387514
 -- !result
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'auto',new_planner_agg_stage='3') */     (         ifnull(sum(murmur_hash3_32(week)), 0) + ifnull(sum(murmur_hash3_32(c0)), 0) + ifnull(sum(murmur_hash3_32(__col_0)), 0) + ifnull(sum(murmur_hash3_32(__col_1)), 0) + ifnull(sum(murmur_hash3_32(__col_2)), 0) + ifnull(sum(murmur_hash3_32(__col_3)), 0) + ifnull(sum(murmur_hash3_32(__col_4)), 0) + ifnull(sum(murmur_hash3_32(__col_5)), 0) + ifnull(sum(murmur_hash3_32(__col_6)), 0) + ifnull(sum(murmur_hash3_32(__col_7)), 0) + ifnull(sum(murmur_hash3_32(__col_8)), 0)     ) as fingerprint from     (         select             date_trunc('week', c2) as week,             c0, (array_length(array_agg(c1))) as __col_0, (sum(distinct length(c0))) as __col_1, (length(group_concat(cast(c1 as VARCHAR)))) as __col_2, (min(c3)) as __col_3, (left(variance(c1), 6)) as __col_4, (multi_distinct_count(length(c2))) as __col_5, (ndv(cast(c4 as BIGINT))) as __col_6, (hll_union_agg(hll_hash(c5))) as __col_7, (bitmap_union_int(cast(length(c1) as INT))) as __col_8         from
      skew_agg         group by             date_trunc('week', c2),             c0     ) as t;
 -- result:
-12028490117342
+-12031151387514
 -- !result
 select     /*+ SET_VAR (streaming_preaggregation_mode = 'auto',new_planner_agg_stage='3') */  array_length(array_agg(a1)), count(distinct a1) as cnt from (select split('a,b,c', ',') as a1, 'aaa' as b1) t1 group by b1;
 -- result:
--- a/test/sql/test_agg_function/R/test_group_concat
+++ b/test/sql/test_agg_function/R/test_group_concat
--- a/test/sql/test_agg_function/T/test_group_concat
+++ b/test/sql/test_agg_function/T/test_group_concat
@ -1,4 +1,4 @@
-- name: testForEmptySetInput
+-- name: testGroupConcat
 CREATE TABLE IF NOT EXISTS `lineorder` (
    `lo_orderkey` int(11) NOT NULL COMMENT "",
    `lo_shipmode` varchar(11) NOT NULL COMMENT ""
@ -9,4 +9,296 @@ DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 48
 PROPERTIES (
    "replication_num" = "1"
 );
-SELECT GROUP_CONCAT(lo_shipmode) orgs FROM lineorder WHERE 1 = 2;
+SELECT GROUP_CONCAT(lo_shipmode) orgs FROM lineorder WHERE 1 = 2;
+
+-- Fixture table for the group_concat statements below: four nullable columns
+-- (id, name, subject, score), duplicate key on id, hash-distributed on id
+-- into 4 buckets, single replica.
+CREATE TABLE `ss` (
+  `id` int(11) NULL COMMENT "",
+  `name` varchar(255) NULL COMMENT "",
+  `subject` varchar(255) NULL COMMENT "",
+  `score` int(11) NULL COMMENT ""
+) ENGINE=OLAP
+DUPLICATE KEY(`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS 4
+PROPERTIES (
+"replication_num" = "1",
+"in_memory" = "false",
+"enable_persistent_index" = "false",
+"replicated_storage" = "true",
+"compression" = "LZ4"
+);
+
+-- Seed data, 14 rows: repeated ids, rows with a NULL id, one all-NULL row,
+-- NULL name/subject/score values, and names/subjects containing Chinese
+-- (multi-byte) text, some mixed with ASCII.
+insert into ss values (1,"Tom","English",90);
+insert into ss values (1,"Tom","Math",80);
+insert into ss values (2,"Tom","English",NULL);
+insert into ss values (2,"Tom",NULL,NULL);
+insert into ss values (3,"May",NULL,NULL);
+insert into ss values (3,"Ti","English",98);
+insert into ss values (4,NULL,NULL,NULL);
+insert into ss values (NULL,NULL,NULL,NULL);
+insert into ss values (NULL,"Ti","物理Phy",99);
+insert into ss values (11,"张三此地无银三百两","英文English",98);
+insert into ss values (11,"张三掩耳盗铃","Math数学欧拉方程",78);
+insert into ss values (12,"李四大闹天空","英语外语美誉",NULL);
+insert into ss values (2,"王武程咬金","语文北京上海",22);
+insert into ss values (3,"欧阳诸葛方程","数学大不列颠",NULL);
+
+-- Grouped by id; no session variable has been set yet in the visible part of
+-- this file. The first half uses group_concat(distinct ...), the second half
+-- repeats similar shapes without DISTINCT. ORDER BY items inside the call mix
+-- numeric literals (1, 2, 4, 4.00, -20), column names, and NULL; arguments
+-- include columns, constants and NULL. Some statements also compute
+-- count(distinct), max or array_agg in the same select list.
+select group_concat(distinct name,subject order by 1,2), count(distinct id), group_concat(name order by 1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss group by id order by id;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss group by id order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1, 2) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2), group_concat(distinct score order by 1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,score) from ss group by id order by 1;
+select group_concat(distinct name,subject order by score, 1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat(distinct name,subject order by score,4,2,1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by score,4.00, 1,2) from ss group by id order by 1;
+select group_concat(distinct name,null order by score,1,4.00) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2, null) from ss group by id order by 1;
+select group_concat(distinct null order by score,4.00) from ss group by id order by 1;
+select group_concat(distinct name, score order by score,4.00, 1),group_concat(subject order by score,4.00, 1),array_agg(subject order by score,4.00, 1)  from ss group by id order by 1;
+select group_concat(distinct score order by 1,2) from ss group by id order by 1;
+select group_concat(distinct score order by 1,name) from ss group by id order by 1;
+select group_concat(distinct 1,2 order by 1,2) from ss group by id order by 1;
+select group_concat(distinct 1,2 order by score,2) from ss group by id order by 1;
+select group_concat(distinct 3.1323,subject order by 1, 2,-20) from ss group by id order by 1;
+select group_concat( name,subject order by 1,2), count(distinct id), max(score) from ss group by id order by 1;
+select group_concat( name,subject order by 1,score), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,4,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,4.00,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,null order by score,4.00) from ss group by id order by 1;
+select group_concat( name,subject order by 1,2, null) from ss group by id order by 1;
+select group_concat( null order by score,4.00) from ss group by id order by 1;
+select group_concat( score order by 1,2) from ss group by id order by 1;
+select group_concat( score order by 1,name) from ss group by id order by 1;
+select group_concat( 1,2 order by 1,2) from ss group by id order by 1;
+select group_concat( 1,2 order by score,2) from ss group by id order by 1;
+select group_concat( 3.1323,subject order by 1,2,-20) from ss group by id order by 1;
+
+-- Same kind of coverage without GROUP BY (one aggregate over the whole
+-- table); still no session variable set in the visible part of this file.
+-- Adds ORDER BY items that are expressions: length(name) and score+1.
+select group_concat(distinct name,subject order by 1,2), count(distinct id), group_concat(name order by 1) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1,2) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2), group_concat(distinct score order by 1) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss order by 1;
+select group_concat(distinct name,subject order by 1,score,2) from ss order by 1;
+select group_concat(distinct name,subject order by score,length(name),1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat(distinct name,subject order by score+1,1,2,4) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2,score,4.00) from ss order by 1;
+select group_concat(distinct name,null order by score,4.00) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2, null) from ss order by 1;
+select group_concat(distinct null order by score,4.00) from ss order by 1;
+select group_concat(distinct name order by score,4.00,1),group_concat(subject order by score,4.00,1),array_agg(subject order by score,4.00,1)  from ss order by 1;
+select group_concat(distinct score order by 1,2) from ss order by 1;
+select group_concat(distinct score order by 1,name) from ss order by 1;
+select group_concat(distinct 1,2 order by 1,2) from ss order by 1;
+select group_concat(distinct 1,2 order by score,2) from ss order by 1;
+select group_concat(distinct 3.1323,subject order by 1,-20) from ss order by 1;
+select group_concat( name,subject order by 1,2), count(distinct id), max(score) from ss order by 1;
+select group_concat( name,subject order by 1,score), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,4), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,4.00,1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,null order by score,4.00, 1) from ss order by 1;
+select group_concat( name,subject order by null,1,2) from ss order by 1;
+select group_concat( null order by score,4.00) from ss order by 1;
+select group_concat( score order by 1,2) from ss order by 1;
+select group_concat( score order by 1,name) from ss order by 1;
+select group_concat( 1,2 order by 1,2) from ss order by 1;
+select group_concat( 1,2 order by score,2) from ss order by 1;
+select group_concat( 3.1323,subject order by 1,-20) from ss order by 1;
+
+-- Switch the session to new_planner_agg_stage = 2 and
+-- streaming_preaggregation_mode = force_streaming, then run grouped-by-id
+-- group_concat statements (DISTINCT first, then non-DISTINCT) under those
+-- settings.
+-- NOTE(review): presumably meant to exercise the split/streaming aggregation
+-- path with the same statements as the earlier grouped section - confirm.
+set new_planner_agg_stage = 2;
+set streaming_preaggregation_mode = force_streaming;
+select group_concat(distinct name,subject order by 1,2), count(distinct id), group_concat(name order by 1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss group by id order by id;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss group by id order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1, 2) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2), group_concat(distinct score order by 1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,score) from ss group by id order by 1;
+select group_concat(distinct name,subject order by score, 1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat(distinct name,subject order by score,4,2,1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by score,4.00, 1,2) from ss group by id order by 1;
+select group_concat(distinct name,null order by score,1,4.00) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2, null) from ss group by id order by 1;
+select group_concat(distinct null order by score,4.00) from ss group by id order by 1;
+select group_concat(distinct name, score order by score,4.00, 1),group_concat(subject order by score,4.00, 1),array_agg(subject order by score,4.00, 1)  from ss group by id order by 1;
+select group_concat(distinct score order by 1,2) from ss group by id order by 1;
+select group_concat(distinct score order by 1,name) from ss group by id order by 1;
+select group_concat(distinct 1,2 order by 1,2) from ss group by id order by 1;
+select group_concat(distinct 1,2 order by score,2) from ss group by id order by 1;
+select group_concat(distinct 3.1323,subject order by 1, 2,-20) from ss group by id order by 1;
+select group_concat( name,subject order by 1,2), count(distinct id), max(score) from ss group by id order by 1;
+select group_concat( name,subject order by 1,score), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,4,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,4.00,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,null order by score,4.00) from ss group by id order by 1;
+select group_concat( name,subject order by 1,2, null) from ss group by id order by 1;
+select group_concat( null order by score,4.00) from ss group by id order by 1;
+select group_concat( score order by 1,2) from ss group by id order by 1;
+select group_concat( score order by 1,name) from ss group by id order by 1;
+select group_concat( 1,2 order by 1,2) from ss group by id order by 1;
+select group_concat( 1,2 order by score,2) from ss group by id order by 1;
+select group_concat( 3.1323,subject order by 1,2,-20) from ss group by id order by 1;
+
+-- Ungrouped group_concat statements (DISTINCT first, then non-DISTINCT),
+-- followed by a reset of new_planner_agg_stage to 0.
+-- NOTE(review): presumably still running under the two-stage /
+-- force_streaming settings chosen by earlier `set` statements - confirm.
+select group_concat(distinct name,subject order by 1,2), count(distinct id), group_concat(name order by 1) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1,2) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2), group_concat(distinct score order by 1) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss order by 1;
+select group_concat(distinct name,subject order by 1,score,2) from ss order by 1;
+select group_concat(distinct name,subject order by score,length(name),1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat(distinct name,subject order by score+1,1,2,4) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2,score,4.00) from ss order by 1;
+select group_concat(distinct name,null order by score,4.00) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2, null) from ss order by 1;
+select group_concat(distinct null order by score,4.00) from ss order by 1;
+select group_concat(distinct name order by score,4.00, 1),group_concat(subject order by score,4.00, 1),array_agg(subject order by score,4.00, 1)  from ss order by 1;
+select group_concat(distinct score order by 1,2) from ss order by 1;
+select group_concat(distinct score order by 1,name) from ss order by 1;
+select group_concat(distinct 1,2 order by 1,2) from ss order by 1;
+select group_concat(distinct 1,2 order by score,2) from ss order by 1;
+select group_concat(distinct 3.1323,subject order by 1,-20) from ss order by 1;
+select group_concat( name,subject order by 1,2), count(distinct id), max(score) from ss order by 1;
+select group_concat( name,subject order by 1,score), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,4), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,4.00,1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,null order by score,4.00) from ss order by 1;
+select group_concat( name,subject order by null,1,2) from ss order by 1;
+select group_concat( null order by score,4.00) from ss order by 1;
+select group_concat( score order by 1,2) from ss order by 1;
+select group_concat( score order by 1,name) from ss order by 1;
+select group_concat( 1,2 order by 1,2) from ss order by 1;
+select group_concat( 1,2 order by score,2) from ss order by 1;
+select group_concat( 3.1323,subject order by 1,-20) from ss order by 1;
+-- NOTE(review): only new_planner_agg_stage is reset here. No later statement
+-- in this file sets streaming_preaggregation_mode again, so force_streaming
+-- appears to stay in effect for the rest of the script - confirm intended.
+set new_planner_agg_stage = 0;
+
+-- Turn enable_exchange_pass_through off and run grouped-by-id group_concat
+-- statements (DISTINCT first, then non-DISTINCT).
+-- NOTE(review): no later statement in this file sets
+-- enable_exchange_pass_through back to true - confirm that is intended.
+set enable_exchange_pass_through = false;
+select group_concat(distinct name,subject order by 1,2), count(distinct id), group_concat(name order by 1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss group by id order by id;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss group by id order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1, 2) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2), group_concat(distinct score order by 1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,score) from ss group by id order by 1;
+select group_concat(distinct name,subject order by score, 1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat(distinct name,subject order by score,4,2,1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by score,4.00, 1,2) from ss group by id order by 1;
+select group_concat(distinct name,null order by score,1,4.00) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2, null) from ss group by id order by 1;
+select group_concat(distinct null order by score,4.00) from ss group by id order by 1;
+select group_concat(distinct name, score order by score,4.00, 1),group_concat(subject order by score,4.00, 1),array_agg(subject order by score,4.00, 1)  from ss group by id order by 1;
+select group_concat(distinct score order by 1,2) from ss group by id order by 1;
+select group_concat(distinct score order by 1,name) from ss group by id order by 1;
+select group_concat(distinct 1,2 order by 1,2) from ss group by id order by 1;
+select group_concat(distinct 1,2 order by score,2) from ss group by id order by 1;
+select group_concat(distinct 3.1323,subject order by 1, 2,-20) from ss group by id order by 1;
+select group_concat( name,subject order by 1,2), count(distinct id), max(score) from ss group by id order by 1;
+select group_concat( name,subject order by 1,score,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,4,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,subject order by score,4.00,1,2), count(distinct id), max(score)  from ss group by id order by 1;
+select group_concat( name,null order by score,4.00) from ss group by id order by 1;
+select group_concat( name,subject order by 1,2, null) from ss group by id order by 1;
+select group_concat( null order by score,4.00) from ss group by id order by 1;
+select group_concat( score order by 1,2) from ss group by id order by 1;
+select group_concat( score order by 1,name) from ss group by id order by 1;
+select group_concat( 1,2 order by 1,2) from ss group by id order by 1;
+select group_concat( 1,2 order by score,2) from ss group by id order by 1;
+select group_concat( 3.1323,subject order by 1,2,-20) from ss group by id order by 1;
+
+-- Ungrouped group_concat statements (DISTINCT first, then non-DISTINCT).
+-- NOTE(review): presumably still running with enable_exchange_pass_through
+-- disabled by an earlier `set` in this file - confirm.
+select group_concat(distinct name,subject order by 1,2), count(distinct id), group_concat(name order by 1) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1,2) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2), group_concat(distinct score order by 1) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss order by 1;
+select group_concat(distinct name,subject order by 1,score,2) from ss order by 1;
+select group_concat(distinct name,subject order by score,length(name),1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat(distinct name,subject order by score+1,1,2,4) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2,score,4.00) from ss order by 1;
+select group_concat(distinct name,null order by score,4.00) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2, null) from ss order by 1;
+select group_concat(distinct null order by score,4.00) from ss order by 1;
+select group_concat(distinct name order by score,4.00, 1),group_concat(subject order by score,4.00, 1),array_agg(subject order by score,4.00, 1)  from ss order by 1;
+select group_concat(distinct score order by 1,2) from ss order by 1;
+select group_concat(distinct score order by 1,name) from ss order by 1;
+select group_concat(distinct 1,2 order by 1,2) from ss order by 1;
+select group_concat(distinct 1,2 order by score,2) from ss order by 1;
+select group_concat(distinct 3.1323,subject order by 1,-20) from ss order by 1;
+select group_concat( name,subject order by 1,2), count(distinct id), max(score) from ss order by 1;
+select group_concat( name,subject order by 1,score, 2), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,4), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,subject order by score,4.00,1,2), count(distinct id), max(score)  from ss order by 1;
+select group_concat( name,null order by score,4.00) from ss order by 1;
+select group_concat( name,subject order by null,1,2) from ss order by 1;
+select group_concat( null order by score,4.00) from ss order by 1;
+select group_concat( score order by 1,2) from ss order by 1;
+select group_concat( score order by 1,name) from ss order by 1;
+select group_concat( 1,2 order by 1,2) from ss order by 1;
+select group_concat( 1,2 order by score,2) from ss order by 1;
+select group_concat( 3.1323,subject order by 1,-20) from ss order by 1;
+
+
+-- Query-cache pair: the same ten statements (five ungrouped, five grouped by
+-- id) are run once with enable_query_cache = true and once more after setting
+-- it to false.
+-- NOTE(review): presumably the two runs are expected to return identical
+-- results; the recorded results are not visible in this diff - confirm.
+set enable_query_cache = true;
+select group_concat(distinct name,subject order by 1,2), count(distinct score), group_concat(name order by 1) from ss order by 1;
+select count(distinct score), group_concat(name order by 1) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1,2) from ss order by 1;
+select id, group_concat(distinct name,subject order by 1,2), count(distinct score), group_concat(name order by 1) from ss group by id order by 1;
+select id, count(distinct score), group_concat(name order by 1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss group by id order by id;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss group by id order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1, 2) from ss group by id order by 1;
+-- Second pass: identical statements with the query cache disabled.
+set enable_query_cache = false;
+select group_concat(distinct name,subject order by 1,2), count(distinct score), group_concat(name order by 1) from ss order by 1;
+select count(distinct score), group_concat(name order by 1) from ss order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1,2) from ss order by 1;
+select id, group_concat(distinct name,subject order by 1,2), count(distinct score), group_concat(name order by 1) from ss group by id order by 1;
+select id, count(distinct score), group_concat(name order by 1) from ss group by id order by 1;
+select group_concat(distinct name,subject order by 1,2) from ss group by id order by id;
+select group_concat(name,subject order by 1,2), group_concat(name,score order by 1,2) from ss group by id order by 1;
+select group_concat(name,subject order by 1,2), group_concat(distinct name,score order by 1, 2) from ss group by id order by 1;
+
+-- Degenerate and unusual call shapes: no arguments, ORDER BY and/or SEPARATOR
+-- with no value arguments, NULL / numeric / column-valued separators, string
+-- literals in ORDER BY, and array / JSON / map arguments.
+-- NOTE(review): several of these are presumably expected to be rejected with
+-- an error; the recorded outcomes are not visible in this diff - confirm.
+select group_concat();
+select group_concat() from ss;
+select group_concat(',');
+select group_concat("中国",name order by 2, id) from ss;
+select group_concat("中国",name order by 2, id separator NULL) from ss;
+select group_concat("中国",name order by 2, "第一", id) from ss;
+select group_concat("中国",name order by 2, "第一", id separator '\n') from ss;
+select group_concat("中国",name order by 2, "第一", id separator subject) from ss;
+select group_concat("中国" order by "第一" separator 1) from ss;
+select group_concat(  order by score) from ss order by 1;
+select group_concat(distinct  order by score) from ss order by 1;
+select group_concat([1,2]) from ss;
+select group_concat(json_object("2:3")) from ss;
+select group_concat(map(2,3)) from ss;
+select group_concat(null);
+select group_concat(order by 1 separator '');
+select group_concat(separator NULL);
+
+-- Runs one fixed grouped query after setting group_concat_max_len to each of
+-- -121, 1, 5, 6, 7, 8, 9 and 121 in turn.
+-- NOTE(review): presumably probes how a negative value is handled and where
+-- the output is cut off for small limits; recorded results are not visible in
+-- this diff - confirm. The variable is left at 121 when the script ends.
+set group_concat_max_len = -121;
+select group_concat(name,subject order by 1,2) from ss group by id order by 1;
+set group_concat_max_len = 1;
+select group_concat(name,subject order by 1,2) from ss group by id order by 1;
+set group_concat_max_len = 5;
+select group_concat(name,subject order by 1,2) from ss group by id order by 1;
+set group_concat_max_len = 6;
+select group_concat(name,subject order by 1,2) from ss group by id order by 1;
+set group_concat_max_len = 7;
+select group_concat(name,subject order by 1,2) from ss group by id order by 1;
+set group_concat_max_len = 8;
+select group_concat(name,subject order by 1,2) from ss group by id order by 1;
+set group_concat_max_len = 9;
+select group_concat(name,subject order by 1,2) from ss group by id order by 1;
+set group_concat_max_len = 121;
+select group_concat(name,subject order by 1,2) from ss group by id order by 1;