Compare commits

...

248 Commits

Author SHA1 Message Date
stephen 7adb7dca19
Support querying hive external table based on tbds (#2610) 2022-01-06 12:25:11 +08:00
Seaven 2ffdf309fd update ut 2022-01-04 15:46:30 +08:00
Seaven 8dfeaad4a6 Fix decimal const expression return type error 2022-01-04 15:46:30 +08:00
Seaven 0c2c042173 Fix join predicate check error 2022-01-04 15:46:30 +08:00
stdpain 5a7c9a96e7
Fix Agg limit crash (#2592) 2022-01-04 14:57:41 +08:00
xueyan.li bfbb602011
Upgrade log4j2 to 2.17.1 & jackson-databind to 2.9.10.8 (#2586) 2022-01-04 13:42:53 +08:00
stdpain 4524151252
Fix compile error (#2565) 2021-12-31 18:33:52 +08:00
gengjun-git b1ce1864dd
Ignore NoClassDefFoundError when loading custom audit log plugins (#2556) (#2564)
Before 1.19, a custom plugin class needed to implement the org.apache.doris.plugin.AuditPlugin interface. AuditPlugin was renamed to com.starrocks.AuditPlugin after 1.19, so a NoClassDefFoundError is thrown when loading the custom plugin class. To keep the cluster working properly, we should ignore NoClassDefFoundError when loading custom audit log plugins.
2021-12-31 18:11:35 +08:00
stdpain 20963fbc54
Fix overflow when rewrite local dict column predicate (#2562)
A local dict size may be greater than config::vector_chunk_size, which will
cause an overflow.

We disable the optimization when the dict size is greater than
vector_chunk_size.
2021-12-31 17:56:45 +08:00
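The guard described in the commit above is simple in shape; a minimal sketch (hypothetical function name, illustrative chunk size — the real value comes from BE config) of deciding whether the low-cardinality rewrite is safe:

```cpp
#include <cstddef>

namespace config {
// BE caps the number of rows per chunk; illustrative value only.
constexpr size_t vector_chunk_size = 4096;
} // namespace config

// Sketch: skip the local-dictionary predicate rewrite when the dictionary
// holds more entries than one chunk can, since buffers sized by
// vector_chunk_size would otherwise overflow.
bool can_apply_local_dict_rewrite(size_t local_dict_size) {
    return local_dict_size <= config::vector_chunk_size;
}
```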
yan.zhang e6f64d73a6
Disable column expr min/max value on zonemap filter when slot type is float/double (#2554) (#2557) 2021-12-31 16:44:30 +08:00
yan.zhang 2285b48171
merge cherry-pick (#2522) 2021-12-29 19:29:46 +08:00
lichaoyong fc5bcd16d0 BugFix: GC rowset by mistake upon startup. (#2500)
The RowsetId is a string of length 48. To reduce memory consumption,
only the high 16-character string, like "0200000000000006", is stored in the unordered_set.
Upon restart, the start point is reset to zero, so the numbers may
overlap with those of already-compacted rowsets. After compaction, the high-16 string
is removed from the unordered_set, and the Rowset GC daemon will then remove the
files generated by an import that is still in progress.

release_id() should check backend_uid first to avoid the startup problem.
2021-12-29 15:23:06 +08:00
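As a rough model of the bookkeeping described above (standalone sketch, not the engine's GC code): only a 16-character prefix of the 48-character RowsetId is tracked, so after the id counter restarts from zero a new rowset can collide with a compacted one's prefix.

```cpp
#include <iostream>
#include <string>
#include <unordered_set>

int main() {
    std::unordered_set<std::string> in_flight; // high-16 prefixes only

    // A 48-character RowsetId, of which only the first 16 chars are kept.
    std::string id_before_restart = "0200000000000006" + std::string(32, 'a');
    in_flight.insert(id_before_restart.substr(0, 16));

    // After restart the start point resets to zero, so a different rowset
    // can generate the same high-16 prefix; once compaction erases that
    // prefix, the GC daemon no longer treats the in-progress import's
    // files as protected.
    std::string id_after_restart = "0200000000000006" + std::string(32, 'b');
    std::cout << in_flight.count(id_after_restart.substr(0, 16)) << '\n'; // 1: collision
}
```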
trueeyu c276b2fcea
SerializedKeyAggHashSet does not use TwoLevelHashSet by default (#2482) (#2501) 2021-12-29 12:07:47 +08:00
Seaven b629b8183d Fix query rows error bug (#2481) 2021-12-28 21:30:20 +08:00
Seaven f5d314ad72 Fix project expression reuse bug (#2460) 2021-12-28 21:30:20 +08:00
yan.zhang 006084b4f9
Fix compound bugs of in-const predicates (#2470) (#2490) 2021-12-28 20:34:16 +08:00
Youngwb d1eb5f91d4 Fix ut 2021-12-27 16:24:53 +08:00
Youngwb 702829e94b Improve statistics estimate for in predicate (#1951) 2021-12-27 16:24:53 +08:00
stdpain 4d6a41fdcc
Fix wrong plan lowcard optimize (#2446) (#2458) 2021-12-27 16:02:57 +08:00
gengjun-git 127e8ecbb0
Clear expired load labels and delete infos when doing checkpoint (#2377) (#2454)
1. PR #2337 prevents expired delete infos from entering the image. Once delete information enters the image, FE will never clean it up. We should clear delete infos when doing checkpoint. Spark load jobs, broker load jobs, and export jobs have the same problem.
2. Previously, clearing routine load jobs used a different method: FE deleted the routine load jobs and wrote a related delete log to BDBJE. The followers synchronized the delete logs and replayed them. Thousands of routine load jobs will generate many delete logs and cause high overhead.
Now, the master cleans all load jobs and delete infos, and the followers synchronize the checkpoint image.
2021-12-27 11:08:33 +08:00
gengjun-git 048915801f
Ignore report version check before process tablet report (#2342) (#2453)
In processing tablet reports, sync and deleteFromMeta already have the logic to check the reportVersion, so there is no need to check before processing tablet reports. Otherwise, it will block the processing of other logic if the reportVersion is updated frequently.
2021-12-27 11:05:16 +08:00
trueeyu 0df6765e59
Fix the agg bug of process mem allocate failed (#2443) (#2444) 2021-12-26 16:57:41 +08:00
trueeyu a2fcfe3104
Add erase_with_hash for phmap (#2433) (#2437) 2021-12-25 01:28:59 +08:00
trueeyu c249113424
Fix the bug of process mem_pool alloc failed (#2413) (#2435) 2021-12-24 22:07:19 +08:00
HangyuanLiu 6a844d2008 Fix Union flatten missing Qualifier ALL bug (#2405)
2021-12-24 15:44:23 +08:00
HangyuanLiu 22507c96ba Fix decode miss rewrite when limit has project (#2347) 2021-12-24 15:44:23 +08:00
HangyuanLiu cf4417794e Complete the missing PhysicalOperator in LogicalExplain #2178 2021-12-24 15:44:23 +08:00
gengjun-git 696fcc6c9a
Add routine load lag metrics of Prometheus format (#1210) (#2404)
If you want to use this metric, set the FE config enable_routine_load_lag_metrics to true.
These metrics report, for each job, the maximum lag across all of its partitions.
Only routine load jobs whose lag is larger than min_routine_load_lag_for_metrics are shown. min_routine_load_lag_for_metrics is also an FE config item, and its default value is 10000.

The result looks like:

starrocks_fe_routine_load_max_lag_of_partition{job_name="routine_load1"} 10200
starrocks_fe_routine_load_max_lag_of_partition{job_name="routine_load2"} 20000
The average response time of the metrics api is 0.2s when 400 tasks are running simultaneously.
2021-12-23 20:19:22 +08:00
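Assuming the two FE config items named above go into fe.conf, enabling the metric would look roughly like this (values illustrative):

```
# export routine load lag metrics in Prometheus format
enable_routine_load_lag_metrics = true
# only jobs whose max partition lag exceeds this value are shown
min_routine_load_lag_for_metrics = 10000
```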
trueeyu dfdd624d53
Add mem check in fetch partition data (#2385) (#2387) 2021-12-23 13:48:56 +08:00
laotan332 42b97d5a1f fix arithmetic mul operator producing -0 (#2362) 2021-12-23 11:26:47 +08:00
trueeyu 0ed8a76282
free mem in close (#2368) (#2375) 2021-12-22 23:40:08 +08:00
Seaven 6dd7faeb9b
Fix join on predicate rewrite bug (#2361) 2021-12-22 21:35:34 +08:00
trueeyu 20f6989ef0
Replace FlatHashMap with phmap (#2355) (#2369) 2021-12-22 18:40:44 +08:00
trueeyu d9970c5f15
Fix the bug of destroy slots after alloc mem failed (#2353) (#2354) 2021-12-22 14:13:16 +08:00
Seaven 650a3344b5
Fix ConcurrentModification for group by clause expression (#2333) 2021-12-21 19:31:21 +08:00
trueeyu 7b228f9501
remove metrics: total_local_bytes and total_central_bytes 2021-12-21 16:15:23 +08:00
Seaven 504c91daf6
Fix scalar operator rewrite error when high concurrency (#2321) 2021-12-21 15:01:58 +08:00
trueeyu 109d2c6bd3
Add more catch for std::bad_alloc (#2312) (#2323) 2021-12-20 22:54:41 +08:00
trueeyu 7aa10eb564
Fix the bug of thread local memory statistics leak (#2300) (#2313) 2021-12-20 21:02:09 +08:00
stdpain f0ab68da35
Fixed the CHAR type in splicing up the wrong SeekTuple (#2281) (#2308)
When a CHAR value is "\0123456", strnlen returns 0, so some rows
are lost when such binary data is used as a filter condition.

For the CHAR type, if the input is from the execution level, we treat it
as a VARCHAR to avoid truncation.
2021-12-20 20:54:22 +08:00
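A standalone demonstration of the failure mode described above (not the engine code): strnlen stops at the first '\0', so a CHAR payload with a leading NUL byte reports length 0, while an explicit-length, VARCHAR-style view keeps all the bytes.

```cpp
#include <cstring>
#include <iostream>
#include <string>

int main() {
    // An 8-byte CHAR payload whose first byte is '\0'.
    const char payload[8] = {'\0', '1', '2', '3', '4', '5', '6', '7'};

    // strnlen treats the leading NUL as a terminator: length 0, row "lost".
    std::cout << strnlen(payload, sizeof(payload)) << '\n'; // prints 0

    // An explicit-length view (VARCHAR-like) preserves all 8 bytes.
    std::string as_varchar(payload, sizeof(payload));
    std::cout << as_varchar.size() << '\n'; // prints 8
}
```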
stdpain d04dc26821
Fix date_format function return a wrong result (#2269) (#2307)
The size() of a ColumnViewer over a ConstColumn was 1, so
DATE_FORMAT(Const, Var) returned a wrong result.
2021-12-20 20:54:08 +08:00
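A minimal model of the bug (hypothetical mini column type, not the engine's classes): a constant column stores its value once, so sizing the output by the underlying data length yields one row instead of one per input row.

```cpp
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Hypothetical miniature of a const column: the value is stored once,
// while size() reports the logical row count.
struct MiniConstColumn {
    std::string value;
    size_t rows;
    size_t size() const { return rows; }   // logical length
    size_t data_size() const { return 1; } // physical length
};

int main() {
    MiniConstColumn fmt{"%Y-%m-%d", 1024};
    std::vector<std::string> wrong(fmt.data_size()); // the bug: 1 row out
    std::vector<std::string> right(fmt.size());      // 1024 rows out
    assert(wrong.size() == 1 && right.size() == 1024);
}
```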
trueeyu a3c61212c7
Check before alloc when try catch std::bad_alloc (#2094) (#2292) 2021-12-20 17:00:38 +08:00
Youngwb f017bf7009 Fix multi cross join reorder NPE 2021-12-20 15:49:40 +08:00
kangkaisen 4d88d4945c Reduce sendbuffer size to avoid OOM (#2173) 2021-12-20 15:14:29 +08:00
kangkaisen c52aa68839 Catch oom error to avoid mysql client hang (#2172) 2021-12-20 15:14:29 +08:00
HangyuanLiu b8e7c3ba64
Fix on predicate miss rewrite when exists false in predicate (#2274) (#2294) 2021-12-20 15:14:12 +08:00
xueyan.li 7bcca8f183
Upgrade log4j2 to 2.17.0 2021-12-20 11:19:57 +08:00
trueeyu 2f5b3b90da
Segment uses native lz4 library to compress/decompress page (#2243) (#2254)
for #2249

Currently the segment uses the lz4 library bundled in librdkafka to compress/decompress pages; that lz4 library uses rd_malloc to allocate memory and will crash when allocation fails. The native lz4 library returns an error code when memory allocation fails.
2021-12-18 20:44:56 +08:00
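For reference, the native lz4 C API signals allocation or compression failure through its return value rather than crashing; a sketch of the calling pattern (assumes liblz4 is linked):

```cpp
#include <lz4.h>

#include <vector>

// Returns true on success. LZ4_compress_default returns 0 on failure,
// so the caller can turn the error into a status instead of crashing
// inside the allocator, as the librdkafka-bundled lz4 did via rd_malloc.
bool compress_page(const char* src, int src_size, std::vector<char>* out) {
    out->resize(LZ4_compressBound(src_size));
    int n = LZ4_compress_default(src, out->data(), src_size,
                                 static_cast<int>(out->size()));
    if (n <= 0) return false;
    out->resize(n);
    return true;
}
```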
Youngwb 45e2dd17e0 fix compile 2021-12-18 20:00:31 +08:00
Youngwb bce423898c Fix outer join push down error predicate which can not eliminate null (#2270) 2021-12-18 20:00:31 +08:00
Youngwb e756bc29ee Fix JoinAssociativityRule projection rewrite error (#2238) 2021-12-18 20:00:31 +08:00
yan.zhang f76daf0180
Add extra field `serialized_size` to check deserialize sanity (#2198) (#2256) 2021-12-18 14:31:44 +08:00
DeepThinker666 a17dfc914b fix fail to prepare tablet reader bug (#2026)
Now publish version will succeed when the quorum succeeds. But the finished transaction will update the version of all replicas to a visible version, which leads to querying an unsuccessful replica and results in query failure. Fix the bug by skipping the version update of unsuccessful replicas when the transaction is finished.
2021-12-18 13:19:15 +08:00
Binglin Chang 5f67c08552
Log num of delete operations for each write txn (#2234) (#2241) 2021-12-17 21:47:06 +08:00
stdpain f616a08ff3
[cherry-pick] cherry-pick overflow range to Fix dead Loops (#2235)
cherry-pick #2222 will close #2221
cherry-pick #2223 to fix FE UT
2021-12-17 18:36:11 +08:00
stdpain 6f39d94abe
Fix couldn't remove string column when rewrite project node (#2209) (#2215) 2021-12-16 21:36:54 +08:00
stdpain d2386549c4
add fast fail when add range (#2211) 2021-12-16 21:29:10 +08:00
HangyuanLiu 104a05bc91 fix ut 2021-12-16 21:25:50 +08:00
HangyuanLiu 6de7ba2eeb Fix low cardinality decode bug when project key equals value (#2207) 2021-12-16 21:25:50 +08:00
Seaven 5fe175f0a9
Fix error shuffle bucket plan (#2183) 2021-12-16 20:52:31 +08:00
yan.zhang ba02d26765
Cannot infer upper/lower bound when having call operator (#2204) (#2210) 2021-12-16 18:35:39 +08:00
yan.zhang ae5262db28
Revert "use read size not serailized size to check sanity (#2163) (#2175)" (#2192)
This reverts commit cd91bba40c.
2021-12-16 10:42:09 +08:00
Youngwb 3c65b68137
Fix group containing redundant group expression (#2150) (#2190) 2021-12-16 10:41:49 +08:00
stdpain 066ead38ca
Fix Error in column Range (#2187) 2021-12-15 21:59:27 +08:00
HangyuanLiu d6dab68b77
Fix table function node statistics error (#2016) (#2186) 2021-12-15 21:49:44 +08:00
Youngwb 6aee906d35
Derive logical property output columns after prune columns (#2165) (#2185) 2021-12-15 21:48:14 +08:00
trueeyu c4790d6f9a
Fix cherry pick (#2179) 2021-12-15 20:21:55 +08:00
stdpain 9683a8b9ee [cherry-partial] rewrite Function when rewiteAggOperator 2021-12-15 19:47:45 +08:00
yan.zhang cd91bba40c
use read size not serialized size to check sanity (#2163) (#2175) 2021-12-15 18:28:50 +08:00
stdpain cf38733ecd Fix decimal precision lose in case expr (#2153) 2021-12-15 13:29:11 +08:00
trueeyu 7201619908
Check return ptr of MemPool::allocate() (#2076) (#2151) 2021-12-15 10:26:13 +08:00
Youngwb 6d468b7d9d Fix join choose wrong type with decode node on exchange (#2132) 2021-12-14 22:17:09 +08:00
Youngwb f7d33b0552 Fix concat_ws not skip null value (#2136) 2021-12-14 22:17:09 +08:00
kangkaisen e55549f46e
Fragment with result sink instance num should always be 1 (#2134) (#2148) 2021-12-14 22:13:25 +08:00
gengjun-git 20863127c5
Fix the bug that the state of replica is always in decommission (#1997) (#2143)
To remove a redundant replica, proceed as follows.
1. Select the replica to be dropped according to the following priority:
   1). replica on a backend that has been dropped.
   2). replica whose state is bad.
   3). replica that is unavailable.
   4). replica whose state is under clone or decommission.
   5). replica with a failed version.
   6). replica with a lower version.
   7). replica on the same host.
   8). replica as the balance source.
   9). replica stored on a high-load backend.
2. Set the replica state to decommission, find the current largest txn number, and return the TabletSchedCtx to the scheduling queue.
3. Once all txns smaller than the found txn number are finished, FE can drop this replica.

But the second and third steps may be separated by a long time. During this period another replica may hit a problem, like an incomplete version or an unavailable backend. If this happens, the scheduler may go through other repair processes or select another replica to decommission, and the first replica's state will stay decommissioned and never be scheduled.

To fix this problem, reset the decommissioned replica state when
1. the scheduler chooses another replica to decommission, or
2. the TabletSchedCtx is finished.
2021-12-14 19:57:51 +08:00
yan.zhang ce519e5ae5
In/NotIn and Range predicates do not exclude each other (#2104) (#2141) 2021-12-14 19:24:41 +08:00
yan.zhang 9cd62ecabc
Fix full outer join output row count (#2127) (#2138) 2021-12-14 19:18:00 +08:00
trueeyu eeecd92a16
Remove query id string from thread local data (#2058) (#2137)
The query id string in TLS is no longer used

Co-authored-by: sduzh <zhuming9011@gmail.com>
2021-12-14 19:09:31 +08:00
laotan332 b342dbb38e
Fix zero divided by negative number (#2135) (#2139) 2021-12-14 18:50:12 +08:00
xueyan.li a06a20c399
Upgrade log4j2 to 2.16.0 (#2122) (#2128) 2021-12-14 13:36:24 +08:00
lichaoyong d338b81ddc
Because num_segments explodes when loading multiple big files. (#2111)
A threshold is necessary to alleviate compaction memory usage
before vertical compaction is available.
It reuses config::tablet_max_versions because this is a temporary solution.
2021-12-14 09:59:04 +08:00
HangyuanLiu 9025308cf5
Disable the optimization of eliminating Cast in ReduceCastRule #2100 (#2119) 2021-12-13 22:18:13 +08:00
Seaven 0e98b8a3e1 Add check if normalized predicate failed (#1819) 2021-12-13 22:17:11 +08:00
Youngwb b6ec7e8253 Fix PhysicalFilterOperator equals error (#2113) 2021-12-13 22:13:44 +08:00
Youngwb 1342dabc55 Disable JoinAssociativityRule when predicate used columns has more than two tables (#2110) 2021-12-13 22:13:44 +08:00
Youngwb c866980fbe Consider distinct function count for compute extra cost (#2053) 2021-12-13 22:13:44 +08:00
kangkaisen c7dabdc6a3 Fragment with result sink instance num should always be 1 (#2114) 2021-12-13 22:04:30 +08:00
trueeyu d9c62297b6
Check the return ptr of MemPool::allocate() (#1282) (#2107) 2021-12-13 21:10:57 +08:00
kangkaisen da6362a766 ScalarOperatorsReuseRule should run last 2021-12-13 20:58:12 +08:00
gengjun-git 3d67a65a62
Fix LDAP authentication failure bug (#1309) (#2099)
LDAP authentication should use the mysql_clear_password auth plugin. Add mysql_clear_password to the supported plugins.
2021-12-13 17:19:21 +08:00
stdpain 17006feccd Fix miss compute nullable properties when build decode node (#2090)
When we call `new SlotDescriptor`, nullIndicatorBit is initialized with
value 0, but when we call `toThrift()`, the slot is always nullable because
nullIndicatorBit is not -1, so setting nullable is ignored. So we need to call
computeMemLayout after setting the nullable properties.
2021-12-13 16:03:27 +08:00
stdpain aafb39e440 Fix Dict Optimize unknown error when aggregator has unsupported agg func (#2071) 2021-12-13 16:03:27 +08:00
trueeyu 5ac54fc840
check mem usage before start compaction task (#2055) (#2068) 2021-12-11 18:10:20 +08:00
Seaven f910af7894 update 2021-12-11 10:52:15 +08:00
Seaven 6459d17846 If function support time type 2021-12-11 10:52:15 +08:00
HangyuanLiu b58e9b8719 prune cross 2021-12-11 10:52:15 +08:00
stdpain 2b9f223e8a Fix BE crash in lowcardinality optimization when late materialize (#2054) 2021-12-11 10:52:15 +08:00
trueeyu 858e1aded4 check mem usage before start compaction task (#2055) 2021-12-11 10:52:15 +08:00
kangkaisen 541ba25850 Improve CBO timeout error msg 2 (#2061) 2021-12-11 10:52:15 +08:00
kangkaisen 4edc566981 Improve CBO timeout error msg (#2013) 2021-12-11 10:52:15 +08:00
Seaven 44950c6e71 Fix join reorder didn't handle project expression (#2019) 2021-12-11 10:52:15 +08:00
stdpain d7e9063c76 Add cancel check in ES Scanner (#2025)
The ES HTTP scan node waits for the scanner scan to complete
before closing. When a query reaches its limit, resources are not
released in a timely manner, which may cause additional invalid
resource overhead.

such as `select * from ESTable limit 10;`
2021-12-11 10:52:15 +08:00
trueeyu 4d6af69e70
Check mem limit before start execute fragment (#2037) (#2045) 2021-12-11 10:07:37 +08:00
trueeyu 88faafa1d0
check mem limit before start consistency (#2041) (#2056) 2021-12-11 09:31:52 +08:00
stdpain aeaffd6446 [cherry-pick] Fix NPE in Dict Optimize when Project node has unsupported dict operator (#2020) 2021-12-10 19:10:16 +08:00
xueyan.li 13451113d7
Temporary solution to fix CVE-2021-44228 (#2027) (#2046) 2021-12-10 18:54:43 +08:00
yan.zhang 8ccc61d3f1
Fix cast date stats when stats has infinity (#2036) (#2039) 2021-12-10 17:17:44 +08:00
laotan332 a4059ab90e
bug fix: hdfs_scanner_orc orc::StringDictionary size > chunk size makes memory out of bounds (#2006) 2021-12-09 21:11:48 +08:00
Shi Lei d57d156eea
Remove the restriction that can not grant NODE_PRIV to any other users or roles (#1684) (#1990) 2021-12-09 16:10:39 +08:00
HangyuanLiu f7eb64d402 fix window column prune bug (#1978) 2021-12-09 15:05:34 +08:00
Seaven 0f883a3cec update 2021-12-09 15:04:07 +08:00
Seaven a53dca3d79 Move scalar operator reuse to physical phase (#1966) 2021-12-09 15:04:07 +08:00
Seaven b371dd44ea Improve data cast expression statistic compute (#1957) 2021-12-09 15:04:07 +08:00
Youngwb b91158b69a Fix choose wrong colocate join (#1979) 2021-12-09 12:55:48 +08:00
trueeyu a4ab2e4e2c
Fix the bug of large sort can't be cancelled (#1967) (#1977) 2021-12-09 09:40:11 +08:00
trueeyu 0ec6862046
Add update mem_tracker to web handler (#1969) (#1974) 2021-12-09 09:17:42 +08:00
stdpain c557460408
[cherry-pick] Support mysql table sink in BE (#1581) (#1968) 2021-12-08 21:16:10 +08:00
trueeyu 1748c88c58
Try consume before large memory allocate (#1922) (#1953) 2021-12-08 15:10:28 +08:00
trueeyu 9dced35510
Add more check point for large mem allocate (#1902) (#1945) 2021-12-08 11:30:08 +08:00
stdpain 359d0bf8b6
Fix wrong cast in GlobalDictColumnIterator (#1925) (#1942) 2021-12-08 10:37:45 +08:00
Youngwb 83601ca318
Fix physical value node equality (#1931) (#1933) 2021-12-07 21:15:49 +08:00
HangyuanLiu 02971abe01 fix decimal cast (#1927) 2021-12-07 21:03:51 +08:00
HangyuanLiu eb2e67b16c Fix semi reorder with constant (#1853) 2021-12-07 21:03:51 +08:00
HangyuanLiu d03aa7a73e Fix grouping columns contains bug in PreAggregateTurnOnRule (#1894) 2021-12-07 21:03:51 +08:00
kangkaisen 8ea4e22115
Decode node should set limit if necessary (#1928) (#1930) 2021-12-07 21:02:41 +08:00
Binglin Chang b595d9d752
bugfix: cancel deltawriter if write returns error (#1926) 2021-12-07 19:05:08 +08:00
stdpain 3f53f61545
Fix lazy materialize error in nullable dict column (#1904) (#1911)
If the global dict column has an empty string (""),
a local dict slice (data=0x00, size=0) will find a global dict string
(data="", size=0).
2021-12-07 15:18:02 +08:00
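A tiny reproduction of the comparison pitfall (hypothetical Slice type, not the engine's): a not-yet-decoded slice with data=0x00, size=0 compares equal to the dictionary's genuine empty-string entry, so the lookup "succeeds" for rows that were never materialized.

```cpp
#include <cstddef>
#include <cstring>
#include <iostream>

struct Slice {
    const char* data;
    size_t size;
};

// Content equality: all zero-length slices are equal, whether data
// points at "" or is a null placeholder.
bool eq(const Slice& a, const Slice& b) {
    return a.size == b.size && (a.size == 0 || memcmp(a.data, b.data, a.size) == 0);
}

int main() {
    Slice undecoded{nullptr, 0}; // placeholder from the local dict
    Slice empty_entry{"", 0};    // real "" entry in the global dict
    std::cout << std::boolalpha << eq(undecoded, empty_entry) << '\n'; // true
}
```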
Seaven 50094a93e6 Fix cross join lose predicate in JoinAssociativityRule (#1863) 2021-12-07 12:30:57 +08:00
trueeyu c241f0cded
fix the bug of hook calloc (#1898) (#1903) 2021-12-07 09:51:55 +08:00
kangkaisen a825e4471d Fix couldn't get function name in ArithmeticCommutativeRule (#1639) 2021-12-06 22:17:23 +08:00
Youngwb 64141b92c1 Fix get not valid number at expression estimate (#1861) 2021-12-06 22:17:23 +08:00
Youngwb 5353cac9b1 Avoid get invalid value at compute overlap percent (#1889) 2021-12-06 22:17:23 +08:00
trueeyu 75033adfba
Use ExecEnv->tablet_meta_mem_tracker() to collect the memory statistics of tablet meta (#1800) (#1885) 2021-12-06 19:45:39 +08:00
yan.zhang 527e5aea18
fix decode agg non value present (#1879) (#1880) 2021-12-06 14:31:39 +08:00
kangkaisen 058c8f9258 Fix UT 2021-12-05 09:48:40 +08:00
Youngwb 42f1fd9363 Fix can not choose shuffle bucket join in plan fragment builder (#1773) 2021-12-05 09:48:40 +08:00
Youngwb a2023a8df4 Fix limit operator use wrong output columns (#1825) 2021-12-05 09:48:40 +08:00
Youngwb bcbf8e4caa Fix EsNode use wrong colocate backend selector (#1766) 2021-12-05 09:48:40 +08:00
Youngwb 561490bcf2 Implement Logical limit operator builder (#1803) 2021-12-05 09:48:40 +08:00
HangyuanLiu 253598aaf5 Fix PreAggregateTurnOnRule miss project rewrite (#1726) 2021-12-05 09:48:40 +08:00
Seaven de813d79cc Fix compute constant statistic NPE on scan node (#1792) 2021-12-05 09:48:40 +08:00
stdpain 7adb5e883b
[cherry-pick] Make buffer stream reserve bytes configurable (#1652) (#1849)
From the results of some of my tests, it is very difficult to find a suitable buffer size. So for now let this value be configurable.
2021-12-04 20:39:05 +08:00
stephen 440f8f6427 Update drop backend behavior (#1578)
`DROP BACKEND` will drop all tablets on the backend. If some tables have only one replica, the data will be lost.
This pull request adds a prompt when necessary. Additionally, `DROP BACKEND <backend> FORCE` can be used to forcibly drop the backend.
2021-12-04 18:29:25 +08:00
stephen 90d44d8c5a Revert "Update drop backends syntax behavior"
This reverts commit f4a4a4cca7.
2021-12-04 18:00:29 +08:00
HangyuanLiu 600a3282e3 fix ut 2021-12-04 16:33:42 +08:00
HangyuanLiu 00f2c680c1 Revert "Remove duplicate expr create in ScalarRangePredicateExtractor Rule (#1796)"
This reverts commit 3eebf197b1.
2021-12-04 16:33:42 +08:00
HangyuanLiu 0e42c8e390 fix ut 2021-12-04 16:33:42 +08:00
HangyuanLiu eb7fcf2df0 Fix PreAggregateTurnOnRule miss project rewrite (#1726) 2021-12-04 16:33:42 +08:00
HangyuanLiu 4a461c9b04 Support insert into mysql external table (#1770) 2021-12-04 16:33:42 +08:00
HangyuanLiu 07c66574e3 Remove duplicate expr create in ScalarRangePredicateExtractor Rule (#1796) 2021-12-04 16:33:42 +08:00
stephen f4a4a4cca7 Update drop backends syntax behavior 2021-12-04 15:48:08 +08:00
trueeyu dd29de0471
fix the bug of oom in high concurrency (#1775) (#1837) 2021-12-04 12:46:01 +08:00
laotan332 44d6eed107
Fix for compile (#1843) 2021-12-04 10:02:58 +08:00
stephen a14c203c5c Fix incorrect display of rollup information (#1621)
The information is not right when using SHOW ALTER TABLE ROLLUP: the values do not correspond to the tab bar.
This pull request adjusts the position to make it right.
2021-12-03 21:11:53 +08:00
stephen f12eff366c Display the start time and version of FE (#1388)
Displaying the start time and version is convenient for users. If the FE Alive status is false, the start time is NULL.
2021-12-03 21:11:53 +08:00
laotan332 4fa7faf0c7
cherry-pick 1818 to 2.0 (#1831) 2021-12-03 19:41:29 +08:00
trueeyu 1381aeb117
Add test case for mem hook (#1784) (#1813) 2021-12-03 19:10:48 +08:00
trueeyu c19df60758
Remove redundant if judgments (#1815) (#1823) 2021-12-03 17:42:24 +08:00
yan.zhang 42d4468117
fix fe-ut on branch-2.0 (#1814) 2021-12-03 15:49:25 +08:00
yan.zhang 8dbce04643
fix assign wrong nullable property to dict column (#1799) (#1802) 2021-12-02 21:28:13 +08:00
yan.zhang 47ed0f039b
Fix dict decode node on function call on constant value (#1791) (#1801) 2021-12-02 21:26:22 +08:00
trueeyu 4aad8b6953
Fix the heap use after free bug of stream load (#1772) (#1785) 2021-12-02 15:27:12 +08:00
trueeyu 020b6779f8
Fix the bug of mem hook (#1779) (#1781) 2021-12-02 12:17:40 +08:00
zhangqiang b3dd299430
BugFix: fix error shadow column name upon upgrade (#1732) (#1764)
Before StarRocks 1.18, StarRocks used the __doris_shadow_ prefix to do schema change.
After StarRocks 1.18, StarRocks uses the __starrocks_shadow_ prefix to do schema change.
StarRocks after version 1.18 can't identify the __doris_shadow_ prefix and will handle it as a normal column name.
Add a __doris_shadow_ prefix check to fix it.
2021-12-01 22:13:11 +08:00
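The fix amounts to accepting both prefixes when classifying columns; roughly (illustrative helper, the real constants live in the codebase):

```cpp
#include <string>

// Shadow-column prefixes used during schema change: the legacy (pre-1.18,
// Doris-era) one and the current StarRocks one.
constexpr const char* kDorisShadowPrefix = "__doris_shadow_";
constexpr const char* kStarRocksShadowPrefix = "__starrocks_shadow_";

static bool starts_with(const std::string& s, const char* prefix) {
    return s.rfind(prefix, 0) == 0;
}

// A column created before an upgrade may still carry the old prefix;
// treat both as shadow columns instead of ordinary user columns.
bool is_shadow_column(const std::string& name) {
    return starts_with(name, kStarRocksShadowPrefix) ||
           starts_with(name, kDorisShadowPrefix);
}
```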
Seaven 78f201eaba
Fix cast to decimal(0,0) bug (#1709) (#1737) 2021-12-01 12:22:06 +08:00
laotan332 7587bec9dc
Return fake dict to fe if all_page_dict_encoded is false (#1703) (#1743) 2021-12-01 12:20:13 +08:00
Youngwb a3983d95fe fix ut 2021-12-01 10:40:56 +08:00
Youngwb a0809ba2e2 Fix can not choose replicate join in plan fragment builder (#1727) 2021-12-01 10:40:56 +08:00
Youngwb 6b2f4b53fa Fix use wrong plan when count distinct multicolumn with new_planner_agg_stage=2 (#1641) 2021-12-01 10:40:56 +08:00
Youngwb 66af30d677 Disable value node use colocate join (#1664) 2021-12-01 10:40:56 +08:00
Seaven af77289987 Refactor struct Pair<Property, List<Property>> into class OutputInputProperty (#1455) 2021-12-01 10:40:56 +08:00
Youngwb 22c0e13d72 Fix ClassCastException with null value Time constant (#1701) 2021-12-01 10:40:56 +08:00
Youngwb f5bb384e94 Fix ut (#1646) 2021-12-01 10:40:56 +08:00
Youngwb 8de1759ebb compute statistics distinct values after estimate predicate column statistics (#1521) 2021-12-01 10:40:56 +08:00
Youngwb d3406ebf24 Fix can not extract plan with new_planner_agg_stage=1 (#1624) 2021-12-01 10:40:56 +08:00
stdpain 64a5c0936e
Fix ES external table query error in bool type compare to other type (#1661) (#1734) 2021-11-30 19:31:28 +08:00
trueeyu 818b7d6fd9
Add mem statistics for ExceptNode and IntersectNode (#1717) (#1720) 2021-11-30 14:53:26 +08:00
trueeyu 07dce2a54f
Fix the bug that the mem limit check is not effective in compaction (#1686) (#1713)
Previously, each compaction checked the memory limit only once, which is coarse-grained.
To detect that memory has reached the threshold as soon as possible,
I made the check more fine-grained: once per chunk.
2021-11-30 09:36:45 +08:00
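Schematically, the check moves from once per compaction into the per-chunk loop; a sketch with assumed minimal stand-in types (the real code uses MemTracker::check_mem_limit, visible in the consistency diff further down):

```cpp
#include <string>

// Assumed minimal stand-ins for illustration only.
struct Status {
    bool ok_;
    bool ok() const { return ok_; }
    static Status OK() { return {true}; }
    static Status MemLimitExceeded() { return {false}; }
};

struct MemTracker {
    long consumed = 0;
    long limit = 0;
    Status check_mem_limit(const std::string& /*msg*/) const {
        return consumed < limit ? Status::OK() : Status::MemLimitExceeded();
    }
};

struct ChunkReader {
    int remaining = 3;
    bool next_chunk() { return remaining-- > 0; }
};

// Fine-grained variant: instead of a single check before the whole
// compaction, bail out as soon as any chunk pushes memory past the limit.
Status compact(ChunkReader& reader, const MemTracker& tracker) {
    while (reader.next_chunk()) {
        Status st = tracker.check_mem_limit("compaction");
        if (!st.ok()) return st; // stop early and release what we hold
        // ... merge this chunk ...
    }
    return Status::OK();
}
```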
kangkaisen 78898307c2
Add delete info to olap table (#1704) 2021-11-29 19:17:46 +08:00
HangyuanLiu 0d7791a2ba Insert statement with limit should add exchange node (#1586)
* fix insert bug

* fix

* Insert with limit should add exchange node

* add ut
2021-11-29 17:17:49 +08:00
Pslydhh 52a7b28acb
Remove default escape for brackets (#1139) (#1678) 2021-11-29 14:51:02 +08:00
trueeyu 5134873dbc
Fix the bug of exec_state use after free (#1658) (#1677) 2021-11-29 10:56:36 +08:00
kangkaisen fbca22c99b
Fix two phase count distinct bool BE core in asan mode (#1671) (#1680) 2021-11-29 10:47:48 +08:00
trueeyu 072855d279
Bugfix: potential incorrect real size of aligned allocation (#1601) (#1674) 2021-11-27 16:39:06 +08:00
trueeyu 947936c58a
Fix the mem check bug of consistency (#1666) (#1673) 2021-11-27 16:16:29 +08:00
trueeyu c0ac961d6d
Fix the dead lock bug of data stream recvr (#1631) (#1668) 2021-11-27 15:49:48 +08:00
Seaven 65c12fa94e update ut 2021-11-26 19:32:26 +08:00
Seaven cae73e27fc Fix str_to_date returning wrong type for date (#1585) 2021-11-26 19:32:26 +08:00
Seaven 2eb740597a Fix bug where the aggregate below has no grouping column (#1589) 2021-11-26 19:32:26 +08:00
Seaven 5f6745f7c8 Fix mysql predicate case when bug (#1596) 2021-11-26 19:32:26 +08:00
Seaven c2b8894a6e Add unix_timestamp() const function (#1528) 2021-11-26 19:32:26 +08:00
trueeyu 3c6f6b3d93
Remove unused code of data stream recvr for non-vectorized execute engine (#1633) (#1644) 2021-11-26 16:58:35 +08:00
trueeyu 4ebf336b90
Fix the mem statistics bug of result sink (#1626) (#1643) 2021-11-26 16:44:27 +08:00
trueeyu 01ff7a730d
Fix the statistics bug of stream load (#1617) (#1642) 2021-11-26 16:14:58 +08:00
sduzh a469be6422
Add a RAII class for setting and restoring memory tracker (#1595) (#1635) 2021-11-26 14:42:00 +08:00
kangkaisen cda8306fb6
Fix handle empty project operator error (#1607) (#1632) 2021-11-26 10:54:46 +08:00
stdpain 49b22bf049 Fix case expr cause be crash (#1605) 2021-11-25 19:52:55 +08:00
kangkaisen 852ee8622d Fix NPE when collect meta error in BE (#1590) 2021-11-25 19:25:49 +08:00
Youngwb b8c243b3cf
[Cherry-pick] bug fix for branch-2.0 (#1614) 2021-11-25 18:52:39 +08:00
laotan332 17542c87cb
bug fix: AggregateStreamingNode::get_next does not init chunk, producing an invalid result (#1584) (#1603) 2021-11-25 15:31:42 +08:00
stdpain 97607bdcbb
[cherry] cherry-pick fix be crash in aggregate table when open lowcard optimize (#1518) (#1570) 2021-11-24 17:25:42 +08:00
HangyuanLiu 5e64df2579
Cherry-pick bug to Branch 2.0 (#1564)
* Fix grouping sets with same column bug (#1396)

* Fix rollup and cube with same column bug in grouping sets (#1445)

* Fix insert add unnecessary DataStreamSink bug (#1526)
2021-11-24 15:53:15 +08:00
Binglin Chang ed0bf3b91c
Stream load support auto __op mapping for json format (#1508) (#1548) 2021-11-24 12:24:25 +08:00
Youngwb 1c9ecdea2a Compute case when operator and if function column statistics (#1472) 2021-11-24 10:01:03 +08:00
Youngwb f33ca0a045 compute Count function column statistics (#1431) 2021-11-24 10:01:03 +08:00
HangyuanLiu 09d26b3fd4 Fix Agg function name error in explain string, No code logic modification (#1306) 2021-11-24 10:01:03 +08:00
Youngwb 513c39973f Support dump replay column statistics type (#1255) 2021-11-24 10:01:03 +08:00
Seaven 13c78d75e9 Fix create view bug on with and union sql (#1516) 2021-11-23 23:26:30 +08:00
Seaven 34d0a07e8a Fix mv mysql scan node rewrite bug (#1474) 2021-11-23 23:26:30 +08:00
trueeyu 2ccfeaa736
Fix the bug of data recvr processing chunk (#1524) (#1531) 2021-11-23 21:06:52 +08:00
stdpain 16bb2e7083 Shuffle channel random to avoid rpc waiting (#1487) 2021-11-23 16:32:39 +08:00
stdpain 0a32074854 Split dict decode from segment iterator (#1477)
* split dict decode from segment iterator
* fix nullable column handle in low card lazy_materialization
2021-11-23 16:32:39 +08:00
stdpain bf26057a1d Modify Hive external table scan scheduling strategy (#1394) 2021-11-23 16:32:39 +08:00
stdpain 74f3fdf0b5 Fix aggregate NPE in 4 phase (#1495) 2021-11-23 16:32:39 +08:00
xueyan.li 4ae4a9d184
upgrade jprotobuf for enhance security (#1507) (#1510) 2021-11-23 14:31:09 +08:00
kangkaisen bb60699f29
Disable cast operator because BE doesn't support it currently (#1475) (#1499) 2021-11-23 10:07:19 +08:00
trueeyu 76eaf929c8
Fix the mem statistics bug of page cache (#1451) (#1468) 2021-11-22 18:05:06 +08:00
zhangqiang 9b660d022e Add schema change support from char to varchar (#1457) 2021-11-20 22:14:18 +08:00
kangkaisen baac25d335
Remove tblRefIds and All plan node use tupleIds (#1442) (#1459) 2021-11-20 17:37:01 +08:00
chaoyli 466e246884 Change schema_hash_path to tablet_path 2021-11-20 14:51:30 +08:00
lichaoyong 01c9404d66 Implement the Delete Statement using vectorization (#1341)
The vectorized PushHandler is used to vectorize SparkLoad.
It also implements the Delete Statement in a vectorized form.
2021-11-20 10:29:47 +08:00
lichaoyong f0f8ea7ac9 Make TCMalloc GC Thread and CurrentThread exit gracefully. (#1383)
The TCMalloc GC thread is not joined on exit, so it may reference resources that have already been released.
The CurrentThread thread-local variable is used to gather memory statistics and is defined in ExecEnv.
Because ExecEnv exits after StorageEngine, we can check the exit state of StorageEngine to exit
CurrentThread as soon as possible.
2021-11-20 10:29:47 +08:00
Seaven 95a7b2cdf4 Fix push limit when join has predicate (#1427) 2021-11-19 20:08:05 +08:00
Seaven 30a7d0c6bd Refactor date format with the standard lib and support strict check (#1385) 2021-11-19 20:08:05 +08:00
trueeyu 411eb09e4f
Modify the mem config of compaction (#1413) (#1421) 2021-11-19 16:08:27 +08:00
Youngwb 94100e98be
Support push down predicate through repeat node (#1410) (#1419) 2021-11-19 15:10:46 +08:00
stdpain 0ebe21eebc
Fix const column handle in lowcard (#1389) (#1415) 2021-11-19 14:27:20 +08:00
kangkaisen 9197db3281
Distinguish the global dicts for query and load (#1409) (#1414) 2021-11-19 14:16:52 +08:00
stdpain f28d93c4a3 Fix the problem that Memory is not released in time when scanning Parquet (#1401) 2021-11-19 10:56:22 +08:00
stdpain 2a16088e2b Fix the logic that judges whether a function can be used for low-cardinality optimization in BE (#1372) 2021-11-19 10:56:22 +08:00
stdpain f9242e3820 Use hash_combine instead of std::hash in int128 (#1370) 2021-11-19 10:56:22 +08:00
HangyuanLiu 53f4195f58
Cherry-pick bug from main to 2.0 (#1405)
* Fix table function analyze bug (#1224)

* Fix order by output columns error (#1284)

* fix insert upper column matching bug (#1376)
2021-11-19 10:03:44 +08:00
yan.zhang 8d2e0a5be2
switch to `dict chunk` even when chunk_size == 0 (#1397) (#1399) 2021-11-18 22:36:16 +08:00
kangkaisen 24e75a53ce
Handle physical limit operator when decode rewrite (#1387) (#1393) 2021-11-18 20:41:02 +08:00
yan.zhang e5eb0c6ff0
fix crash bug during broker load when having extra fields (#1379) (#1384) 2021-11-18 15:50:35 +08:00
yan.zhang be72c4ffb8
Fix ORC char column dictionary (#1371) (#1374) 2021-11-18 14:49:36 +08:00
trueeyu 86cf966ff8
Fix the bug of RequiredStoredColumnReader (#1360) (#1368) 2021-11-18 12:16:55 +08:00
Seaven abfbd2d0e5 Fix date format function bug (#1337) 2021-11-18 11:21:38 +08:00
Seaven 158839a422 Fix select group by view error (#1231) 2021-11-18 11:21:38 +08:00
kangkaisen 763c31e86a Fix insert into select same table query global dict lost bug (#1344) 2021-11-17 21:37:00 +08:00
kangkaisen 7b5bfd37a0 Disable low cardinality optimization for predicate in project (#1339) 2021-11-17 21:37:00 +08:00
kangkaisen fed15c086b Fix DictOptimizeParser::rewrite_descriptor changing the slot type (#1323)
The core reason is that DictOptimizeParser::rewrite_descriptor changes the slot type from int to varchar.
We shouldn't change it, so we use decoded_slot in olap_scan_node.

Remove the _string_slots and _no_string_slots along the way.
2021-11-17 21:37:00 +08:00
kangkaisen 6f9ca245e2 Fix CouldApplyDictOptimize error for const operator (#1314) 2021-11-17 21:37:00 +08:00
Youngwb 7a6dc84857
Cherry pick some bug fix for branch-2.0 (#1348)
* Fix column statistics null fraction estimate error when column not equals to constant (#1289)

* Fix local aggregate use wrong BackendSelector (#1310)
2021-11-17 20:40:29 +08:00
yan.zhang 25b989de4b
Fix hive table field type mismatch bug after fe restart (#1335) (#1345)
Co-authored-by: gengjun-git <gengjun@dorisdb.com>
2021-11-17 17:21:10 +08:00
stdpain e23d3654b4
[cherry-pick] cherry-pick some bug fix for 2.0 (#1333)
* use stl hash instead of crc64/128 in Aggregator FixedSizeSliceKey (#1298)

In some cases the hash results generated by crc32/64 can have serious conflicts, which can drastically degrade aggregation performance.
This hash function is only used inside a single Fragment, so we don't have to consider grayscale upgrades either.

case: ssb100g
```
select count(*) from (select count(*) from lineorder_flat group by LO_SHIPMODE,LO_ORDERKEY) t;
```

* Make the column type of the hive table always nullable (#1324)
2021-11-17 15:27:41 +08:00
yan.zhang 487628cc99
Support to access `char` type in hive orc file (#1176) (#1329) 2021-11-17 10:30:58 +08:00
zhangqiang 5f6fa00fc3
BugFix: Fix Be crash while doing schema change (#1297) (#1303)
Add a check for consistency between the nullability of the new column and the old column when doing schema change.
If nullability is inconsistent, the new column cannot be assigned from the old column directly.
close #1296
2021-11-16 16:05:50 +08:00
539 changed files with 12589 additions and 8722 deletions

View File

@@ -342,7 +342,7 @@ set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -gz=zlib")
# Debug information is stored as dwarf2 to be as compatible as possible
# -Werror: compile warnings should be errors when using the toolchain compiler.
# Only enable for debug builds because this is what we test in pre-commit tests.
set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -ggdb -O0 -gdwarf-4")
set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -ggdb -O0 -gdwarf-4 -DDEBUG")
# For CMAKE_BUILD_TYPE=Release
# -O3: Enable all compiler optimizations
@@ -456,8 +456,6 @@ set(STARROCKS_DEPENDENCIES
${STARROCKS_DEPENDENCIES}
${WL_START_GROUP}
rocksdb
librdkafka_cpp
librdkafka
libs2
snappy
${Boost_LIBRARIES}
@@ -466,6 +464,8 @@ set(STARROCKS_DEPENDENCIES
glog
re2
pprof
# Put lz4 in front of librdkafka to make sure that segment uses the native lz4 library to compress/decompress pages
# Otherwise, it will use the lz4 library in librdkafka
lz4
libevent
curl
@@ -491,6 +491,8 @@ set(STARROCKS_DEPENDENCIES
fmt
ryu
hyperscan
librdkafka_cpp
librdkafka
${WL_END_GROUP}
)

View File

@@ -37,7 +37,6 @@
#include "gen_cpp/FrontendService.h"
#include "gen_cpp/Types_types.h"
#include "gutil/strings/substitute.h"
#include "http/http_client.h"
#include "runtime/exec_env.h"
#include "runtime/snapshot_loader.h"
#include "service/backend_options.h"
@@ -1090,18 +1089,26 @@ void* TaskWorkerPool::_check_consistency_worker_thread_callback(void* arg_this)
TStatusCode::type status_code = TStatusCode::OK;
std::vector<std::string> error_msgs;
TStatus task_status;
uint32_t checksum = 0;
EngineChecksumTask engine_task(ExecEnv::GetInstance()->consistency_mem_tracker(),
check_consistency_req.tablet_id, check_consistency_req.schema_hash,
check_consistency_req.version, check_consistency_req.version_hash, &checksum);
OLAPStatus res = worker_pool_this->_env->storage_engine()->execute_task(&engine_task);
if (res != OLAP_SUCCESS) {
LOG(WARNING) << "check consistency failed. status: " << res << ", signature: " << agent_task_req.signature;
status_code = TStatusCode::RUNTIME_ERROR;
MemTracker* mem_tracker = ExecEnv::GetInstance()->consistency_mem_tracker();
Status check_limit_st = mem_tracker->check_mem_limit("Start consistency check.");
if (!check_limit_st.ok()) {
LOG(WARNING) << "check consistency failed: " << check_limit_st.message();
status_code = TStatusCode::MEM_LIMIT_EXCEEDED;
} else {
LOG(INFO) << "check consistency success. status:" << res << ", signature:" << agent_task_req.signature
<< ", checksum:" << checksum;
EngineChecksumTask engine_task(mem_tracker, check_consistency_req.tablet_id,
check_consistency_req.schema_hash, check_consistency_req.version,
check_consistency_req.version_hash, &checksum);
OLAPStatus res = worker_pool_this->_env->storage_engine()->execute_task(&engine_task);
if (res != OLAP_SUCCESS) {
LOG(WARNING) << "check consistency failed. status: " << res
<< ", signature: " << agent_task_req.signature;
status_code = TStatusCode::RUNTIME_ERROR;
} else {
LOG(INFO) << "check consistency success. status:" << res << ", signature:" << agent_task_req.signature
<< ", checksum:" << checksum;
}
}
task_status.__set_status_code(status_code);
@@ -1121,7 +1128,7 @@ void* TaskWorkerPool::_check_consistency_worker_thread_callback(void* arg_this)
#ifndef BE_TEST
}
#endif
return (void*)nullptr;
return nullptr;
}
void* TaskWorkerPool::_report_task_worker_thread_callback(void* arg_this) {

View File

@@ -13,29 +13,27 @@
namespace starrocks::vectorized {
Chunk::Chunk() {
_slot_id_to_index.init(4);
_tuple_id_to_index.init(1);
_slot_id_to_index.reserve(4);
_tuple_id_to_index.reserve(1);
}
Chunk::Chunk(Columns columns, SchemaPtr schema) : _columns(std::move(columns)), _schema(std::move(schema)) {
// bucket size cannot be 0.
_cid_to_index.init(std::max<size_t>(1, columns.size() * 2));
_slot_id_to_index.init(std::max<size_t>(1, _columns.size() * 2));
_tuple_id_to_index.init(1);
_cid_to_index.reserve(std::max<size_t>(1, columns.size() * 2));
_slot_id_to_index.reserve(std::max<size_t>(1, _columns.size() * 2));
_tuple_id_to_index.reserve(1);
rebuild_cid_index();
check_or_die();
}
// TODO: FlatMap don't support std::move
Chunk::Chunk(Columns columns, const butil::FlatMap<SlotId, size_t>& slot_map)
: _columns(std::move(columns)), _slot_id_to_index(slot_map) {
Chunk::Chunk(Columns columns, const SlotHashMap& slot_map) : _columns(std::move(columns)), _slot_id_to_index(slot_map) {
// when use _slot_id_to_index, we don't need to rebuild_cid_index
_tuple_id_to_index.init(1);
_tuple_id_to_index.reserve(1);
}
// TODO: FlatMap don't support std::move
Chunk::Chunk(Columns columns, const butil::FlatMap<SlotId, size_t>& slot_map,
const butil::FlatMap<SlotId, size_t>& tuple_map)
Chunk::Chunk(Columns columns, const SlotHashMap& slot_map, const TupleHashMap& tuple_map)
: _columns(std::move(columns)), _slot_id_to_index(slot_map), _tuple_id_to_index(tuple_map) {
// when use _slot_id_to_index, we don't need to rebuild_cid_index
}
@@ -68,7 +66,7 @@ std::string Chunk::get_column_name(size_t idx) const {
}
void Chunk::append_column(ColumnPtr column, const FieldPtr& field) {
DCHECK(_cid_to_index.seek(field->id()) == nullptr);
DCHECK(!_cid_to_index.contains(field->id()));
_cid_to_index[field->id()] = _columns.size();
_columns.emplace_back(std::move(column));
_schema->append(field);
@@ -138,7 +136,8 @@ size_t Chunk::serialize_size() const {
return size;
}
void Chunk::serialize(uint8_t* dst) const {
size_t Chunk::serialize(uint8_t* dst) const {
uint8_t* head = dst;
uint32_t version = 1;
encode_fixed32_le(dst, version);
dst += sizeof(uint32_t);
@@ -149,6 +148,7 @@ void Chunk::serialize(uint8_t* dst) const {
for (const auto& column : _columns) {
dst = column->serialize_column(dst);
}
return dst - head;
}
size_t Chunk::serialize_with_meta(starrocks::ChunkPB* chunk) const {
@@ -182,15 +182,17 @@ size_t Chunk::serialize_with_meta(starrocks::ChunkPB* chunk) const {
size_t size = serialize_size();
chunk->mutable_data()->resize(size);
serialize((uint8_t*)chunk->mutable_data()->data());
size_t written_size = serialize((uint8_t*)chunk->mutable_data()->data());
chunk->set_serialized_size(written_size);
return size;
}
Status Chunk::deserialize(const uint8_t* src, size_t len, const RuntimeChunkMeta& meta) {
Status Chunk::deserialize(const uint8_t* src, size_t len, const RuntimeChunkMeta& meta, size_t serialized_size) {
_slot_id_to_index = meta.slot_id_to_index;
_tuple_id_to_index = meta.tuple_id_to_index;
_columns.resize(_slot_id_to_index.size() + _tuple_id_to_index.size());
const uint8_t* head = src;
uint32_t version = decode_fixed32_le(src);
DCHECK_EQ(version, 1);
src += sizeof(uint32_t);
@@ -206,10 +208,23 @@ Status Chunk::deserialize(const uint8_t* src, size_t len, const RuntimeChunkMeta
src = column->deserialize_column(src);
}
size_t except = serialize_size();
if (UNLIKELY(len != except)) {
return Status::InternalError(
strings::Substitute("deserialize chunk data failed. len: $0, except: $1", len, except));
// The logic is a bit confusing here.
// `len` and `expected` are both "estimated" serialized sizes; they can be larger than the real serialized size.
// `serialized_size` and `read_size` are both "real" serialized sizes: exactly how many bytes were written into the buffer.
// For some object column types like bitmap/hll/percentile, "estimated" and "real" are not always the same.
// And for bitmap, sometimes `len` and `expected` are different, so to fix that problem we fall back to comparing the "real" serialized size.
// We compare the "real" serialized size first. That comparison may fail for backward-compatibility reasons: old versions of BE
// do not have the "serialized_size" field (which means the value is zero), in which case we fall back to comparing the "estimated" serialized size.
// For new versions of BE, the "real" serialized size always matches, and we save the cost of calling `serialize_size`.
size_t read_size = src - head;
if (UNLIKELY(read_size != serialized_size)) {
size_t expected = serialize_size();
if (UNLIKELY(len != expected)) {
return Status::InternalError(strings::Substitute(
"deserialize chunk data failed. len: $0, expected: $1, ser_size: $2, deser_size: $3", len, expected,
serialized_size, read_size));
}
}
DCHECK_EQ(rows, num_rows());
return Status::OK();

View File

@@ -6,8 +6,10 @@
#include "butil/containers/flat_map.h"
#include "column/column.h"
#include "column/column_hash.h"
#include "column/schema.h"
#include "common/global_types.h"
#include "util/phmap/phmap.h"
namespace starrocks {
class ChunkPB;
@@ -18,12 +20,14 @@ class DatumTuple;
class Chunk {
public:
using ChunkPtr = std::shared_ptr<Chunk>;
using SlotHashMap = phmap::flat_hash_map<SlotId, size_t, StdHash<SlotId>>;
using ColumnIdHashMap = phmap::flat_hash_map<ColumnId, size_t, StdHash<SlotId>>;
using TupleHashMap = phmap::flat_hash_map<TupleId, size_t, StdHash<TupleId>>;
Chunk();
Chunk(Columns columns, SchemaPtr schema);
Chunk(Columns columns, const butil::FlatMap<SlotId, size_t>& slot_map);
Chunk(Columns columns, const butil::FlatMap<SlotId, size_t>& slot_map,
const butil::FlatMap<TupleId, size_t>& tuple_map);
Chunk(Columns columns, const SlotHashMap& slot_map);
Chunk(Columns columns, const SlotHashMap& slot_map, const TupleHashMap& tuple_map);
Chunk(Chunk&& other) = default;
Chunk& operator=(Chunk&& other) = default;
@@ -97,8 +101,8 @@ public:
ColumnPtr& get_column_by_slot_id(SlotId slot_id);
void set_slot_id_to_index(SlotId slot_id, size_t idx) { _slot_id_to_index[slot_id] = idx; }
bool is_slot_exist(SlotId id) const { return _slot_id_to_index.seek(id) != nullptr; }
bool is_tuple_exist(TupleId id) const { return _tuple_id_to_index.seek(id) != nullptr; }
bool is_slot_exist(SlotId id) const { return _slot_id_to_index.contains(id); }
bool is_tuple_exist(TupleId id) const { return _tuple_id_to_index.contains(id); }
void reset_slot_id_to_index() { _slot_id_to_index.clear(); }
void set_columns(const Columns& columns) { _columns = columns; }
@@ -119,10 +123,10 @@ public:
// ...
// column n data
// Note: You should ensure the dst buffer size is enough
void serialize(uint8_t* dst) const;
size_t serialize(uint8_t* dst) const;
// Deserialize chunk by |src| (chunk data) and |meta| (chunk meta)
Status deserialize(const uint8_t* src, size_t len, const RuntimeChunkMeta& meta);
Status deserialize(const uint8_t* src, size_t len, const RuntimeChunkMeta& meta, size_t serialized_size);
// Create an empty chunk with the same meta and reserve it of size chunk _num_rows
// not clone tuple column
@@ -182,8 +186,8 @@ public:
DelCondSatisfied delete_state() const { return _delete_state; }
const butil::FlatMap<TupleId, size_t>& get_tuple_id_to_index_map() const { return _tuple_id_to_index; }
const butil::FlatMap<SlotId, size_t>& get_slot_id_to_index_map() const { return _slot_id_to_index; }
const TupleHashMap& get_tuple_id_to_index_map() const { return _tuple_id_to_index; }
const SlotHashMap& get_slot_id_to_index_map() const { return _slot_id_to_index; }
// Call `Column::reserve` on each column of |chunk|, with |cap| passed as argument.
void reserve(size_t cap);
@@ -248,10 +252,10 @@ private:
Columns _columns;
std::shared_ptr<Schema> _schema;
butil::FlatMap<ColumnId, size_t> _cid_to_index;
ColumnIdHashMap _cid_to_index;
// For compatibility
butil::FlatMap<SlotId, size_t> _slot_id_to_index;
butil::FlatMap<TupleId, size_t> _tuple_id_to_index;
SlotHashMap _slot_id_to_index;
TupleHashMap _tuple_id_to_index;
DelCondSatisfied _delete_state = DEL_NOT_SATISFIED;
};
@@ -289,7 +293,7 @@ inline const ColumnPtr& Chunk::get_column_by_id(ColumnId cid) const {
inline ColumnPtr& Chunk::get_column_by_id(ColumnId cid) {
DCHECK(!_cid_to_index.empty());
DCHECK(_cid_to_index.seek(cid) != nullptr);
DCHECK(_cid_to_index.contains(cid));
return _columns[_cid_to_index[cid]];
}
@@ -307,8 +311,8 @@ struct RuntimeChunkMeta {
std::vector<TypeDescriptor> types;
std::vector<bool> is_nulls;
std::vector<bool> is_consts;
butil::FlatMap<SlotId, size_t> slot_id_to_index;
butil::FlatMap<TupleId, size_t> tuple_id_to_index;
Chunk::SlotHashMap slot_id_to_index;
Chunk::TupleHashMap tuple_id_to_index;
};
} // namespace vectorized

View File

@@ -2,6 +2,8 @@
#pragma once
#include <cstdint>
#if defined(__x86_64__)
#include <nmmintrin.h>
#endif
@@ -275,4 +277,25 @@ inline uint64_t crc_hash_uint128(uint64_t value0, uint64_t value1, uint64_t seed
return hash;
}
// https://github.com/HowardHinnant/hash_append/issues/7
template <typename T>
inline void hash_combine(uint64_t& seed, const T& val) {
seed ^= std::hash<T>{}(val) + 0x9e3779b97f4a7c15LLU + (seed << 12) + (seed >> 4);
}
inline uint64_t hash_128(uint64_t seed, int128_t val) {
size_t low = val;
size_t high = val >> 64;
hash_combine(seed, low);
hash_combine(seed, high);
return seed;
}
template <PhmapSeed seed>
struct Hash128WithSeed {
std::size_t operator()(int128_t value) const {
return phmap_mix_with_seed<sizeof(size_t), seed>()(hash_128(seed, value));
}
};
} // namespace starrocks::vectorized

View File

@@ -9,6 +9,7 @@
#include "gutil/casts.h"
#include "runtime/types.h"
#include "simd/simd.h"
#include "util/date_func.h"
namespace starrocks::vectorized {
@@ -319,4 +320,38 @@ size_t ColumnHelper::compute_bytes_size(ColumnsConstIterator const& begin, Colum
return n;
}
ColumnPtr ColumnHelper::convert_time_column_from_double_to_str(const ColumnPtr& column) {
auto get_binary_column = [](DoubleColumn* data_column, size_t size) -> ColumnPtr {
auto new_data_column = BinaryColumn::create();
new_data_column->reserve(size);
for (int row = 0; row < size; ++row) {
auto time = data_column->get_data()[row];
std::string time_str = time_str_from_double(time);
new_data_column->append(time_str);
}
return new_data_column;
};
ColumnPtr res;
if (column->only_null()) {
res = column;
} else if (column->is_nullable()) {
auto* nullable_column = down_cast<NullableColumn*>(column.get());
auto* data_column = down_cast<DoubleColumn*>(nullable_column->mutable_data_column());
res = NullableColumn::create(get_binary_column(data_column, column->size()), nullable_column->null_column());
} else if (column->is_constant()) {
auto* const_column = down_cast<vectorized::ConstColumn*>(column.get());
string time_str = time_str_from_double(const_column->get(0).get_double());
res = vectorized::ColumnHelper::create_const_column<TYPE_VARCHAR>(time_str, column->size());
} else {
auto* data_column = down_cast<DoubleColumn*>(column.get());
res = get_binary_column(data_column, column->size());
}
return res;
}
} // namespace starrocks::vectorized

View File

@@ -299,6 +299,8 @@ public:
static ColumnPtr create_const_null_column(size_t chunk_size);
static ColumnPtr convert_time_column_from_double_to_str(const ColumnPtr& column);
static NullColumnPtr one_size_not_null_column;
static NullColumnPtr one_size_null_column;

View File

@@ -25,23 +25,6 @@ DIAGNOSTIC_POP
namespace starrocks::vectorized {
// Before a thread exit, the corresponding thread-local column pool will be destroyed and the following
// bvar's will be updated too. This is ok in the production environment, because no column pool exists
// in the main thread, in other words, accessing a bvar is safe when destroying a thread-local column pool.
// But it's NOT true when running unit tests, because unit tests usually run in the main thread and a local
// column pool will be created in the main thread. When destroying the column pool in the main thread, the
// bvar's may have been destroyed, so it's unsafe to update bvar when destroying a column pool.
// To work around this, we simply do not update bvar's in unit tests.
inline bvar::Adder<uint64_t> g_column_pool_oversized_columns("column_pool", "oversized_columns");
inline bvar::Adder<int64_t> g_column_pool_total_local_bytes("column_pool", "total_local_bytes");
inline bvar::Adder<int64_t> g_column_pool_total_central_bytes("column_pool", "total_central_bytes");
#ifndef BE_TEST
#define UPDATE_BVAR(bvar_name, value) (bvar_name) << (value)
#else
#define UPDATE_BVAR(bvar_name, value) (void)(value) /* avoid compiler warning: unused variable */
#endif
template <typename T>
struct ColumnPoolBlockSize {
static const size_t value = 256;
@@ -126,22 +109,18 @@ class CACHELINE_ALIGNED ColumnPool {
}
~LocalPool() {
auto freed_bytes = _curr_free.bytes;
if (_curr_free.nfree > 0 && !_pool->_push_free_block(_curr_free)) {
for (size_t i = 0; i < _curr_free.nfree; i++) {
ASAN_UNPOISON_MEMORY_REGION(_curr_free.ptrs[i], sizeof(T));
delete _curr_free.ptrs[i];
}
}
UPDATE_BVAR(g_column_pool_total_local_bytes, -freed_bytes);
_pool->_clear_from_destructor_of_local_pool();
}
inline T* get_object() {
if (_curr_free.nfree == 0) {
if (_pool->_pop_free_block(&_curr_free)) {
UPDATE_BVAR(g_column_pool_total_local_bytes, _curr_free.bytes);
} else {
if (!_pool->_pop_free_block(&_curr_free)) {
return nullptr;
}
}
@@ -149,7 +128,6 @@ class CACHELINE_ALIGNED ColumnPool {
ASAN_UNPOISON_MEMORY_REGION(obj, sizeof(T));
auto bytes = column_bytes(obj);
_curr_free.bytes -= bytes;
UPDATE_BVAR(g_column_pool_total_local_bytes, -bytes);
tls_thread_status.mem_consume(bytes);
_pool->mem_tracker()->release(bytes);
@@ -159,7 +137,6 @@ class CACHELINE_ALIGNED ColumnPool {
inline void return_object(T* ptr) {
if (UNLIKELY(column_reserved_size(ptr) > config::vector_chunk_size)) {
UPDATE_BVAR(g_column_pool_oversized_columns, 1);
delete ptr;
return;
}
@@ -172,12 +149,10 @@ class CACHELINE_ALIGNED ColumnPool {
tls_thread_status.mem_release(bytes);
_pool->mem_tracker()->consume(bytes);
UPDATE_BVAR(g_column_pool_total_local_bytes, bytes);
return;
}
if (_pool->_push_free_block(_curr_free)) {
ASAN_POISON_MEMORY_REGION(ptr, sizeof(T));
UPDATE_BVAR(g_column_pool_total_local_bytes, -_curr_free.bytes);
_curr_free.nfree = 1;
_curr_free.ptrs[0] = ptr;
_curr_free.bytes = bytes;
@@ -185,7 +160,6 @@ class CACHELINE_ALIGNED ColumnPool {
tls_thread_status.mem_release(bytes);
_pool->mem_tracker()->consume(bytes);
UPDATE_BVAR(g_column_pool_total_local_bytes, bytes);
return;
}
delete ptr;
@@ -199,7 +173,6 @@ class CACHELINE_ALIGNED ColumnPool {
ASAN_POISON_MEMORY_REGION(_curr_free.ptrs[i], sizeof(T));
}
_curr_free.bytes -= freed_bytes;
UPDATE_BVAR(g_column_pool_total_local_bytes, -freed_bytes);
}
static inline void delete_local_pool(void* arg) { delete (LocalPool*)arg; }
@@ -263,7 +236,6 @@ public:
_mem_tracker->release(freed_bytes);
tls_thread_status.mem_consume(freed_bytes);
UPDATE_BVAR(g_column_pool_total_central_bytes, -freed_bytes);
return freed_bytes;
}
@@ -352,7 +324,6 @@ private:
if (UNLIKELY(p == nullptr)) {
return false;
}
UPDATE_BVAR(g_column_pool_total_central_bytes, blk.bytes);
p->nfree = blk.nfree;
p->bytes = blk.bytes;
memcpy(p->ptrs, blk.ptrs, sizeof(*blk.ptrs) * blk.nfree);
@@ -378,7 +349,6 @@ private:
blk->nfree = p->nfree;
blk->bytes = p->bytes;
free(p);
UPDATE_BVAR(g_column_pool_total_central_bytes, -blk->bytes);
return true;
}

View File

@@ -35,6 +35,7 @@ static inline size_t null_mask(const ColumnPtr& column) {
template <PrimitiveType Type>
class ColumnViewer {
public:
static auto constexpr TYPE = Type;
explicit ColumnViewer(const ColumnPtr& column)
: _not_const_mask(not_const_mask(column)), _null_mask(null_mask(column)) {
if (column->only_null()) {

View File

@@ -72,9 +72,7 @@ Status datum_from_string(TypeInfo* type_info, Datum* dst, const std::string& str
slice.data = (char*)str.data();
} else {
slice.data = reinterpret_cast<char*>(mem_pool->allocate(slice.size));
if (UNLIKELY(slice.data == nullptr)) {
return Status::InternalError("Mem usage has exceed the limit of BE");
}
RETURN_IF_UNLIKELY_NULL(slice.data, Status::MemoryAllocFailed("alloc mem for varchar field failed"));
memcpy(slice.data, str.data(), slice.size);
}
// If type is OLAP_FIELD_TYPE_CHAR, strip its tailing '\0'

View File

@@ -2,10 +2,12 @@
#pragma once
#include <memory>
#include <string>
#include <utility>
#include "column/vectorized_fwd.h"
#include "storage/olap_common.h"
#include "storage/types.h"
namespace starrocks::vectorized {
@@ -78,6 +80,13 @@ public:
ColumnPtr create_column() const;
static FieldPtr convert_to_dict_field(const Field& field) {
DCHECK(field.type()->type() == OLAP_FIELD_TYPE_VARCHAR);
FieldPtr res = std::make_shared<Field>(field);
res->_type = get_type_info(OLAP_FIELD_TYPE_INT);
return res;
}
private:
Field() = default;

View File

@@ -2,6 +2,8 @@
#pragma once
#include <cstdint>
#include "column/column_hash.h"
#include "util/phmap/phmap.h"
#include "util/phmap/phmap_dump.h"
@@ -113,18 +115,10 @@ class FixedSizeSliceKeyHash {
public:
std::size_t operator()(const SliceKey& s) const {
if constexpr (sizeof(SliceKey) == 8) {
if constexpr (seed == PhmapSeed1) {
return crc_hash_uint64(s.u.value, CRC_HASH_SEED1);
} else {
return crc_hash_uint64(s.u.value, CRC_HASH_SEED2);
}
return phmap_mix_with_seed<sizeof(size_t), seed>()(std::hash<size_t>()(s.u.value));
} else {
static_assert(sizeof(SliceKey) == 16);
if constexpr (seed == PhmapSeed1) {
return crc_hash_uint128(s.u.ui64[0], s.u.ui64[1], CRC_HASH_SEED1);
} else {
return crc_hash_uint128(s.u.ui64[0], s.u.ui64[1], CRC_HASH_SEED2);
}
static_assert(sizeof(s.u.value) == 16);
return Hash128WithSeed<seed>()(s.u.value);
}
}
};

View File

@@ -26,7 +26,6 @@ add_library(Common STATIC
daemon.cpp
status.cpp
statusor.cpp
resource_tls.cpp
logconfig.cpp
configbase.cpp
)

View File

@@ -70,7 +70,7 @@ CONF_Int64(tc_max_total_thread_cache_bytes, "1073741824");
// defaults to bytes if no unit is given"
// must larger than 0. and if larger than physical memory size,
// it will be set to physical memory size.
CONF_String(mem_limit, "80%");
CONF_String(mem_limit, "90%");
// the port heartbeat service used
CONF_Int32(heartbeat_service_port, "9050");
@@ -456,7 +456,11 @@ CONF_mInt64(write_buffer_size, "104857600");
// user should set these configs properly if necessary.
CONF_Int64(load_process_max_memory_limit_bytes, "107374182400"); // 100GB
CONF_Int32(load_process_max_memory_limit_percent, "30"); // 30%
CONF_Int64(compaction_mem_limit, "2147483648"); // 2G
CONF_Int64(compaction_max_memory_limit, "-1");
CONF_Int32(compaction_max_memory_limit_percent, "100");
CONF_Int64(compaction_memory_limit_per_worker, "2147483648"); // 2GB
CONF_String(consistency_max_memory_limit, "10G");
CONF_Int32(consistency_max_memory_limit_percent, "20");
// update interval of tablet stat cache
CONF_mInt32(tablet_stat_cache_update_interval_second, "300");
@ -644,6 +648,12 @@ CONF_Int64(pipeline_io_buffer_size, "64");
CONF_Int16(bitmap_serialize_version, "1");
// schema change vectorized
CONF_Bool(enable_schema_change_vectorized, "true");
// max hdfs file handle
CONF_mInt32(max_hdfs_file_handle, "1000");
// buffer stream reserve size
// each column will reserve buffer_stream_reserve_size bytes for read
// default: 8M
CONF_mInt32(buffer_stream_reserve_size, "8192000");
} // namespace config

View File

@ -68,16 +68,13 @@ private:
size_t _freed_bytes = 0;
};
void* tcmalloc_gc_thread(void* dummy) {
void gc_tcmalloc_memory(void* arg_this) {
using namespace starrocks::vectorized;
const static float kFreeRatio = 0.5;
GCHelper gch(config::tc_gc_period, config::memory_maintenance_sleep_time_s, MonoTime::Now());
StorageEngine* storage_engine = ExecEnv::GetInstance()->storage_engine();
bool bg_worker_stopped = false;
if (storage_engine != nullptr) {
bg_worker_stopped = storage_engine->bg_worker_stopped();
}
while (!bg_worker_stopped) {
Daemon* daemon = static_cast<Daemon*>(arg_this);
while (!daemon->stopped()) {
sleep(config::memory_maintenance_sleep_time_s);
#if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER)
MallocExtension::instance()->MarkThreadBusy();
@ -112,14 +109,7 @@ void* tcmalloc_gc_thread(void* dummy) {
}
MallocExtension::instance()->MarkThreadIdle();
#endif
storage_engine = ExecEnv::GetInstance()->storage_engine();
if (storage_engine != nullptr) {
bg_worker_stopped = storage_engine->bg_worker_stopped();
}
}
return nullptr;
}
/*
@ -130,7 +120,7 @@ void* tcmalloc_gc_thread(void* dummy) {
* 4. max network send bytes rate
* 5. max network receive bytes rate
*/
void* calculate_metrics(void* dummy) {
void calculate_metrics(void* arg_this) {
int64_t last_ts = -1L;
int64_t lst_push_bytes = -1;
int64_t lst_query_bytes = -1;
@ -139,12 +129,8 @@ void* calculate_metrics(void* dummy) {
std::map<std::string, int64_t> lst_net_send_bytes;
std::map<std::string, int64_t> lst_net_receive_bytes;
StorageEngine* storage_engine = ExecEnv::GetInstance()->storage_engine();
bool bg_worker_stopped = false;
if (storage_engine != nullptr) {
bg_worker_stopped = storage_engine->bg_worker_stopped();
}
while (!bg_worker_stopped) {
Daemon* daemon = static_cast<Daemon*>(arg_this);
while (!daemon->stopped()) {
StarRocksMetrics::instance()->metrics()->trigger_hook();
if (last_ts == -1L) {
@ -190,13 +176,7 @@ void* calculate_metrics(void* dummy) {
}
sleep(15); // 15 seconds
storage_engine = ExecEnv::GetInstance()->storage_engine();
if (storage_engine != nullptr) {
bg_worker_stopped = storage_engine->bg_worker_stopped();
}
}
return nullptr;
}
static void init_starrocks_metrics(const std::vector<StorePath>& store_paths) {
@ -221,11 +201,6 @@ static void init_starrocks_metrics(const std::vector<StorePath>& store_paths) {
}
}
StarRocksMetrics::instance()->initialize(paths, init_system_metrics, disk_devices, network_interfaces);
if (config::enable_metric_calculator) {
pthread_t calculator_pid;
pthread_create(&calculator_pid, nullptr, calculate_metrics, nullptr);
}
}
void sigterm_handler(int signo) {
@ -268,7 +243,7 @@ void init_minidump() {
#endif
}
void init_daemon(int argc, char** argv, const std::vector<StorePath>& paths) {
void Daemon::init(int argc, char** argv, const std::vector<StorePath>& paths) {
// google::SetVersionString(get_build_version(false));
// google::ParseCommandLineFlags(&argc, &argv, true);
google::ParseCommandLineFlags(&argc, &argv, true);
@ -280,20 +255,41 @@ void init_daemon(int argc, char** argv, const std::vector<StorePath>& paths) {
CpuInfo::init();
DiskInfo::init();
MemInfo::init();
LOG(INFO) << CpuInfo::debug_string();
LOG(INFO) << DiskInfo::debug_string();
LOG(INFO) << MemInfo::debug_string();
UserFunctionCache::instance()->init(config::user_function_dir);
vectorized::ColumnHelper::init_static_variable();
vectorized::date::init_date_cache();
pthread_t tc_malloc_pid;
pthread_create(&tc_malloc_pid, nullptr, tcmalloc_gc_thread, nullptr);
std::thread tcmalloc_gc_thread(gc_tcmalloc_memory, this);
_daemon_threads.emplace_back(std::move(tcmalloc_gc_thread));
LOG(INFO) << CpuInfo::debug_string();
LOG(INFO) << DiskInfo::debug_string();
LOG(INFO) << MemInfo::debug_string();
init_starrocks_metrics(paths);
if (config::enable_metric_calculator) {
std::thread calculate_metrics_thread(calculate_metrics, this);
_daemon_threads.emplace_back(std::move(calculate_metrics_thread));
}
init_signals();
init_minidump();
}
void Daemon::stop() {
_stopped.store(true, std::memory_order_release);
int thread_size = _daemon_threads.size();
for (int i = 0; i < thread_size; ++i) {
if (_daemon_threads[i].joinable()) {
_daemon_threads[i].join();
}
}
}
bool Daemon::stopped() {
return _stopped.load(std::memory_order_consume);
}
} // namespace starrocks

View File

@ -22,16 +22,29 @@
#ifndef STARROCKS_BE_SRC_COMMON_COMMON_DAEMON_H
#define STARROCKS_BE_SRC_COMMON_COMMON_DAEMON_H
#include <atomic>
#include <thread>
#include <vector>
#include "storage/options.h"
namespace starrocks {
// Initialises logging, flags etc. Callers that want to override default gflags
// variables should do so before calling this method; no logging should be
// performed until after this method returns.
void init_daemon(int argc, char** argv, const std::vector<StorePath>& paths);
class Daemon {
public:
Daemon() = default;
~Daemon() = default;
void init(int argc, char** argv, const std::vector<StorePath>& paths);
void stop();
bool stopped();
private:
std::atomic<bool> _stopped{false};
std::vector<std::thread> _daemon_threads;
DISALLOW_COPY_AND_ASSIGN(Daemon);
};
} // namespace starrocks
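The pthread-based daemon loops are replaced by a Daemon object that owns its worker threads and a shared stop flag. A self-contained sketch of the same stop/join pattern, with a stand-in worker body rather than the actual GC or metrics loop:

#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>
#include <vector>

class Daemon {
public:
    void init() {
        _threads.emplace_back([this] {
            while (!stopped()) {
                // periodic background work would go here
                std::this_thread::sleep_for(std::chrono::milliseconds(50));
            }
        });
    }
    void stop() {
        _stopped.store(true, std::memory_order_release);
        for (auto& t : _threads) {
            if (t.joinable()) t.join();
        }
    }
    bool stopped() const { return _stopped.load(std::memory_order_acquire); }

private:
    std::atomic<bool> _stopped{false};
    std::vector<std::thread> _threads;
};

int main() {
    Daemon d;
    d.init();
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
    d.stop(); // joins all daemon threads before returning
    std::cout << "daemon stopped\n";
}

Checking the flag inside each loop iteration is what lets stop() terminate every worker without the old per-loop storage_engine->bg_worker_stopped() polling.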

View File

@ -79,7 +79,7 @@ class TUniqueId;
}
// Print log with query id.
#define QUERY_LOG(level) LOG(level) << "[" << CurrentThread::query_id_string() << "] "
#define QUERY_LOG_IF(level, cond) LOG_IF(level, cond) << "[" << tls_thread_status.query_id_string() << "] "
#define QUERY_LOG(level) LOG(level) << "[" << tls_thread_status.query_id() << "] "
#define QUERY_LOG_IF(level, cond) LOG_IF(level, cond) << "[" << tls_thread_status.query_id() << "] "
#endif
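Both macros expand to a glog stream expression, so the query-id prefix is emitted before whatever the caller streams in. A tiny stand-alone sketch of the same prefixing trick, with std::cerr standing in for LOG(level) and a hypothetical tls_query_id variable standing in for tls_thread_status.query_id():

#include <iostream>
#include <string>

// Stand-in for tls_thread_status.query_id(); glog's LOG(level) composes the
// same way because the macro expands to a stream expression.
thread_local std::string tls_query_id = "q-123";

#define QUERY_LOG() (std::cerr << "[" << tls_query_id << "] ")

int main() {
    QUERY_LOG() << "scan finished, rows=" << 42 << "\n";
}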

View File

@ -1,71 +0,0 @@
// This file is made available under Elastic License 2.0.
// This file is based on code available under the Apache license here:
// https://github.com/apache/incubator-doris/blob/master/be/src/common/resource_tls.cpp
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "common/resource_tls.h"
#include <pthread.h>
#include "common/logging.h"
#include "gen_cpp/Types_types.h"
namespace starrocks {
static pthread_key_t s_resource_key;
static bool s_is_init = false;
static void resource_destructor(void* value) {
TResourceInfo* info = (TResourceInfo*)value;
if (info == nullptr) {
delete info;
}
}
void ResourceTls::init() {
int ret = pthread_key_create(&s_resource_key, resource_destructor);
if (ret != 0) {
LOG(ERROR) << "create pthread key for resource failed.";
return;
}
s_is_init = true;
}
TResourceInfo* ResourceTls::get_resource_tls() {
if (!s_is_init) {
return nullptr;
}
return (TResourceInfo*)pthread_getspecific(s_resource_key);
}
int ResourceTls::set_resource_tls(TResourceInfo* info) {
if (!s_is_init) {
return -1;
}
TResourceInfo* old_info = (TResourceInfo*)pthread_getspecific(s_resource_key);
int ret = pthread_setspecific(s_resource_key, info);
if (ret == 0) {
// OK, now we delete old one
delete old_info;
}
return ret;
}
} // namespace starrocks

View File

@ -1,37 +0,0 @@
// This file is made available under Elastic License 2.0.
// This file is based on code available under the Apache license here:
// https://github.com/apache/incubator-doris/blob/master/be/src/common/resource_tls.h
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef STARROCKS_BE_SRC_COMMON_COMMON_RESOURCE_TLS_H
#define STARROCKS_BE_SRC_COMMON_COMMON_RESOURCE_TLS_H
namespace starrocks {
class TResourceInfo;
class ResourceTls {
public:
static void init();
static TResourceInfo* get_resource_tls();
static int set_resource_tls(TResourceInfo*);
};
} // namespace starrocks
#endif

View File

@ -324,4 +324,25 @@ inline std::ostream& operator<<(std::ostream& os, const Status& st) {
} \
} while (0)
#define RETURN_IF_UNLIKELY_NULL(ptr, ret) \
do { \
if (UNLIKELY(ptr == nullptr)) { \
return ret; \
} \
} while (0)
#define RETURN_IF_UNLIKELY(cond, ret) \
do { \
if (UNLIKELY(cond)) { \
return ret; \
} \
} while (0)
#define THROW_BAD_ALLOC_IF_NULL(ptr) \
do { \
if (UNLIKELY(ptr == nullptr)) { \
throw std::bad_alloc(); \
} \
} while (0)
#define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
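A usage sketch for the new null-check macro, exercised against a minimal Status stand-in (the real class is much richer); the copy_payload function and its error message are hypothetical:

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

#define UNLIKELY(x) __builtin_expect(!!(x), 0)

// Same shape as the macro added above.
#define RETURN_IF_UNLIKELY_NULL(ptr, ret) \
    do {                                  \
        if (UNLIKELY(ptr == nullptr)) {   \
            return ret;                   \
        }                                 \
    } while (0)

struct Status {
    std::string msg; // empty means OK
    static Status OK() { return {}; }
    static Status MemoryAllocFailed(std::string m) { return {std::move(m)}; }
};

// Hypothetical caller: bail out with a Status instead of dereferencing null.
Status copy_payload(const std::string& src) {
    char* buf = static_cast<char*>(malloc(src.size() + 1));
    RETURN_IF_UNLIKELY_NULL(buf, Status::MemoryAllocFailed("alloc mem for payload failed"));
    memcpy(buf, src.data(), src.size() + 1);
    free(buf);
    return Status::OK();
}

int main() {
    std::cout << (copy_payload("hello").msg.empty() ? "ok" : "failed") << "\n";
}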

View File

@ -3,13 +3,40 @@
#include "env/env_hdfs.h"
#include "env/env.h"
#include "fmt/core.h"
#include "gutil/strings/substitute.h"
#include "hdfs/hdfs.h"
#include "util/hdfs_util.h"
namespace starrocks {
HdfsRandomAccessFile::HdfsRandomAccessFile(hdfsFS fs, hdfsFile file, std::string filename)
: _fs(fs), _file(file), _filename(std::move(filename)) {}
HdfsRandomAccessFile::HdfsRandomAccessFile(hdfsFS fs, std::string filename)
: _opened(false), _fs(fs), _file(nullptr), _filename(std::move(filename)) {}
HdfsRandomAccessFile::~HdfsRandomAccessFile() noexcept {
close();
}
Status HdfsRandomAccessFile::open() {
DCHECK(!_opened);
if (_fs) {
_file = hdfsOpenFile(_fs, _filename.c_str(), O_RDONLY, 0, 0, 0);
if (_file == nullptr) {
return Status::InternalError(fmt::format("open file failed, file={}", _filename));
}
}
_opened = true;
return Status::OK();
}
void HdfsRandomAccessFile::close() noexcept {
if (_opened) {
if (_fs && _file) {
hdfsCloseFile(_fs, _file);
}
_opened = false;
}
}
static Status read_at_internal(hdfsFS fs, hdfsFile file, const std::string& file_name, int64_t offset, Slice* res) {
auto cur_offset = hdfsTell(fs, file);
@ -40,11 +67,13 @@ static Status read_at_internal(hdfsFS fs, hdfsFile file, const std::string& file
}
Status HdfsRandomAccessFile::read(uint64_t offset, Slice* res) const {
DCHECK(_opened);
RETURN_IF_ERROR(read_at_internal(_fs, _file, _filename, offset, res));
return Status::OK();
}
Status HdfsRandomAccessFile::read_at(uint64_t offset, const Slice& res) const {
DCHECK(_opened);
Slice slice = res;
RETURN_IF_ERROR(read_at_internal(_fs, _file, _filename, offset, &slice));
if (slice.size != res.size) {

View File

@ -12,9 +12,11 @@ namespace starrocks {
// Now this is not thread-safe.
class HdfsRandomAccessFile : public RandomAccessFile {
public:
HdfsRandomAccessFile(hdfsFS fs, hdfsFile file, std::string filename);
~HdfsRandomAccessFile() override = default;
HdfsRandomAccessFile(hdfsFS fs, std::string filename);
virtual ~HdfsRandomAccessFile() noexcept;
Status open();
void close() noexcept;
Status read(uint64_t offset, Slice* res) const override;
Status read_at(uint64_t offset, const Slice& res) const override;
Status readv_at(uint64_t offset, const Slice* res, size_t res_cnt) const override;
@ -25,6 +27,7 @@ public:
hdfsFile hdfs_file() const { return _file; }
private:
bool _opened;
hdfsFS _fs;
hdfsFile _file;
std::string _filename;
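The wrapper now opens lazily via open() and guarantees the handle is released in close() and the destructor. The same discipline shown with POSIX stdio instead of libhdfs, as a self-contained sketch:

#include <cstdio>
#include <string>

// Same open()/close()/destructor pattern as HdfsRandomAccessFile,
// demonstrated with stdio rather than hdfsOpenFile/hdfsCloseFile.
class LazyFile {
public:
    explicit LazyFile(std::string path) : _path(std::move(path)) {}
    ~LazyFile() noexcept { close(); }

    bool open() {
        if (_opened) return true;
        _fp = std::fopen(_path.c_str(), "rb");
        _opened = (_fp != nullptr);
        return _opened;
    }
    void close() noexcept {
        if (_opened) {
            std::fclose(_fp);
            _fp = nullptr;
            _opened = false;
        }
    }

private:
    bool _opened = false;
    std::FILE* _fp = nullptr;
    std::string _path;
};

int main() {
    LazyFile f("/etc/hostname");
    if (f.open()) std::puts("opened");
    // destructor closes the handle even if the caller forgets
}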

View File

@ -314,6 +314,12 @@ Status EsPredicate::_build_binary_predicate(const Expr* conjunct, bool* handled)
// how to process literal
auto literal = _pool->add(new VExtLiteral(expr->type().type, _context->evaluate(expr, nullptr)));
std::string col = slot_desc->col_name();
// ES does not support non-bool literal pushdown for bool type
if (column_ref->type().type == TYPE_BOOLEAN && expr->type().type != TYPE_BOOLEAN) {
return Status::InternalError("ES does not support non-bool literal pushdown");
}
if (_field_context.find(col) != _field_context.end()) {
col = _field_context[col];
}

View File

@ -37,6 +37,7 @@
#include "exec/scan_node.h"
#include "gen_cpp/PlanNodes_types.h"
#include "gutil/stl_util.h"
#include "gutil/strings/substitute.h"
#include "runtime/date_value.hpp"
#include "runtime/datetime_value.h"
#include "runtime/descriptors.h"
@ -127,6 +128,10 @@ inline void increase(vectorized::TimestampValue& value) {
} // namespace helper
// There are two types of value range: Fixed Value Range and Range Value Range
// I know "Range Value Range" sounds bad, but it's hard to overturn the de facto name.
// Fixed Value Range means discrete values in the set, like "IN (1,2,3)"
// Range Value Range means range values like ">= 10 && <= 20"
/**
* @brief Column's value range
**/
@ -167,13 +172,6 @@ public:
void convert_to_range_value();
void set_empty_value_range() {
_fixed_values.clear();
_low_value = _type_max;
_high_value = _type_min;
_fixed_op = FILTER_IN;
}
const std::set<T>& get_fixed_value_set() const { return _fixed_values; }
T get_range_max_value() const { return _high_value; }
@ -194,37 +192,35 @@ public:
void set_index_filter_only(bool is_index_only) { _is_index_filter_only = is_index_only; }
bool empty_range() { return _empty_range; }
void to_olap_filter(std::vector<TCondition>& filters) {
if (is_fixed_value_range() && _fixed_op != FILTER_NOT_IN) {
// If we have a fixed value range, we generate in/not-in predicates.
if (is_fixed_value_range()) {
DCHECK(_fixed_op == FILTER_IN || _fixed_op == FILTER_NOT_IN);
bool filter_in = (_fixed_op == FILTER_IN);
const std::string op = (filter_in) ? "*=" : "!=";
TCondition condition;
condition.__set_is_index_filter_only(_is_index_filter_only);
condition.__set_column_name(_column_name);
condition.__set_condition_op("*=");
condition.__set_condition_op(op);
for (auto value : _fixed_values) {
condition.condition_values.push_back(cast_to_string(value, type(), precision(), scale()));
}
if (!condition.condition_values.empty()) {
filters.push_back(condition);
}
} else if (is_fixed_value_range()) {
TCondition condition;
condition.__set_is_index_filter_only(_is_index_filter_only);
condition.__set_column_name(_column_name);
condition.__set_condition_op("!=");
for (auto value : _fixed_values) {
condition.condition_values.push_back(cast_to_string(value, type(), precision(), scale()));
bool can_push = true;
if (condition.condition_values.empty()) {
// If we use an IN clause, we still wish to push down the empty set.
if (filter_in && _empty_range) {
can_push = true;
} else {
can_push = false;
}
}
if (!condition.condition_values.empty()) {
if (can_push) {
filters.push_back(condition);
}
} else {
DCHECK(!is_fixed_value_range());
TCondition low;
low.__set_is_index_filter_only(_is_index_filter_only);
if (_type_min != _low_value || FILTER_LARGER_OR_EQUAL != _low_op) {
@ -402,6 +398,7 @@ inline Status ColumnValueRange<T>::add_fixed_values(SQLFilterOp op, const std::s
} else if (is_fixed_value_range()) {
DCHECK_EQ(FILTER_IN, _fixed_op);
_fixed_values = STLSetIntersection(_fixed_values, values);
_empty_range = _fixed_values.empty();
_fixed_op = op;
} else if (!values.empty()) {
_fixed_values = values;
@ -444,7 +441,8 @@ inline Status ColumnValueRange<T>::add_fixed_values(SQLFilterOp op, const std::s
_empty_range = true;
_fixed_op = FILTER_IN;
}
} else if (is_low_value_mininum() && is_high_value_maximum()) {
} else if (is_low_value_mininum() && _low_op == FILTER_LARGER_OR_EQUAL && is_high_value_maximum() &&
_high_op == FILTER_LESS_OR_EQUAL) {
if (!values.empty()) {
_fixed_values = values;
_fixed_op = FILTER_NOT_IN;
@ -470,9 +468,12 @@ template <class T>
inline bool ColumnValueRange<T>::is_empty_value_range() const {
if (INVALID_TYPE == _column_type) {
return true;
} else {
return _fixed_values.empty() && _high_value <= _low_value;
}
// TODO(yan): sometimes we don't have a Fixed Value Range, but a
// range value range like > 10 && < 5, which is also an empty value range.
// Maybe we can add that check later. Without it there is no correctness
// problem, only a performance penalty.
return _fixed_values.empty() && _empty_range;
}
template <class T>
@ -578,9 +579,10 @@ inline Status ColumnValueRange<T>::add_range(SQLFilterOp op, T value) {
return Status::InternalError("AddRange failed, Invalid type");
}
// If we already have IN value range, we can put `value` into it.
if (is_fixed_value_range()) {
if (_fixed_op != FILTER_IN) {
return Status::InternalError("operator is not FILTER_IN");
return Status::InternalError(strings::Substitute("Add Range failed! Unsupported SQLFilterOp $0", op));
}
std::pair<iterator_type, iterator_type> bound_pair = _fixed_values.equal_range(value);
@ -606,13 +608,14 @@ inline Status ColumnValueRange<T>::add_range(SQLFilterOp op, T value) {
break;
}
default: {
return Status::InternalError("AddRangefail! Unsupport SQLFilterOp.");
return Status::InternalError(strings::Substitute("Add Range failed! Unsupported SQLFilterOp $0", op));
}
}
_high_value = _type_min;
_low_value = _type_max;
_empty_range = _fixed_values.empty();
} else {
// Otherwise we can put `value` into normal value range.
if (_high_value > _low_value) {
switch (op) {
case FILTER_LARGER: {
@ -660,7 +663,7 @@ inline Status ColumnValueRange<T>::add_range(SQLFilterOp op, T value) {
break;
}
default: {
return Status::InternalError("AddRangefail! Unsupport SQLFilterOp.");
return Status::InternalError(strings::Substitute("Add Range failed! Unsupported SQLFilterOp $0", op));
}
}
}
@ -668,8 +671,8 @@ inline Status ColumnValueRange<T>::add_range(SQLFilterOp op, T value) {
if (FILTER_LARGER_OR_EQUAL == _low_op && FILTER_LESS_OR_EQUAL == _high_op && _high_value == _low_value) {
_fixed_values.insert(_high_value);
_fixed_op = FILTER_IN;
_high_value = _type_min;
_low_value = _type_max;
} else {
_empty_range = _low_value > _high_value;
}
}
_is_init_state = false;
@ -702,7 +705,7 @@ inline Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t
bool has_converted = false;
if (range.is_fixed_value_range()) {
const size_t mul = std::max<size_t>(1, _begin_scan_keys.size());
if (range.get_fixed_value_size() * mul > max_scan_key_num) {
if (range.get_fixed_value_size() > max_scan_key_num / mul) {
if (range.is_range_value_convertible()) {
range.convert_to_range_value();
} else {
@ -711,7 +714,7 @@ inline Status OlapScanKeys::extend_scan_key(ColumnValueRange<T>& range, int32_t
}
} else if (range.is_fixed_value_convertible() && _is_convertible) {
const size_t mul = std::max<size_t>(1, _begin_scan_keys.size());
if (range.get_convertible_fixed_value_size() * mul <= max_scan_key_num) {
if (range.get_convertible_fixed_value_size() <= max_scan_key_num / mul) {
if (range.is_low_value_mininum() && range.is_high_value_maximum()) {
has_converted = true;
}
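The two scan-key hunks above rewrite `count * mul > max_scan_key_num` as `count > max_scan_key_num / mul` because the multiplication can wrap around for size_t and make an oversized key set look small. A small demonstration of the wraparound (mul is guaranteed >= 1 by the std::max above, so the division is safe):

#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
    size_t size = SIZE_MAX / 2 + 1; // huge fixed-value count
    size_t mul = 4;                 // existing scan keys
    size_t max_scan_key_num = 1024;

    // Overflowing form: size * mul wraps to 0, so the check wrongly passes.
    bool overflowed = (size * mul > max_scan_key_num); // false!
    // Division form from the patch: wrap-free since mul >= 1.
    bool safe = (size > max_scan_key_num / mul);       // true, as expected

    std::cout << overflowed << " vs " << safe << "\n";
}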

View File

@ -12,6 +12,7 @@
#include "exec/parquet/types.h"
#include "exec/parquet/utils.h"
#include "gutil/strings/substitute.h"
#include "runtime/current_thread.h"
#include "util/runtime_profile.h"
namespace starrocks::parquet {
@ -141,6 +142,7 @@ void ColumnChunkReader::_reserve_uncompress_buf(size_t size) {
Status ColumnChunkReader::_read_and_decompress_page_data(uint32_t compressed_size, uint32_t uncompressed_size,
bool is_compressed) {
RETURN_IF_ERROR(CurrentThread::mem_tracker()->check_mem_limit("read and decompress page"));
if (is_compressed && _compress_codec != nullptr) {
Slice com_slice("", compressed_size);
RETURN_IF_ERROR(_page_reader->read_bytes((const uint8_t**)&com_slice.data, com_slice.size));

View File

@ -143,6 +143,7 @@ public:
_reader = std::make_unique<ColumnChunkReader>(_field->max_def_level(), _field->max_rep_level(),
_field->type_length, chunk_metadata, file, opts);
RETURN_IF_ERROR(_reader->init());
_num_values_left_in_cur_page = _reader->num_values();
return Status::OK();
}

View File

@ -158,14 +158,16 @@ Status ParquetReaderWrap::size(int64_t* size) {
return Status::OK();
}
inline void ParquetReaderWrap::fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool,
const uint8_t* value, int32_t len) {
Status ParquetReaderWrap::fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value,
int32_t len) {
tuple->set_not_null(slot_desc->null_indicator_offset());
void* slot = tuple->get_slot(slot_desc->tuple_offset());
StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
str_slot->ptr = reinterpret_cast<char*>(mem_pool->allocate(len));
RETURN_IF_UNLIKELY_NULL(str_slot->ptr, Status::MemoryAllocFailed("alloc mem for parquet reader failed"));
memcpy(str_slot->ptr, value, len);
str_slot->len = len;
return Status::OK();
}
Status ParquetReaderWrap::column_indices(const std::vector<SlotDescriptor*>& tuple_slot_descs) {
@ -303,7 +305,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
RETURN_IF_ERROR(set_field_null(tuple, slot_desc));
} else {
value = str_array->GetValue(_current_line_of_batch, &wbytes);
fill_slot(tuple, slot_desc, mem_pool, value, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, value, wbytes));
}
break;
}
@ -314,7 +316,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
int32_t value = int32_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%d", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -325,7 +327,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
int64_t value = int64_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%ld", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -336,7 +338,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
uint32_t value = uint32_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%u", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -347,7 +349,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
uint64_t value = uint64_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%lu", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -357,7 +359,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
RETURN_IF_ERROR(set_field_null(tuple, slot_desc));
} else {
value = str_array->GetValue(_current_line_of_batch, &wbytes);
fill_slot(tuple, slot_desc, mem_pool, value, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, value, wbytes));
}
break;
}
@ -367,7 +369,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
RETURN_IF_ERROR(set_field_null(tuple, slot_desc));
} else {
std::string value = fixed_array->GetString(_current_line_of_batch);
fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)value.c_str(), value.length());
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)value.c_str(), value.length()));
}
break;
}
@ -378,9 +380,9 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
bool value = boolean_array->Value(_current_line_of_batch);
if (value) {
fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"true", 4);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"true", 4));
} else {
fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"false", 5);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"false", 5));
}
}
break;
@ -392,7 +394,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
uint8_t value = uint8_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%d", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -403,7 +405,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
int8_t value = int8_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%d", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -414,7 +416,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
uint16_t value = uint16_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%d", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -425,7 +427,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
int16_t value = int16_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%d", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -436,7 +438,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
float value = half_float_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%f", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -447,7 +449,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
float value = float_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%f", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -458,7 +460,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
float value = double_array->Value(_current_line_of_batch);
wbytes = sprintf((char*)tmp_buf, "%f", value);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -469,7 +471,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
} else {
RETURN_IF_ERROR(handle_timestamp(ts_array, tmp_buf,
&wbytes)); // convert timestamp to string time
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -479,7 +481,8 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
RETURN_IF_ERROR(set_field_null(tuple, slot_desc));
} else {
std::string value = decimal_array->FormatValue(_current_line_of_batch);
fill_slot(tuple, slot_desc, mem_pool, (const uint8_t*)value.c_str(), value.length());
RETURN_IF_ERROR(
fill_slot(tuple, slot_desc, mem_pool, (const uint8_t*)value.c_str(), value.length()));
}
break;
}
@ -493,7 +496,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
localtime_r(&timestamp, &local);
char* to = reinterpret_cast<char*>(&tmp_buf);
wbytes = (uint32_t)strftime(to, 64, "%Y-%m-%d", &local);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
@ -508,7 +511,7 @@ Status ParquetReaderWrap::read(Tuple* tuple, const std::vector<SlotDescriptor*>&
localtime_r(&timestamp, &local);
char* to = reinterpret_cast<char*>(&tmp_buf);
wbytes = (uint32_t)strftime(to, 64, "%Y-%m-%d %H:%M:%S", &local);
fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes);
RETURN_IF_ERROR(fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes));
}
break;
}
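The refactor turns a void helper into one returning Status so that a failed MemPool allocation propagates via RETURN_IF_ERROR instead of being silently dropped. A compact sketch of the before/after pattern, with a std::vector standing in for MemPool and a hypothetical read_row caller:

#include <cstring>
#include <iostream>
#include <new>
#include <string>
#include <vector>

struct Status {
    std::string msg; // empty means OK
    bool ok() const { return msg.empty(); }
    static Status OK() { return {}; }
    static Status Error(std::string m) { return {std::move(m)}; }
};

#define RETURN_IF_ERROR(stmt)    \
    do {                         \
        Status _s = (stmt);      \
        if (!_s.ok()) return _s; \
    } while (0)

// Before: void fill_slot(...) silently ignored a failed allocation.
// After: return Status and let the caller bail out early.
Status fill_slot(std::vector<char>& pool, const char* value, size_t len) {
    try {
        size_t off = pool.size();
        pool.resize(off + len); // stand-in for MemPool::allocate
        memcpy(pool.data() + off, value, len);
    } catch (const std::bad_alloc&) {
        return Status::Error("alloc mem for slot failed");
    }
    return Status::OK();
}

Status read_row(std::vector<char>& pool) {
    RETURN_IF_ERROR(fill_slot(pool, "true", 4));
    RETURN_IF_ERROR(fill_slot(pool, "false", 5));
    return Status::OK();
}

int main() {
    std::vector<char> pool;
    std::cout << (read_row(pool).ok() ? "ok" : "failed") << "\n";
}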

View File

@ -87,7 +87,7 @@ public:
const std::vector<std::shared_ptr<arrow::DataType>>& get_column_types();
private:
void fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len);
Status fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len);
Status column_indices(const std::vector<SlotDescriptor*>& tuple_slot_descs);
Status set_field_null(Tuple* tuple, const SlotDescriptor* slot_desc);
Status handle_timestamp(const std::shared_ptr<arrow::TimestampArray>& ts_array, uint8_t* buf, int32_t* wbtyes);

View File

@ -2,6 +2,8 @@
#include "aggregate_blocking_sink_operator.h"
#include "runtime/current_thread.h"
namespace starrocks::pipeline {
Status AggregateBlockingSinkOperator::prepare(RuntimeState* state) {
@ -61,16 +63,17 @@ Status AggregateBlockingSinkOperator::push_chunk(RuntimeState* state, const vect
if (!_aggregator->is_none_group_by_exprs()) {
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == vectorized::HashMapVariant::Type::NAME) \
_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk->num_rows());
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == vectorized::HashMapVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk->num_rows())); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
_mem_tracker->set(_aggregator->hash_map_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
_aggregator->try_convert_to_two_level_map();
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_map());
}
if (_aggregator->is_none_group_by_exprs()) {
_aggregator->compute_single_agg_state(chunk->num_rows());

View File

@ -2,6 +2,8 @@
#include "aggregate_distinct_blocking_sink_operator.h"
#include "runtime/current_thread.h"
namespace starrocks::pipeline {
Status AggregateDistinctBlockingSinkOperator::prepare(RuntimeState* state) {
@ -56,13 +58,18 @@ Status AggregateDistinctBlockingSinkOperator::push_chunk(RuntimeState* state, co
if (false) {
}
#define HASH_SET_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == vectorized::HashSetVariant::Type::NAME) \
_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk->num_rows());
#define HASH_SET_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == vectorized::HashSetVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk->num_rows())); \
}
APPLY_FOR_VARIANT_ALL(HASH_SET_METHOD)
#undef HASH_SET_METHOD
_mem_tracker->set(_aggregator->hash_set_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_set());
_aggregator->update_num_input_rows(chunk->num_rows());
if (limit_with_no_agg) {
auto size = _aggregator->hash_set_variant().size();

View File

@ -2,6 +2,7 @@
#include "aggregate_distinct_streaming_sink_operator.h"
#include "runtime/current_thread.h"
#include "simd/simd.h"
namespace starrocks::pipeline {
@ -55,10 +56,11 @@ Status AggregateDistinctStreamingSinkOperator::_push_chunk_by_force_preaggregati
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == vectorized::HashSetVariant::Type::NAME) \
_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == vectorized::HashSetVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -72,6 +74,10 @@ Status AggregateDistinctStreamingSinkOperator::_push_chunk_by_force_preaggregati
}
COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_set_variant().size());
_mem_tracker->set(_aggregator->hash_set_variant().memory_usage() + _aggregator->mem_pool()->total_reserved_bytes());
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_set());
return Status::OK();
}
@ -88,10 +94,11 @@ Status AggregateDistinctStreamingSinkOperator::_push_chunk_by_auto(const size_t
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == vectorized::HashSetVariant::Type::NAME) \
_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == vectorized::HashSetVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -105,15 +112,21 @@ Status AggregateDistinctStreamingSinkOperator::_push_chunk_by_auto(const size_t
}
COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_set_variant().size());
_mem_tracker->set(_aggregator->hash_set_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_set());
} else {
{
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == vectorized::HashSetVariant::Type::NAME) _aggregator \
->build_hash_set_with_selection<typename decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == vectorized::HashSetVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_set_with_selection<typename decltype( \
_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {

View File

@ -2,6 +2,7 @@
#include "aggregate_streaming_sink_operator.h"
#include "runtime/current_thread.h"
#include "simd/simd.h"
namespace starrocks::pipeline {
@ -55,10 +56,11 @@ Status AggregateStreamingSinkOperator::_push_chunk_by_force_preaggregation(const
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == vectorized::HashMapVariant::Type::NAME) \
_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == vectorized::HashMapVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -72,7 +74,7 @@ Status AggregateStreamingSinkOperator::_push_chunk_by_force_preaggregation(const
}
_mem_tracker->set(_aggregator->hash_map_variant().memory_usage() + _aggregator->mem_pool()->total_reserved_bytes());
_aggregator->try_convert_to_two_level_map();
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_map());
COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_map_variant().size());
return Status::OK();
@ -91,10 +93,11 @@ Status AggregateStreamingSinkOperator::_push_chunk_by_auto(const size_t chunk_si
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == vectorized::HashMapVariant::Type::NAME) \
_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == vectorized::HashMapVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -109,7 +112,7 @@ Status AggregateStreamingSinkOperator::_push_chunk_by_auto(const size_t chunk_si
_mem_tracker->set(_aggregator->hash_map_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
_aggregator->try_convert_to_two_level_map();
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_map());
COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_map_variant().size());
} else {
@ -117,10 +120,12 @@ Status AggregateStreamingSinkOperator::_push_chunk_by_auto(const size_t chunk_si
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == vectorized::HashMapVariant::Type::NAME) _aggregator \
->build_hash_map_with_selection<typename decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == vectorized::HashMapVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_map_with_selection<typename decltype( \
_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {

View File

@ -54,8 +54,8 @@ Status DictDecodeOperatorFactory::prepare(RuntimeState* state) {
RETURN_IF_ERROR(Expr::prepare(_expr_ctxs, state, row_desc));
RETURN_IF_ERROR(Expr::open(_expr_ctxs, state));
const auto& global_dict = state->get_global_dict_map();
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_global_dict_map());
const auto& global_dict = state->get_query_global_dict_map();
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_query_global_dict_map());
DCHECK_EQ(_encode_column_cids.size(), _decode_column_cids.size());
int need_decode_size = _decode_column_cids.size();
@ -71,11 +71,17 @@ Status DictDecodeOperatorFactory::prepare(RuntimeState* state) {
auto& [expr_ctx, dict_ctx] = _string_functions[need_encode_cid];
DCHECK(expr_ctx->root()->fn().could_apply_dict_optimize);
_dict_optimize_parser.check_could_apply_dict_optimize(expr_ctx, &dict_ctx);
DCHECK(dict_ctx.could_apply_dict_optimize);
if (!dict_ctx.could_apply_dict_optimize) {
return Status::InternalError(
fmt::format("Not found dict for function-called cid:{} it may cause by unsupport function",
need_encode_cid));
}
_dict_optimize_parser.eval_expr(state, expr_ctx, &dict_ctx, need_encode_cid);
dict_iter = global_dict.find(need_encode_cid);
DCHECK(dict_iter != global_dict.end());
return Status::InternalError(fmt::format("Not found dict for function-called cid:{}", need_encode_cid));
if (dict_iter == global_dict.end()) {
return Status::InternalError(fmt::format("Eval Expr Error for cid:{}", need_encode_cid));
}
}
vectorized::DefaultDecoderPtr decoder = std::make_unique<vectorized::DefaultDecoder>();

View File

@ -465,7 +465,8 @@ Status ExchangeSinkOperator::serialize_chunk(const vectorized::Chunk* src, Chunk
uncompressed_size = src->serialize_size();
// TODO(kks): resize without initializing the new bytes
dst->mutable_data()->resize(uncompressed_size);
src->serialize((uint8_t*)dst->mutable_data()->data());
size_t written_size = src->serialize((uint8_t*)dst->mutable_data()->data());
dst->set_serialized_size(written_size);
}
}

View File

@ -94,8 +94,7 @@ public:
void process() {
try {
MemTracker* prev_tracker = tls_thread_status.set_mem_tracker(_mem_tracker);
DeferOp op([&] { tls_thread_status.set_mem_tracker(prev_tracker); });
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_mem_tracker);
while (!_is_finished) {
{
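This and several following hunks replace the manual set-tracker/DeferOp-restore pair with a single SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER macro. Its definition is not part of this diff; the likely pattern is an RAII guard over a thread-local pointer, sketched here with assumed names:

#include <iostream>

struct MemTracker { const char* name; };

// Stand-in for tls_thread_status: one tracker pointer per thread.
thread_local MemTracker* tls_mem_tracker = nullptr;

// Assumed shape of the RAII setter: save the old tracker, restore on scope exit.
class ScopedMemTrackerSetter {
public:
    explicit ScopedMemTrackerSetter(MemTracker* t) : _prev(tls_mem_tracker) {
        tls_mem_tracker = t;
    }
    ~ScopedMemTrackerSetter() { tls_mem_tracker = _prev; }

private:
    MemTracker* _prev;
};

int main() {
    MemTracker query{"query"};
    {
        ScopedMemTrackerSetter guard(&query);
        std::cout << tls_mem_tracker->name << "\n"; // "query"
    } // restored automatically, even on early return or exception
    std::cout << (tls_mem_tracker == nullptr ? "restored" : "leaked") << "\n";
}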

View File

@ -111,8 +111,8 @@ Status FragmentExecutor::prepare(ExecEnv* exec_env, const TExecPlanFragmentParam
_fragment_ctx->set_plan(plan);
// Set up global dict
if (request.fragment.__isset.global_dicts) {
RETURN_IF_ERROR(runtime_state->init_global_dict(request.fragment.global_dicts));
if (request.fragment.__isset.query_global_dicts) {
RETURN_IF_ERROR(runtime_state->init_query_global_dict(request.fragment.query_global_dicts));
}
// Set senders of exchange nodes before pipeline build

View File

@ -26,7 +26,7 @@ Status OlapChunkSource::prepare(RuntimeState* state) {
_init_counter(state);
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_global_dict_map());
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_query_global_dict_map());
OlapScanConjunctsManager::eval_const_conjuncts(_conjunct_ctxs, &_status);
OlapScanConjunctsManager& cm = _conjuncts_manager;
@ -289,7 +289,7 @@ Status OlapChunkSource::buffer_next_batch_chunks_blocking(size_t batch_size, boo
// mapping a slot-column-id to schema-columnid
Status OlapChunkSource::_init_global_dicts(vectorized::TabletReaderParams* params) {
const auto& global_dict_map = _runtime_state->get_global_dict_map();
const auto& global_dict_map = _runtime_state->get_query_global_dict_map();
auto global_dict = _obj_pool.add(new ColumnIdToGlobalDictMap());
// mapping column id to storage column ids
const TupleDescriptor* tuple_desc = _runtime_state->desc_tbl().get_tuple_descriptor(_tuple_id);

View File

@ -66,8 +66,7 @@ void GlobalDriverDispatcher::run() {
auto runtime_state_ptr = fragment_ctx->runtime_state_ptr();
auto* runtime_state = runtime_state_ptr.get();
{
MemTracker* prev_tracker = tls_thread_status.set_mem_tracker(runtime_state->instance_mem_tracker());
DeferOp op([&] { tls_thread_status.set_mem_tracker(prev_tracker); });
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(runtime_state->instance_mem_tracker());
if (fragment_ctx->is_canceled()) {
VLOG_ROW << "[Driver] Canceled: driver=" << driver

View File

@ -73,7 +73,7 @@ Status ProjectOperatorFactory::prepare(RuntimeState* state) {
RETURN_IF_ERROR(Expr::open(_expr_ctxs, state));
RETURN_IF_ERROR(Expr::open(_common_sub_expr_ctxs, state));
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_global_dict_map());
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_query_global_dict_map());
auto init_dict_optimize = [&](std::vector<ExprContext*>& expr_ctxs, std::vector<SlotId>& target_slots) {
_dict_optimize_parser.rewrite_exprs(&expr_ctxs, state, target_slots);

View File

@ -114,12 +114,11 @@ void ScanOperator::_trigger_next_scan(RuntimeState* state) {
PriorityThreadPool::Task task;
_is_io_task_active.store(true, std::memory_order_release);
task.work_function = [this, state]() {
MemTracker* prev_tracker = tls_thread_status.set_mem_tracker(state->instance_mem_tracker());
DeferOp op([&] {
tls_thread_status.set_mem_tracker(prev_tracker);
_is_io_task_active.store(false, std::memory_order_release);
});
_chunk_source->buffer_next_batch_chunks_blocking(_batch_size, _is_finished);
{
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(state->instance_mem_tracker());
_chunk_source->buffer_next_batch_chunks_blocking(_batch_size, _is_finished);
}
_is_io_task_active.store(false, std::memory_order_release);
};
// TODO(by satanson): set a proper priority
task.priority = 20;
@ -156,8 +155,8 @@ Status ScanOperatorFactory::prepare(RuntimeState* state) {
RETURN_IF_ERROR(Expr::open(_conjunct_ctxs, state));
auto tuple_desc = state->desc_tbl().get_tuple_descriptor(_olap_scan_node.tuple_id);
vectorized::DictOptimizeParser::rewrite_descriptor(state, tuple_desc->slots(), _conjunct_ctxs,
_olap_scan_node.dict_string_id_to_int_ids);
vectorized::DictOptimizeParser::rewrite_descriptor(state, _conjunct_ctxs, _olap_scan_node.dict_string_id_to_int_ids,
&(tuple_desc->decoded_slots()));
return Status::OK();
}

View File

@ -2,6 +2,8 @@
#include "exec/pipeline/set/except_context.h"
#include "runtime/current_thread.h"
namespace starrocks::pipeline {
Status ExceptContext::prepare(RuntimeState* state, const std::vector<ExprContext*>& build_exprs) {
@ -26,7 +28,8 @@ Status ExceptContext::close(RuntimeState* state) {
Status ExceptContext::append_chunk_to_ht(RuntimeState* state, const ChunkPtr& chunk,
const std::vector<ExprContext*>& dst_exprs) {
return _hash_set->build_set(state, chunk, dst_exprs, _build_pool.get());
TRY_CATCH_BAD_ALLOC(_hash_set->build_set(state, chunk, dst_exprs, _build_pool.get()));
return Status::OK();
}
Status ExceptContext::erase_chunk_from_ht(RuntimeState* state, const ChunkPtr& chunk,

View File

@ -2,9 +2,12 @@
#include "exec/pipeline/set/intersect_context.h"
#include "runtime/current_thread.h"
namespace starrocks::pipeline {
Status IntersectContext::prepare(RuntimeState* state, const std::vector<ExprContext*>& build_exprs) {
RETURN_IF_ERROR(_hash_set->init());
_build_pool = std::make_unique<MemPool>();
_dst_tuple_desc = state->desc_tbl().get_tuple_descriptor(_dst_tuple_id);
@ -26,7 +29,8 @@ Status IntersectContext::close(RuntimeState* state) {
Status IntersectContext::append_chunk_to_ht(RuntimeState* state, const ChunkPtr& chunk,
const std::vector<ExprContext*>& dst_exprs) {
return _hash_set->build_set(state, chunk, dst_exprs, _build_pool.get());
TRY_CATCH_BAD_ALLOC(_hash_set->build_set(state, chunk, dst_exprs, _build_pool.get()));
return Status::OK();
}
Status IntersectContext::refine_chunk_from_ht(RuntimeState* state, const ChunkPtr& chunk,

View File

@ -269,10 +269,8 @@ bool OlapTablePartitionParam::find_tablet(Tuple* tuple, const OlapTablePartition
}
Status OlapTablePartitionParam::_create_partition_keys(const std::vector<TExprNode>& t_exprs, Tuple** part_key) {
Tuple* tuple = (Tuple*)_mem_pool->allocate(_schema->tuple_desc()->byte_size());
if (UNLIKELY(tuple == nullptr)) {
return Status::InternalError("Mem usage has exceed the limit of BE");
}
auto* tuple = (Tuple*)_mem_pool->allocate(_schema->tuple_desc()->byte_size());
RETURN_IF_UNLIKELY_NULL(tuple, Status::MemoryAllocFailed("alloc mem for partition keys failed"));
for (int i = 0; i < t_exprs.size(); i++) {
const TExprNode& t_expr = t_exprs[i];
RETURN_IF_ERROR(_create_partition_key(t_expr, tuple, _partition_slot_descs[i]));

View File

@ -140,7 +140,7 @@ void NodeChannel::open() {
request.set_is_vectorized(_parent->_is_vectorized);
// set global dict
const auto& global_dict = _runtime_state->get_global_dict_map();
const auto& global_dict = _runtime_state->get_load_global_dict_map();
for (size_t i = 0; i < request.schema().slot_descs_size(); i++) {
auto slot = request.mutable_schema()->mutable_slot_descs(i);
auto it = global_dict.find(slot->id());
@ -1088,8 +1088,7 @@ void OlapTableSink::_padding_char_column(vectorized::Chunk* chunk) {
}
void OlapTableSink::_send_chunk_process() {
MemTracker* prev_tracker = tls_thread_status.set_mem_tracker(_runtime_state->instance_mem_tracker());
DeferOp op([&] { tls_thread_status.set_mem_tracker(prev_tracker); });
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_runtime_state->instance_mem_tracker());
SCOPED_RAW_TIMER(&_non_blocking_send_ns);
while (true) {

View File

@ -9,6 +9,7 @@
#include "column/column_helper.h"
#include "column/hash_set.h"
#include "column/type_traits.h"
#include "exec/vectorized/aggregate/agg_hash_set.h"
#include "gutil/casts.h"
#include "gutil/strings/fastmem.h"
#include "runtime/mem_pool.h"
@ -30,7 +31,7 @@ using Int32AggHashMap = phmap::flat_hash_map<int32_t, AggDataPtr, StdHashWithSee
template <PhmapSeed seed>
using Int64AggHashMap = phmap::flat_hash_map<int64_t, AggDataPtr, StdHashWithSeed<int64_t, seed>>;
template <PhmapSeed seed>
using Int128AggHashMap = phmap::flat_hash_map<int128_t, AggDataPtr, StdHashWithSeed<int128_t, seed>>;
using Int128AggHashMap = phmap::flat_hash_map<int128_t, AggDataPtr, Hash128WithSeed<seed>>;
template <PhmapSeed seed>
using DateAggHashMap = phmap::flat_hash_map<DateValue, AggDataPtr, StdHashWithSeed<DateValue, seed>>;
template <PhmapSeed seed>
@ -105,8 +106,11 @@ struct AggHashMapWithOneNumberKey {
AGG_HASH_MAP_PREFETCH_HASH_VALUE();
FieldType key = column->get_data()[i];
auto iter = hash_map.lazy_emplace_with_hash(key, hash_values[i],
[&](const auto& ctor) { ctor(key, allocate_func()); });
auto iter = hash_map.lazy_emplace_with_hash(key, hash_values[i], [&](const auto& ctor) {
AggDataPtr pv = allocate_func();
ERASE_AND_THROW_BAD_ALLOC_IF_NULL_WITH_HASH(hash_map, pv, key, hash_values[i]);
ctor(key, pv);
});
(*agg_states)[i] = iter->second;
}
}
@ -158,6 +162,7 @@ struct AggHashMapWithOneNullableNumberKey {
if (key_columns[0]->only_null()) {
if (null_key_data == nullptr) {
null_key_data = allocate_func();
THROW_BAD_ALLOC_IF_NULL(null_key_data);
}
for (size_t i = 0; i < chunk_size; i++) {
(*agg_states)[i] = null_key_data;
@ -172,8 +177,11 @@ struct AggHashMapWithOneNullableNumberKey {
AGG_HASH_MAP_PREFETCH_HASH_VALUE();
auto key = data_column->get_data()[i];
auto iter = hash_map.lazy_emplace_with_hash(key, hash_values[i],
[&](const auto& ctor) { ctor(key, allocate_func()); });
auto iter = hash_map.lazy_emplace_with_hash(key, hash_values[i], [&](const auto& ctor) {
AggDataPtr pv = allocate_func();
ERASE_AND_THROW_BAD_ALLOC_IF_NULL_WITH_HASH(hash_map, pv, key, hash_values[i]);
ctor(key, pv);
});
(*agg_states)[i] = iter->second;
}
return;
@ -183,6 +191,7 @@ struct AggHashMapWithOneNullableNumberKey {
if (key_columns[0]->is_null(i)) {
if (null_key_data == nullptr) {
null_key_data = allocate_func();
THROW_BAD_ALLOC_IF_NULL(null_key_data);
}
(*agg_states)[i] = null_key_data;
} else {
@ -203,6 +212,7 @@ struct AggHashMapWithOneNullableNumberKey {
if (key_columns[0]->only_null()) {
if (null_key_data == nullptr) {
null_key_data = allocate_func();
THROW_BAD_ALLOC_IF_NULL(null_key_data);
}
for (size_t i = 0; i < chunk_size; i++) {
(*agg_states)[i] = null_key_data;
@ -226,6 +236,7 @@ struct AggHashMapWithOneNullableNumberKey {
if (key_columns[0]->is_null(i)) {
if (null_key_data == nullptr) {
null_key_data = allocate_func();
THROW_BAD_ALLOC_IF_NULL(null_key_data);
}
(*agg_states)[i] = null_key_data;
} else {
@ -239,7 +250,11 @@ struct AggHashMapWithOneNullableNumberKey {
void _handle_data_key_column(ColumnType* data_column, size_t row, Func&& allocate_func,
Buffer<AggDataPtr>* agg_states) {
auto key = data_column->get_data()[row];
auto iter = hash_map.lazy_emplace(key, [&](const auto& ctor) { ctor(key, allocate_func()); });
auto iter = hash_map.lazy_emplace(key, [&](const auto& ctor) {
AggDataPtr pv = allocate_func();
ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_map, pv, key);
ctor(key, pv);
});
(*agg_states)[row] = iter->second;
}
@ -286,9 +301,11 @@ struct AggHashMapWithOneStringKey {
auto iter = hash_map.lazy_emplace_with_hash(key, hash_values[i], [&](const auto& ctor) {
// we must persist the slice before insert
uint8_t* pos = pool->allocate(key.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL_WITH_HASH(hash_map, pos, key, hash_values[i]);
strings::memcpy_inlined(pos, key.data, key.size);
Slice pk{pos, key.size};
AggDataPtr pv = allocate_func();
ERASE_AND_THROW_BAD_ALLOC_IF_NULL_WITH_HASH(hash_map, pv, key, hash_values[i]);
ctor(pk, pv);
});
(*agg_states)[i] = iter->second;
@ -337,6 +354,7 @@ struct AggHashMapWithOneNullableStringKey {
if (key_columns[0]->only_null()) {
if (null_key_data == nullptr) {
null_key_data = allocate_func();
THROW_BAD_ALLOC_IF_NULL(null_key_data);
}
for (size_t i = 0; i < chunk_size; i++) {
(*agg_states)[i] = null_key_data;
@ -354,9 +372,11 @@ struct AggHashMapWithOneNullableStringKey {
auto key = data_column->get_slice(i);
auto iter = hash_map.lazy_emplace_with_hash(key, hash_values[i], [&](const auto& ctor) {
uint8_t* pos = pool->allocate(key.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL_WITH_HASH(hash_map, pos, key, hash_values[i]);
strings::memcpy_inlined(pos, key.data, key.size);
Slice pk{pos, key.size};
AggDataPtr pv = allocate_func();
ERASE_AND_THROW_BAD_ALLOC_IF_NULL_WITH_HASH(hash_map, pv, key, hash_values[i]);
ctor(pk, pv);
});
(*agg_states)[i] = iter->second;
@ -368,6 +388,7 @@ struct AggHashMapWithOneNullableStringKey {
if (key_columns[0]->is_null(i)) {
if (null_key_data == nullptr) {
null_key_data = allocate_func();
THROW_BAD_ALLOC_IF_NULL(null_key_data);
}
(*agg_states)[i] = null_key_data;
} else {
@ -387,6 +408,7 @@ struct AggHashMapWithOneNullableStringKey {
if (key_columns[0]->only_null()) {
if (null_key_data == nullptr) {
null_key_data = allocate_func();
THROW_BAD_ALLOC_IF_NULL(null_key_data);
}
for (size_t i = 0; i < chunk_size; i++) {
(*agg_states)[i] = null_key_data;
@ -408,6 +430,7 @@ struct AggHashMapWithOneNullableStringKey {
if (nullable_column->is_null(i)) {
if (null_key_data == nullptr) {
null_key_data = allocate_func();
THROW_BAD_ALLOC_IF_NULL(null_key_data);
}
(*agg_states)[i] = null_key_data;
} else {
@ -423,9 +446,11 @@ struct AggHashMapWithOneNullableStringKey {
auto key = data_column->get_slice(row);
auto iter = hash_map.lazy_emplace(key, [&](const auto& ctor) {
uint8_t* pos = pool->allocate(key.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_map, pos, key);
strings::memcpy_inlined(pos, key.data, key.size);
Slice pk{pos, key.size};
AggDataPtr pv = allocate_func();
ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_map, pv, key);
ctor(pk, pv);
});
(*agg_states)[row] = iter->second;
@ -463,7 +488,9 @@ struct AggHashMapWithSerializedKey {
AggHashMapWithSerializedKey()
: mem_pool(std::make_unique<MemPool>()),
buffer(mem_pool->allocate(max_one_row_size * config::vector_chunk_size)) {}
buffer(mem_pool->allocate(max_one_row_size * config::vector_chunk_size)) {
THROW_BAD_ALLOC_IF_NULL(buffer);
}
template <typename Func>
void compute_agg_states(size_t chunk_size, const Columns& key_columns, MemPool* pool, Func&& allocate_func,
@ -477,6 +504,7 @@ struct AggHashMapWithSerializedKey {
// reserved extra SLICE_MEMEQUAL_OVERFLOW_PADDING bytes to prevent SIMD instructions
// from accessing out-of-bound memory.
buffer = mem_pool->allocate(max_one_row_size * config::vector_chunk_size + SLICE_MEMEQUAL_OVERFLOW_PADDING);
THROW_BAD_ALLOC_IF_NULL(buffer);
}
for (const auto& key_column : key_columns) {
@ -488,9 +516,11 @@ struct AggHashMapWithSerializedKey {
auto iter = hash_map.lazy_emplace(key, [&](const auto& ctor) {
// we must persist the slice before insert
uint8_t* pos = pool->allocate(key.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_map, pos, key);
strings::memcpy_inlined(pos, key.data, key.size);
Slice pk{pos, key.size};
AggDataPtr pv = allocate_func();
ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_map, pv, key);
ctor(pk, pv);
});
(*agg_states)[i] = iter->second;
@ -510,6 +540,7 @@ struct AggHashMapWithSerializedKey {
max_one_row_size = cur_max_one_row_size;
mem_pool->clear();
buffer = mem_pool->allocate(max_one_row_size * config::vector_chunk_size);
THROW_BAD_ALLOC_IF_NULL(buffer);
}
for (const auto& key_column : key_columns) {
@ -623,6 +654,7 @@ struct AggHashMapWithSerializedKeyFixedSize {
FixedSizeSliceKey& key = caches[i].key;
auto iter = hash_map.lazy_emplace_with_hash(key, caches[i].hashval, [&](const auto& ctor) {
AggDataPtr pv = allocate_func();
ERASE_AND_THROW_BAD_ALLOC_IF_NULL_WITH_HASH(hash_map, pv, key, caches[i].hashval);
ctor(key, pv);
});
(*agg_states)[i] = iter->second;

View File

@ -24,7 +24,7 @@ using Int32AggHashSet = phmap::flat_hash_set<int32_t, StdHashWithSeed<int32_t, s
template <PhmapSeed seed>
using Int64AggHashSet = phmap::flat_hash_set<int64_t, StdHashWithSeed<int64_t, seed>>;
template <PhmapSeed seed>
using Int128AggHashSet = phmap::flat_hash_set<int128_t, StdHashWithSeed<int128_t, seed>>;
using Int128AggHashSet = phmap::flat_hash_set<int128_t, Hash128WithSeed<seed>>;
template <PhmapSeed seed>
using DateAggHashSet = phmap::flat_hash_set<DateValue, StdHashWithSeed<DateValue, seed>>;
template <PhmapSeed seed>
@ -50,6 +50,18 @@ using SliceAggTwoLevelHashSet =
phmap::parallel_flat_hash_set<TSliceWithHash<seed>, THashOnSliceWithHash<seed>, TEqualOnSliceWithHash<seed>,
phmap::priv::Allocator<Slice>, 4>;
#define ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_set, pv, key) \
if (UNLIKELY(pv == nullptr)) { \
hash_set.erase(key); \
throw std::bad_alloc(); \
}
#define ERASE_AND_THROW_BAD_ALLOC_IF_NULL_WITH_HASH(hash_set, pv, key, hash) \
if (UNLIKELY(pv == nullptr)) { \
hash_set.erase_with_hash(key, hash); \
throw std::bad_alloc(); \
}
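
These two macros exist because lazy_emplace has already inserted the key by the time the construction callback runs; if a subsequent allocation fails, the half-built entry must be erased before throwing, or later probes would find a key with no valid state. A minimal sketch of the pattern, assuming parallel-hashmap is available (try_allocate is a hypothetical stand-in for MemPool::allocate):

#include <parallel_hashmap/phmap.h>
#include <cstdint>
#include <cstdlib>
#include <new>

// Hypothetical stand-in for MemPool::allocate: may return nullptr on exhaustion.
static uint8_t* try_allocate(size_t n) {
    return static_cast<uint8_t*>(std::malloc(n));
}

using Map = phmap::flat_hash_map<int, uint8_t*>;

void insert_state(Map& map, int key) {
    map.lazy_emplace(key, [&](const auto& ctor) {
        uint8_t* state = try_allocate(64);
        if (state == nullptr) { // ERASE_AND_THROW_BAD_ALLOC_IF_NULL(map, state, key)
            map.erase(key);     // roll back the partially inserted slot
            throw std::bad_alloc();
        }
        ctor(key, state);
    });
}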
// ==============================================================
// handle one number hash key
template <PrimitiveType primitive_type, typename HashSet>
@ -189,6 +201,7 @@ struct AggHashSetOfOneStringKey {
hash_set.lazy_emplace(key, [&](const auto& ctor) {
// we must persist the slice before inserting it
uint8_t* pos = pool->allocate(key.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_set, pos, key);
memcpy(pos, key.data, key.size);
ctor(pos, key.size, key.hash);
});
@ -284,6 +297,7 @@ struct AggHashSetOfOneNullableStringKey {
hash_set.lazy_emplace(key, [&](const auto& ctor) {
uint8_t* pos = pool->allocate(key.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_set, pos, key);
memcpy(pos, key.data, key.size);
ctor(pos, key.size, key.hash);
});
@ -317,7 +331,9 @@ struct AggHashSetOfSerializedKey {
AggHashSetOfSerializedKey()
: _mem_pool(std::make_unique<MemPool>()),
_buffer(_mem_pool->allocate(max_one_row_size * config::vector_chunk_size)) {}
_buffer(_mem_pool->allocate(max_one_row_size * config::vector_chunk_size)) {
THROW_BAD_ALLOC_IF_NULL(_buffer);
}
void build_set(size_t chunk_size, const Columns& key_columns, MemPool* pool) {
slice_sizes.assign(config::vector_chunk_size, 0);
@ -330,6 +346,7 @@ struct AggHashSetOfSerializedKey {
// from reading out-of-bounds memory.
_buffer =
_mem_pool->allocate(max_one_row_size * config::vector_chunk_size + SLICE_MEMEQUAL_OVERFLOW_PADDING);
THROW_BAD_ALLOC_IF_NULL(_buffer);
}
for (const auto& key_column : key_columns) {
@ -343,6 +360,7 @@ struct AggHashSetOfSerializedKey {
hash_set.lazy_emplace(key, [&](const auto& ctor) {
// we must persist the slice before inserting it
uint8_t* pos = pool->allocate(key.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL(hash_set, pos, key);
memcpy(pos, key.data, key.size);
ctor(pos, key.size, key.hash);
});
@ -360,6 +378,7 @@ struct AggHashSetOfSerializedKey {
max_one_row_size = cur_max_one_row_size;
_mem_pool->clear();
_buffer = _mem_pool->allocate(max_one_row_size * config::vector_chunk_size);
THROW_BAD_ALLOC_IF_NULL(_buffer);
}
for (const auto& key_column : key_columns) {
@ -427,6 +446,7 @@ struct AggHashSetOfSerializedKeyFixedSize {
AggHashSetOfSerializedKeyFixedSize()
: _mem_pool(std::make_unique<MemPool>()),
buffer(_mem_pool->allocate(max_fixed_size * config::vector_chunk_size)) {
THROW_BAD_ALLOC_IF_NULL(buffer);
memset(buffer, 0x0, max_fixed_size * config::vector_chunk_size);
}

View File

@ -461,7 +461,7 @@ using OneStringAggHashSet = AggHashSetOfOneStringKey<SliceAggHashSet<seed>>;
template <PhmapSeed seed>
using NullOneStringAggHashSet = AggHashSetOfOneNullableStringKey<SliceAggHashSet<seed>>;
template <PhmapSeed seed>
using SerializedKeyAggHashSet = AggHashSetOfSerializedKey<SliceAggTwoLevelHashSet<seed>>;
using SerializedKeyAggHashSet = AggHashSetOfSerializedKey<SliceAggHashSet<seed>>;
template <PhmapSeed seed>
using SerializedTwoLevelKeyAggHashSet = AggHashSetOfSerializedKey<SliceAggTwoLevelHashSet<seed>>;
template <PhmapSeed seed>

View File

@ -24,6 +24,7 @@ Status AggregateBaseNode::close(RuntimeState* state) {
}
if (_aggregator != nullptr) {
_aggregator->close(state);
_aggregator.reset();
}
return ExecNode::close(state);
}

View File

@ -7,6 +7,7 @@
#include "exec/pipeline/operator.h"
#include "exec/pipeline/pipeline_builder.h"
#include "exec/vectorized/aggregator.h"
#include "runtime/current_thread.h"
#include "simd/simd.h"
namespace starrocks::vectorized {
@ -57,16 +58,17 @@ Status AggregateBlockingNode::open(RuntimeState* state) {
if (!_aggregator->is_none_group_by_exprs()) {
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == HashMapVariant::Type::NAME) \
_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk_size, agg_group_by_with_limit);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == HashMapVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, chunk_size, agg_group_by_with_limit)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
_mem_tracker->set(_aggregator->hash_map_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
_aggregator->try_convert_to_two_level_map();
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_map());
}
if (_aggregator->is_none_group_by_exprs()) {
_aggregator->compute_single_agg_state(chunk_size);
@ -145,10 +147,10 @@ Status AggregateBlockingNode::get_next(RuntimeState* state, ChunkPtr* chunk, boo
#undef HASH_MAP_METHOD
}
size_t old_size = (*chunk)->num_rows();
eval_join_runtime_filters(chunk->get());
// For the HAVING clause
size_t old_size = (*chunk)->num_rows();
ExecNode::eval_conjuncts(_conjunct_ctxs, (*chunk).get());
_aggregator->update_num_rows_returned(-(old_size - (*chunk)->num_rows()));
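
Each hash-table build is now wrapped in TRY_CATCH_BAD_ALLOC. A plausible shape of that macro (an assumption — the real one comes from runtime/current_thread.h; the message string and Status::MemoryAllocFailed are taken from elsewhere in this diff):

// Hypothetical expansion of TRY_CATCH_BAD_ALLOC (assumption, for illustration):
// catch std::bad_alloc thrown during the build and surface it as a Status
// instead of letting it terminate the BE process.
#define TRY_CATCH_BAD_ALLOC(stmt)                                                   \
    do {                                                                            \
        try {                                                                       \
            { stmt; }                                                               \
        } catch (std::bad_alloc const&) {                                           \
            return Status::MemoryAllocFailed("Mem usage has exceed the limit of BE"); \
        }                                                                           \
    } while (0)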

View File

@ -6,6 +6,7 @@
#include "exec/pipeline/aggregate/aggregate_streaming_source_operator.h"
#include "exec/pipeline/operator.h"
#include "exec/pipeline/pipeline_builder.h"
#include "runtime/current_thread.h"
#include "simd/simd.h"
namespace starrocks::vectorized {
@ -37,6 +38,14 @@ Status AggregateStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bo
return Status::OK();
}
if (*chunk != nullptr) {
(*chunk)->reset();
}
#ifdef DEBUG
static int loop = 0;
#endif
// TODO: merge small chunks to large chunk for optimization
while (!_child_eos) {
ChunkPtr input_chunk;
@ -61,10 +70,11 @@ Status AggregateStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bo
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == HashMapVariant::Type::NAME) \
_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, input_chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == HashMapVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, input_chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -79,7 +89,7 @@ Status AggregateStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bo
_mem_tracker->set(_aggregator->hash_map_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
_aggregator->try_convert_to_two_level_map();
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_map());
COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_map_variant().size());
@ -90,19 +100,29 @@ Status AggregateStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bo
_aggregator->hash_map_variant().capacity() - _aggregator->hash_map_variant().capacity() / 8;
size_t remain_size = real_capacity - _aggregator->hash_map_variant().size();
bool ht_needs_expansion = remain_size < input_chunk_size;
#ifdef DEBUG
// Chaos test that alternates streaming and aggregation; the results must be consistent.
// Grouping by a double-typed key may produce divergent results due to floating-point
// precision loss, which would make the consistency check fail, so this only runs in DEBUG mode.
loop++;
if (loop % 2 == 0) {
#else
if (!ht_needs_expansion ||
_aggregator->should_expand_preagg_hash_tables(_children[0]->rows_returned(), input_chunk_size,
_aggregator->mem_pool()->total_allocated_bytes(),
_aggregator->hash_map_variant().size())) {
#endif
RETURN_IF_ERROR(state->check_mem_limit("AggrNode"));
// The hash table is not full, or expansion is allowed based on the reduction rate.
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == HashMapVariant::Type::NAME) \
_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, input_chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == HashMapVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_map<decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, input_chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -117,7 +137,7 @@ Status AggregateStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bo
_mem_tracker->set(_aggregator->hash_map_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
_aggregator->try_convert_to_two_level_map();
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_map());
COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_map_variant().size());
continue;
@ -127,10 +147,12 @@ Status AggregateStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bo
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == HashMapVariant::Type::NAME) _aggregator \
->build_hash_map_with_selection<typename decltype(_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, input_chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_map_variant().type == HashMapVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_map_with_selection<typename decltype( \
_aggregator->hash_map_variant().NAME)::element_type>( \
*_aggregator->hash_map_variant().NAME, input_chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -157,7 +179,7 @@ Status AggregateStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bo
}
COUNTER_SET(_aggregator->hash_table_size(), (int64_t)_aggregator->hash_map_variant().size());
if ((*chunk)->num_rows() > 0) {
if (*chunk != nullptr && (*chunk)->num_rows() > 0) {
break;
} else {
continue;

View File

@ -7,6 +7,7 @@
#include "exec/pipeline/operator.h"
#include "exec/pipeline/pipeline_builder.h"
#include "exec/vectorized/aggregator.h"
#include "runtime/current_thread.h"
namespace starrocks::vectorized {
@ -49,13 +50,18 @@ Status DistinctBlockingNode::open(RuntimeState* state) {
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_SET_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == HashSetVariant::Type::NAME) \
_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk->num_rows());
#define HASH_SET_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == HashSetVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, chunk->num_rows())); \
}
APPLY_FOR_VARIANT_ALL(HASH_SET_METHOD)
#undef HASH_SET_METHOD
_mem_tracker->set(_aggregator->hash_set_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_set());
_aggregator->update_num_input_rows(chunk->num_rows());
if (limit_with_no_agg) {
auto size = _aggregator->hash_set_variant().size();
@ -110,10 +116,10 @@ Status DistinctBlockingNode::get_next(RuntimeState* state, ChunkPtr* chunk, bool
APPLY_FOR_VARIANT_ALL(HASH_SET_METHOD)
#undef HASH_SET_METHOD
size_t old_size = (*chunk)->num_rows();
eval_join_runtime_filters(chunk->get());
// For the HAVING clause
size_t old_size = (*chunk)->num_rows();
ExecNode::eval_conjuncts(_conjunct_ctxs, (*chunk).get());
_aggregator->update_num_rows_returned(-(old_size - (*chunk)->num_rows()));

View File

@ -7,6 +7,7 @@
#include "exec/pipeline/operator.h"
#include "exec/pipeline/pipeline_builder.h"
#include "exec/vectorized/aggregator.h"
#include "runtime/current_thread.h"
#include "simd/simd.h"
namespace starrocks::vectorized {
@ -65,10 +66,11 @@ Status DistinctStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, boo
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == HashSetVariant::Type::NAME) \
_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, input_chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == HashSetVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, input_chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -85,6 +87,7 @@ Status DistinctStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, boo
_mem_tracker->set(_aggregator->hash_set_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_set());
continue;
} else {
@ -103,10 +106,11 @@ Status DistinctStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, boo
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == HashSetVariant::Type::NAME) \
_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, input_chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == HashSetVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_set<decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, input_chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {
@ -123,6 +127,7 @@ Status DistinctStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, boo
_mem_tracker->set(_aggregator->hash_set_variant().memory_usage() +
_aggregator->mem_pool()->total_reserved_bytes());
TRY_CATCH_BAD_ALLOC(_aggregator->try_convert_to_two_level_set());
continue;
} else {
@ -130,10 +135,12 @@ Status DistinctStreamingNode::get_next(RuntimeState* state, ChunkPtr* chunk, boo
SCOPED_TIMER(_aggregator->agg_compute_timer());
if (false) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == HashSetVariant::Type::NAME) _aggregator \
->build_hash_set_with_selection<typename decltype(_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, input_chunk_size);
#define HASH_MAP_METHOD(NAME) \
else if (_aggregator->hash_set_variant().type == HashSetVariant::Type::NAME) { \
TRY_CATCH_BAD_ALLOC(_aggregator->build_hash_set_with_selection<typename decltype( \
_aggregator->hash_set_variant().NAME)::element_type>( \
*_aggregator->hash_set_variant().NAME, input_chunk_size)); \
}
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
else {

View File

@ -3,6 +3,7 @@
#include "aggregator.h"
#include "exprs/anyval_util.h"
#include "runtime/current_thread.h"
namespace starrocks {
@ -194,6 +195,7 @@ Status Aggregator::prepare(RuntimeState* state, ObjectPool* pool, RuntimeProfile
if (_group_by_expr_ctxs.empty()) {
_single_agg_state = _mem_pool->allocate_aligned(_agg_states_total_size, _max_agg_state_align_size);
THROW_BAD_ALLOC_IF_NULL(_single_agg_state);
for (int i = 0; i < _agg_functions.size(); i++) {
_agg_functions[i]->create(_single_agg_state + _agg_states_offsets[i]);
}
@ -205,9 +207,9 @@ Status Aggregator::prepare(RuntimeState* state, ObjectPool* pool, RuntimeProfile
// For SQL: select distinct id from table, or select id from table group by id;
// we don't need to allocate memory for agg states.
if (_is_only_group_by_columns) {
_init_agg_hash_variant(_hash_set_variant);
TRY_CATCH_BAD_ALLOC(_init_agg_hash_variant(_hash_set_variant));
} else {
_init_agg_hash_variant(_hash_map_variant);
TRY_CATCH_BAD_ALLOC(_init_agg_hash_variant(_hash_map_variant));
}
return Status::OK();
@ -232,7 +234,7 @@ Status Aggregator::close(RuntimeState* state) {
}
#define HASH_MAP_METHOD(NAME) \
else if (_hash_map_variant.type == vectorized::HashMapVariant::Type::NAME) \
_release_agg_memory<decltype(_hash_map_variant.NAME)::element_type>(*_hash_map_variant.NAME);
_release_agg_memory<decltype(_hash_map_variant.NAME)::element_type>(_hash_map_variant.NAME.get());
APPLY_FOR_VARIANT_ALL(HASH_MAP_METHOD)
#undef HASH_MAP_METHOD
}
@ -458,7 +460,7 @@ void Aggregator::output_chunk_by_streaming_with_selection(vectorized::ChunkPtr*
output_chunk_by_streaming(chunk);
}
#define CONVERT_TO_TWO_LEVEL(DST, SRC) \
#define CONVERT_TO_TWO_LEVEL_MAP(DST, SRC) \
if (_hash_map_variant.type == vectorized::HashMapVariant::Type::SRC) { \
_hash_map_variant.DST = std::make_unique<decltype(_hash_map_variant.DST)::element_type>(); \
_hash_map_variant.DST->hash_map.reserve(_hash_map_variant.SRC->hash_map.capacity()); \
@ -469,10 +471,28 @@ void Aggregator::output_chunk_by_streaming_with_selection(vectorized::ChunkPtr*
return; \
}
#define CONVERT_TO_TWO_LEVEL_SET(DST, SRC) \
if (_hash_set_variant.type == vectorized::HashSetVariant::Type::SRC) { \
_hash_set_variant.DST = std::make_unique<decltype(_hash_set_variant.DST)::element_type>(); \
_hash_set_variant.DST->hash_set.reserve(_hash_set_variant.SRC->hash_set.capacity()); \
_hash_set_variant.DST->hash_set.insert(_hash_set_variant.SRC->hash_set.begin(), \
_hash_set_variant.SRC->hash_set.end()); \
_hash_set_variant.type = vectorized::HashSetVariant::Type::DST; \
_hash_set_variant.SRC.reset(); \
return; \
}
void Aggregator::try_convert_to_two_level_map() {
if (_mem_tracker->consumption() > two_level_memory_threshold) {
CONVERT_TO_TWO_LEVEL(phase1_slice_two_level, phase1_slice);
CONVERT_TO_TWO_LEVEL(phase2_slice_two_level, phase2_slice);
CONVERT_TO_TWO_LEVEL_MAP(phase1_slice_two_level, phase1_slice);
CONVERT_TO_TWO_LEVEL_MAP(phase2_slice_two_level, phase2_slice);
}
}
void Aggregator::try_convert_to_two_level_set() {
if (_mem_tracker->consumption() > two_level_memory_threshold) {
CONVERT_TO_TWO_LEVEL_SET(phase1_slice_two_level, phase1_slice);
CONVERT_TO_TWO_LEVEL_SET(phase2_slice_two_level, phase2_slice);
}
}
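
The conversion macros above rebuild a flat (one-level) table as a two-level one once tracked memory crosses two_level_memory_threshold. The same idea in a self-contained sketch, with illustrative names rather than the Aggregator's members:

#include <parallel_hashmap/phmap.h>
#include <string>

using OneLevel = phmap::flat_hash_set<std::string>;
using TwoLevel = phmap::parallel_flat_hash_set<std::string>;

// Reserve up front so the copy does not rehash, move the entries over, then drop
// the old table; the two-level (sharded) set grows in smaller increments, which
// behaves better on large data sets.
TwoLevel convert_to_two_level(OneLevel& src) {
    TwoLevel dst;
    dst.reserve(src.capacity());
    dst.insert(src.begin(), src.end());
    src = OneLevel(); // release the single-level table's memory
    return dst;
}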

View File

@ -132,6 +132,7 @@ public:
// we convert the single-level hash map to a two-level hash map.
// A two-level hash map performs better on large data sets.
void try_convert_to_two_level_map();
void try_convert_to_two_level_set();
#ifdef NDEBUG
static constexpr size_t two_level_memory_threshold = 33554432; // 32M, L3 Cache
@ -253,6 +254,7 @@ public:
[this]() {
vectorized::AggDataPtr agg_state =
_mem_pool->allocate_aligned(_agg_states_total_size, _max_agg_state_align_size);
RETURN_IF_UNLIKELY_NULL(agg_state, (uint8_t*)(nullptr));
for (int i = 0; i < _agg_functions.size(); i++) {
_agg_functions[i]->create(agg_state + _agg_states_offsets[i]);
}
@ -268,6 +270,7 @@ public:
[this]() {
vectorized::AggDataPtr agg_state =
_mem_pool->allocate_aligned(_agg_states_total_size, _max_agg_state_align_size);
RETURN_IF_UNLIKELY_NULL(agg_state, (uint8_t*)(nullptr));
for (int i = 0; i < _agg_functions.size(); i++) {
_agg_functions[i]->create(agg_state + _agg_states_offsets[i]);
}
@ -462,14 +465,16 @@ private:
void _init_agg_hash_variant(HashVariantType& hash_variant);
template <typename HashMapWithKey>
void _release_agg_memory(HashMapWithKey& hash_map_with_key) {
auto it = hash_map_with_key.hash_map.begin();
auto end = hash_map_with_key.hash_map.end();
while (it != end) {
for (int i = 0; i < _agg_functions.size(); i++) {
_agg_functions[i]->destroy(it->second + _agg_states_offsets[i]);
void _release_agg_memory(HashMapWithKey* hash_map_with_key) {
if (hash_map_with_key != nullptr) {
auto it = hash_map_with_key->hash_map.begin();
auto end = hash_map_with_key->hash_map.end();
while (it != end) {
for (int i = 0; i < _agg_functions.size(); i++) {
_agg_functions[i]->destroy(it->second + _agg_states_offsets[i]);
}
++it;
}
++it;
}
}
};
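
The allocate_func lambdas above use RETURN_IF_UNLIKELY_NULL so a failed agg-state allocation propagates as a null pointer (later caught by the ERASE_AND_THROW_BAD_ALLOC macros at the lazy_emplace sites) rather than being dereferenced. A plausible expansion, stated as an assumption:

// Hypothetical expansion of RETURN_IF_UNLIKELY_NULL (assumption, for illustration):
// bail out with the given sentinel value when the pointer is null.
#define RETURN_IF_UNLIKELY_NULL(ptr, ret) \
    if (UNLIKELY((ptr) == nullptr)) {     \
        return (ret);                     \
    }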

View File

@ -17,6 +17,7 @@
#include "exprs/expr.h"
#include "exprs/expr_context.h"
#include "gutil/strings/substitute.h"
#include "runtime/current_thread.h"
#include "runtime/runtime_state.h"
#include "udf/udf.h"
#include "util/runtime_profile.h"
@ -111,6 +112,7 @@ Status AnalyticNode::close(RuntimeState* state) {
if (_analytor != nullptr) {
_analytor->close(state);
_analytor.reset();
}
return ExecNode::close(state);
@ -294,6 +296,7 @@ Status AnalyticNode::_get_next_for_unbounded_preceding_rows_frame(RuntimeState*
Status AnalyticNode::_try_fetch_next_partition_data(RuntimeState* state, int64_t* partition_end) {
*partition_end = _analytor->find_partition_end();
while (!_analytor->is_partition_finished(*partition_end)) {
RETURN_IF_ERROR(state->check_mem_limit("analytic node fetch next partition data"));
RETURN_IF_ERROR(_fetch_next_chunk(state));
*partition_end = _analytor->find_partition_end();
}
@ -315,30 +318,29 @@ Status AnalyticNode::_fetch_next_chunk(RuntimeState* state) {
size_t chunk_size = child_chunk->num_rows();
_analytor->update_input_rows(chunk_size);
{
for (size_t i = 0; i < _analytor->agg_fn_ctxs().size(); i++) {
for (size_t j = 0; j < _analytor->agg_expr_ctxs()[i].size(); j++) {
ColumnPtr column = _analytor->agg_expr_ctxs()[i][j]->evaluate(child_chunk.get());
// Currently, only the lead and lag window functions take multiple arguments.
// We special-case the first argument here for performance.
// If needed, this if/else can easily be removed later.
if (j == 0) {
_analytor->append_column(chunk_size, _analytor->agg_intput_columns()[i][j].get(), column);
} else {
_analytor->agg_intput_columns()[i][j]->append(*column, 0, column->size());
}
for (size_t i = 0; i < _analytor->agg_fn_ctxs().size(); i++) {
for (size_t j = 0; j < _analytor->agg_expr_ctxs()[i].size(); j++) {
ColumnPtr column = _analytor->agg_expr_ctxs()[i][j]->evaluate(child_chunk.get());
// Currently, only the lead and lag window functions take multiple arguments.
// We special-case the first argument here for performance.
// If needed, this if/else can easily be removed later.
if (j == 0) {
TRY_CATCH_BAD_ALLOC(
_analytor->append_column(chunk_size, _analytor->agg_intput_columns()[i][j].get(), column));
} else {
TRY_CATCH_BAD_ALLOC(_analytor->agg_intput_columns()[i][j]->append(*column, 0, column->size()));
}
}
}
for (size_t i = 0; i < _analytor->partition_ctxs().size(); i++) {
ColumnPtr column = _analytor->partition_ctxs()[i]->evaluate(child_chunk.get());
_analytor->append_column(chunk_size, _analytor->partition_columns()[i].get(), column);
}
for (size_t i = 0; i < _analytor->partition_ctxs().size(); i++) {
ColumnPtr column = _analytor->partition_ctxs()[i]->evaluate(child_chunk.get());
TRY_CATCH_BAD_ALLOC(_analytor->append_column(chunk_size, _analytor->partition_columns()[i].get(), column));
}
for (size_t i = 0; i < _analytor->order_ctxs().size(); i++) {
ColumnPtr column = _analytor->order_ctxs()[i]->evaluate(child_chunk.get());
_analytor->append_column(chunk_size, _analytor->order_columns()[i].get(), column);
}
for (size_t i = 0; i < _analytor->order_ctxs().size(); i++) {
ColumnPtr column = _analytor->order_ctxs()[i]->evaluate(child_chunk.get());
TRY_CATCH_BAD_ALLOC(_analytor->append_column(chunk_size, _analytor->order_columns()[i].get(), column));
}
_analytor->input_chunks().emplace_back(std::move(child_chunk));

View File

@ -16,13 +16,14 @@ class SortHelper {
public:
// Sort on type-known column, and the column has no NULL value in sorting range.
template <PrimitiveType PT, bool stable>
static void sort_on_not_null_column(Column* column, bool is_asc_order, Permutation& perm) {
sort_on_not_null_column_within_range<PT, stable>(column, is_asc_order, perm, 0, perm.size());
static Status sort_on_not_null_column(RuntimeState* state, Column* column, bool is_asc_order, Permutation& perm) {
return sort_on_not_null_column_within_range<PT, stable>(state, column, is_asc_order, perm, 0, perm.size());
}
// Sort on type-known column, and the column may have NULL values in the sorting range.
template <PrimitiveType PT, bool stable>
static void sort_on_nullable_column(Column* column, bool is_asc_order, bool is_null_first, Permutation& perm) {
static Status sort_on_nullable_column(RuntimeState* state, Column* column, bool is_asc_order, bool is_null_first,
Permutation& perm) {
auto* nullable_col = down_cast<NullableColumn*>(column);
auto null_first_fn = [&nullable_col](const PermutationItem& item) -> bool {
@ -42,7 +43,7 @@ public:
if (data_offset < perm.size()) {
data_count = perm.size() - data_offset;
} else {
return;
return Status::OK();
}
} else {
// put all NULLs at the end of the permutation.
@ -50,12 +51,13 @@ public:
data_count = end_of_not_null - perm.begin();
}
// sort non-null values
sort_on_not_null_column_within_range<PT, stable>(nullable_col->mutable_data_column(), is_asc_order, perm,
data_offset, data_count);
return sort_on_not_null_column_within_range<PT, stable>(state, nullable_col->mutable_data_column(),
is_asc_order, perm, data_offset, data_count);
}
// Sort on a column whose data type is not known at compile time.
static void sort_on_other_column(Column* column, int sort_order_flag, int null_first_flag, Permutation& perm) {
static Status sort_on_other_column(RuntimeState* state, Column* column, int sort_order_flag, int null_first_flag,
Permutation& perm) {
// decides whether element l precedes element r.
auto cmp_fn = [&column, &sort_order_flag, &null_first_flag](const PermutationItem& l,
const PermutationItem& r) -> bool {
@ -67,27 +69,28 @@ public:
return cmp < 0;
}
};
pdqsort(perm.begin(), perm.end(), cmp_fn);
pdqsort(state->cancelled_ref(), perm.begin(), perm.end(), cmp_fn);
RETURN_IF_CANCELLED(state);
return Status::OK();
}
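
All pdqsort call sites now pass state->cancelled_ref() as the first argument and check RETURN_IF_CANCELLED afterwards. A self-contained sketch of that calling convention, with std::sort standing in for pdqsort (which, per these call sites, polls the flag while sorting):

#include <algorithm>
#include <atomic>

// Sketch only: pdqsort(cancelled, begin, end, cmp) in the tree polls `cancelled`
// during the sort; std::sort here just demonstrates the wrapper shape.
template <typename It, typename Cmp>
bool sort_unless_cancelled(std::atomic<bool>& cancelled, It begin, It end, Cmp cmp) {
    if (cancelled.load(std::memory_order_relaxed)) {
        return false; // the caller maps this to a cancelled Status
    }
    std::sort(begin, end, cmp);
    return !cancelled.load(std::memory_order_relaxed);
}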
private:
// Sort on type-known column, and the column has no NULL value in sorting range.
template <PrimitiveType PT, bool stable>
static void sort_on_not_null_column_within_range(Column* column, bool is_asc_order, Permutation& perm,
size_t offset, size_t count = 0) {
static Status sort_on_not_null_column_within_range(RuntimeState* state, Column* column, bool is_asc_order,
Permutation& perm, size_t offset, size_t count = 0) {
using ColumnTypeName = typename RunTimeTypeTraits<PT>::ColumnType;
using CppTypeName = typename RunTimeTypeTraits<PT>::CppType;
// for numeric column: integers, floats, date, datetime, decimals
if constexpr (pt_is_fixedlength<PT>) {
sort_on_not_null_fixed_size_column<CppTypeName, stable>(column, is_asc_order, perm, offset, count);
return;
return sort_on_not_null_fixed_size_column<CppTypeName, stable>(state, column, is_asc_order, perm, offset,
count);
}
// for binary column
if constexpr (pt_is_binary<PT>) {
sort_on_not_null_binary_column<stable>(column, is_asc_order, perm, offset, count);
return;
return sort_on_not_null_binary_column<stable>(state, column, is_asc_order, perm, offset, count);
}
// for other columns
@ -119,10 +122,12 @@ private:
end_pos = perm.size();
}
if (is_asc_order) {
pdqsort(perm.begin() + offset, perm.begin() + end_pos, less_fn);
pdqsort(state->cancelled_ref(), perm.begin() + offset, perm.begin() + end_pos, less_fn);
} else {
pdqsort(perm.begin() + offset, perm.begin() + end_pos, greater_fn);
pdqsort(state->cancelled_ref(), perm.begin() + offset, perm.begin() + end_pos, greater_fn);
}
RETURN_IF_CANCELLED(state);
return Status::OK();
}
template <typename CppTypeName>
@ -134,8 +139,8 @@ private:
// Sort string
template <bool stable>
static void sort_on_not_null_binary_column(Column* column, bool is_asc_order, Permutation& perm, size_t offset,
size_t count = 0) {
static Status sort_on_not_null_binary_column(RuntimeState* state, Column* column, bool is_asc_order,
Permutation& perm, size_t offset, size_t count = 0) {
const size_t row_num = (count == 0 || offset + count > perm.size()) ? (perm.size() - offset) : count;
auto* binary_column = reinterpret_cast<BinaryColumn*>(column);
auto& data = binary_column->get_data();
@ -171,20 +176,22 @@ private:
};
if (is_asc_order) {
pdqsort(sort_items.begin(), sort_items.end(), less_fn);
pdqsort(state->cancelled_ref(), sort_items.begin(), sort_items.end(), less_fn);
} else {
pdqsort(sort_items.begin(), sort_items.end(), greater_fn);
pdqsort(state->cancelled_ref(), sort_items.begin(), sort_items.end(), greater_fn);
}
RETURN_IF_CANCELLED(state);
for (size_t i = 0; i < row_num; ++i) {
perm[i + offset].index_in_chunk = sort_items[i].index_in_chunk;
}
return Status::OK();
}
// Sort on some numeric column which has no NULL value in sorting range.
// Only supports: integers, floats. Not Slice, DecimalV2Value.
template <typename CppTypeName, bool stable>
static void sort_on_not_null_fixed_size_column(Column* column, bool is_asc_order, Permutation& perm, size_t offset,
size_t count = 0) {
static Status sort_on_not_null_fixed_size_column(RuntimeState* state, Column* column, bool is_asc_order,
Permutation& perm, size_t offset, size_t count = 0) {
// column->size() == perm.size()
const size_t row_num = (count == 0 || offset + count > perm.size()) ? (perm.size() - offset) : count;
const CppTypeName* data = static_cast<CppTypeName*>((void*)column->mutable_raw_data());
@ -216,14 +223,16 @@ private:
};
if (is_asc_order) {
pdqsort(sort_items.begin(), sort_items.end(), less_fn);
pdqsort(state->cancelled_ref(), sort_items.begin(), sort_items.end(), less_fn);
} else {
pdqsort(sort_items.begin(), sort_items.end(), greater_fn);
pdqsort(state->cancelled_ref(), sort_items.begin(), sort_items.end(), greater_fn);
}
RETURN_IF_CANCELLED(state);
// output permutation
for (size_t i = 0; i < row_num; ++i) {
perm[i + offset].index_in_chunk = sort_items[i].index_in_chunk;
}
return Status::OK();
}
};
@ -294,10 +303,20 @@ bool ChunksSorterFullSort::pull_chunk(ChunkPtr* chunk) {
_append_rows_to_chunk(chunk->get(), _sorted_segment->chunk.get(), _sorted_permutation, _next_output_row, count);
_next_output_row += count;
if (_next_output_row >= _sorted_permutation.size()) {
return true;
return _next_output_row >= _sorted_permutation.size();
}
int64_t ChunksSorterFullSort::mem_usage() const {
int64_t usage = 0;
if (_big_chunk != nullptr) {
usage += _big_chunk->memory_usage();
}
return false;
if (_sorted_segment != nullptr) {
usage += _sorted_segment->mem_usage();
}
usage += _sorted_permutation.capacity() * sizeof(Permutation);
usage += _selective_values.capacity() * sizeof(uint32_t);
return usage;
}
Status ChunksSorterFullSort::_sort_chunks(RuntimeState* state) {
@ -308,9 +327,9 @@ Status ChunksSorterFullSort::_sort_chunks(RuntimeState* state) {
// For no more than three order-by columns, sorting by columns can benefit from reducing
// the cost of calling virtual functions of Column::compare_at.
if (_get_number_of_order_by_columns() <= 3) {
_sort_by_columns();
RETURN_IF_ERROR(_sort_by_columns(state));
} else {
_sort_by_row_cmp();
RETURN_IF_ERROR(_sort_by_row_cmp(state));
}
return Status::OK();
}
@ -330,11 +349,11 @@ Status ChunksSorterFullSort::_build_sorting_data(RuntimeState* state) {
}
// Sort in row style with a simplified Permutation struct for the sake of better cache locality.
void ChunksSorterFullSort::_sort_by_row_cmp() {
Status ChunksSorterFullSort::_sort_by_row_cmp(RuntimeState* state) {
SCOPED_TIMER(_sort_timer);
if (_get_number_of_order_by_columns() < 1) {
return;
return Status::OK();
}
// In this case, PermutationItem::chunk_index is constantly 0,
@ -360,42 +379,46 @@ void ChunksSorterFullSort::_sort_by_row_cmp() {
}
};
pdqsort(indices.begin(), indices.end(), cmp_fn);
pdqsort(state->cancelled_ref(), indices.begin(), indices.end(), cmp_fn);
RETURN_IF_CANCELLED(state);
// Set the permutation array to sorted indices.
for (size_t i = 0; i < elem_number; ++i) {
_sorted_permutation[i].index_in_chunk = _sorted_permutation[i].permutation_index = indices[i];
}
return Status::OK();
}
#define CASE_FOR_NULLABLE_COLUMN_SORT(PrimitiveTypeName) \
case PrimitiveTypeName: { \
if (stable) { \
SortHelper::sort_on_nullable_column<PrimitiveTypeName, true>(column, is_asc_order, is_null_first, \
_sorted_permutation); \
} else { \
SortHelper::sort_on_nullable_column<PrimitiveTypeName, false>(column, is_asc_order, is_null_first, \
_sorted_permutation); \
} \
break; \
#define CASE_FOR_NULLABLE_COLUMN_SORT(PrimitiveTypeName) \
case PrimitiveTypeName: { \
if (stable) { \
RETURN_IF_ERROR((SortHelper::sort_on_nullable_column<PrimitiveTypeName, true>( \
state, column, is_asc_order, is_null_first, _sorted_permutation))); \
} else { \
RETURN_IF_ERROR((SortHelper::sort_on_nullable_column<PrimitiveTypeName, false>( \
state, column, is_asc_order, is_null_first, _sorted_permutation))); \
} \
break; \
}
#define CASE_FOR_NOT_NULL_COLUMN_SORT(PrimitiveTypeName) \
case PrimitiveTypeName: { \
if (stable) { \
SortHelper::sort_on_not_null_column<PrimitiveTypeName, true>(column, is_asc_order, _sorted_permutation); \
} else { \
SortHelper::sort_on_not_null_column<PrimitiveTypeName, false>(column, is_asc_order, _sorted_permutation); \
} \
break; \
#define CASE_FOR_NOT_NULL_COLUMN_SORT(PrimitiveTypeName) \
case PrimitiveTypeName: { \
if (stable) { \
RETURN_IF_ERROR((SortHelper::sort_on_not_null_column<PrimitiveTypeName, true>(state, column, is_asc_order, \
_sorted_permutation))); \
} else { \
RETURN_IF_ERROR((SortHelper::sort_on_not_null_column<PrimitiveTypeName, false>( \
state, column, is_asc_order, _sorted_permutation))); \
} \
break; \
}
// Sort in column style to avoid calling virtual methods of Column.
void ChunksSorterFullSort::_sort_by_columns() {
Status ChunksSorterFullSort::_sort_by_columns(RuntimeState* state) {
SCOPED_TIMER(_sort_timer);
if (_get_number_of_order_by_columns() < 1) {
return;
return Status::OK();
}
for (int col_index = static_cast<int>(_get_number_of_order_by_columns()) - 1; col_index >= 0; --col_index) {
@ -433,8 +456,8 @@ void ChunksSorterFullSort::_sort_by_columns() {
CASE_FOR_NULLABLE_COLUMN_SORT(TYPE_DATETIME)
CASE_FOR_NULLABLE_COLUMN_SORT(TYPE_TIME)
default: {
SortHelper::sort_on_other_column(column, _sort_order_flag[col_index], _null_first_flag[col_index],
_sorted_permutation);
RETURN_IF_ERROR(SortHelper::sort_on_other_column(state, column, _sort_order_flag[col_index],
_null_first_flag[col_index], _sorted_permutation));
break;
}
}
@ -458,8 +481,8 @@ void ChunksSorterFullSort::_sort_by_columns() {
CASE_FOR_NOT_NULL_COLUMN_SORT(TYPE_DATETIME)
CASE_FOR_NOT_NULL_COLUMN_SORT(TYPE_TIME)
default: {
SortHelper::sort_on_other_column(column, _sort_order_flag[col_index], _null_first_flag[col_index],
_sorted_permutation);
RETURN_IF_ERROR(SortHelper::sort_on_other_column(state, column, _sort_order_flag[col_index],
_null_first_flag[col_index], _sorted_permutation));
break;
}
}
@ -470,6 +493,7 @@ void ChunksSorterFullSort::_sort_by_columns() {
_sorted_permutation[i].permutation_index = i;
}
}
return Status::OK();
}
void ChunksSorterFullSort::_append_rows_to_chunk(Chunk* dest, Chunk* src, const Permutation& permutation, size_t offset,

View File

@ -2,12 +2,13 @@
#pragma once
#include "column/vectorized_fwd.h"
#include "exec/vectorized/chunks_sorter.h"
#include "exprs/expr_context.h"
#include "util/runtime_profile.h"
namespace starrocks::vectorized {
namespace starrocks {
class ExprContext;
namespace vectorized {
class ChunksSorterFullSort : public ChunksSorter {
public:
/**
@ -27,18 +28,7 @@ public:
void get_next(ChunkPtr* chunk, bool* eos) override;
bool pull_chunk(ChunkPtr* chunk) override;
int64_t mem_usage() const override {
int64_t usage = 0;
if (_big_chunk != nullptr) {
usage += _big_chunk->memory_usage();
}
if (_sorted_segment != nullptr) {
usage += _sorted_segment->mem_usage();
}
usage += _sorted_permutation.capacity() * sizeof(Permutation);
usage += _selective_values.capacity() * sizeof(uint32_t);
return usage;
}
int64_t mem_usage() const override;
friend class SortHelper;
@ -46,8 +36,8 @@ private:
Status _sort_chunks(RuntimeState* state);
Status _build_sorting_data(RuntimeState* state);
void _sort_by_row_cmp();
void _sort_by_columns();
Status _sort_by_row_cmp(RuntimeState* state);
Status _sort_by_columns(RuntimeState* state);
void _append_rows_to_chunk(Chunk* dest, Chunk* src, const Permutation& permutation, size_t offset, size_t count);
@ -57,4 +47,5 @@ private:
std::vector<uint32_t> _selective_values; // for appending selective values to sorted rows
};
} // namespace starrocks::vectorized
} // namespace vectorized
} // namespace starrocks

View File

@ -181,8 +181,8 @@ Status ChunksSorterTopn::_build_sorting_data(RuntimeState* state, Permutation& p
return Status::OK();
}
void ChunksSorterTopn::_sort_data_by_row_cmp(
Permutation& permutation, size_t rows_to_sort, size_t rows_size,
Status ChunksSorterTopn::_sort_data_by_row_cmp(
RuntimeState* state, Permutation& permutation, size_t rows_to_sort, size_t rows_size,
const std::function<bool(const PermutationItem& l, const PermutationItem& r)>& cmp_fn) {
if (rows_to_sort > 0 && rows_to_sort < rows_size / 5) {
// When the limit is at least 1/5 of the data, a full sort is faster than a partial sort.
@ -192,12 +192,14 @@ void ChunksSorterTopn::_sort_data_by_row_cmp(
permutation.resize(rows_to_sort);
} else {
// full sort
pdqsort(permutation.begin(), permutation.end(), cmp_fn);
pdqsort(state->cancelled_ref(), permutation.begin(), permutation.end(), cmp_fn);
RETURN_IF_CANCELLED(state);
if (rows_size > rows_to_sort) {
// For topn, we don't need the data beyond [0, number_of_rows_to_sort).
permutation.resize(rows_to_sort);
}
}
return Status::OK();
}
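
The branch above chooses a partial sort when the limit is small relative to the input, and otherwise a full sort followed by a resize. The same decision in miniature, using std::partial_sort for the partial-sort step that the hunk elides:

#include <algorithm>
#include <vector>

// Illustrative top-n: below one fifth of the input, partially sorting the first
// `limit` elements wins; otherwise sort everything and truncate.
void topn(std::vector<int>& v, size_t limit) {
    if (limit > 0 && limit < v.size() / 5) {
        std::partial_sort(v.begin(), v.begin() + limit, v.end());
    } else {
        std::sort(v.begin(), v.end());
    }
    if (v.size() > limit) {
        v.resize(limit);
    }
}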
void ChunksSorterTopn::_set_permutation_before(Permutation& permutation, size_t size,
@ -319,14 +321,15 @@ Status ChunksSorterTopn::_filter_and_sort_data_by_row_cmp(RuntimeState* state,
// permutations.second.
size_t first_size = permutations.first.size();
if (first_size >= number_of_rows_to_sort) {
_sort_data_by_row_cmp(permutations.first, number_of_rows_to_sort, first_size, cmp_fn);
RETURN_IF_ERROR(_sort_data_by_row_cmp(state, permutations.first, number_of_rows_to_sort, first_size, cmp_fn));
} else {
if (first_size > 0) {
pdqsort(permutations.first.begin(), permutations.first.end(), cmp_fn);
pdqsort(state->cancelled_ref(), permutations.first.begin(), permutations.first.end(), cmp_fn);
RETURN_IF_CANCELLED(state);
}
_sort_data_by_row_cmp(permutations.second, number_of_rows_to_sort - first_size, permutations.second.size(),
cmp_fn);
RETURN_IF_ERROR(_sort_data_by_row_cmp(state, permutations.second, number_of_rows_to_sort - first_size,
permutations.second.size(), cmp_fn));
}
return Status::OK();

View File

@ -53,8 +53,8 @@ private:
void _merge_sort_common(ChunkPtr& big_chunk, DataSegments& segments, size_t sort_row_number, size_t sorted_size,
size_t permutation_size, Permutation& new_permutation);
static void _sort_data_by_row_cmp(
Permutation& permutation, size_t rows_to_sort, size_t rows_size,
static Status _sort_data_by_row_cmp(
RuntimeState* state, Permutation& permutation, size_t rows_to_sort, size_t rows_size,
const std::function<bool(const PermutationItem& l, const PermutationItem& r)>& cmp_fn);
static void _set_permutation_before(Permutation&, size_t size, std::vector<std::vector<uint8_t>>& filter_array);

View File

@ -10,6 +10,7 @@
#include "exec/pipeline/operator.h"
#include "exec/pipeline/pipeline_builder.h"
#include "exprs/expr_context.h"
#include "runtime/current_thread.h"
#include "runtime/runtime_state.h"
namespace starrocks::vectorized {
@ -430,6 +431,13 @@ Status CrossJoinNode::close(RuntimeState* state) {
return Status::OK();
}
if (_build_chunk != nullptr) {
_build_chunk->reset();
}
if (_probe_chunk != nullptr) {
_probe_chunk->reset();
}
child(0)->close(state);
return ExecNode::close(state);
}
@ -481,11 +489,7 @@ Status CrossJoinNode::_build(RuntimeState* state) {
// Merge chunks from child(1) (the right table) into one big chunk, which reduces
// the complexity and cost of cross-joining chunks from the left table against many
// small chunks from the right table.
size_t col_number = chunk->num_columns();
for (size_t col = 0; col < col_number; ++col) {
_build_chunk->get_column_by_index(col)->append(*(chunk->get_column_by_index(col).get()), 0,
row_number);
}
TRY_CATCH_BAD_ALLOC(_build_chunk->append(*chunk));
}
}
}

View File

@ -58,8 +58,8 @@ Status DictDecodeNode::open(RuntimeState* state) {
RETURN_IF_CANCELLED(state);
RETURN_IF_ERROR(_children[0]->open(state));
const auto& global_dict = state->get_global_dict_map();
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_global_dict_map());
const auto& global_dict = state->get_query_global_dict_map();
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_query_global_dict_map());
DCHECK_EQ(_encode_column_cids.size(), _decode_column_cids.size());
int need_decode_size = _decode_column_cids.size();
@ -77,12 +77,17 @@ Status DictDecodeNode::open(RuntimeState* state) {
_dict_optimize_parser.check_could_apply_dict_optimize(expr_ctx, &dict_ctx);
if (!dict_ctx.could_apply_dict_optimize) {
Status::InternalError(fmt::format("Not found dict for function-called cid:{}", need_encode_cid));
return Status::InternalError(
fmt::format("Not found dict for function-called cid:{} it may cause by unsupport function",
need_encode_cid));
}
_dict_optimize_parser.eval_expr(state, expr_ctx, &dict_ctx, need_encode_cid);
dict_iter = global_dict.find(need_encode_cid);
DCHECK(dict_iter != global_dict.end());
if (dict_iter == global_dict.end()) {
return Status::InternalError(fmt::format("Eval Expr Error for cid:{}", need_encode_cid));
}
}
DefaultDecoderPtr decoder = std::make_unique<DefaultDecoder>();

View File

@ -59,6 +59,7 @@ Status EsHttpScanner::get_next(RuntimeState* runtime_state, ChunkPtr* chunk, boo
}
while (!_batch_eof) {
RETURN_IF_CANCELLED(runtime_state);
if (_line_eof || _es_scroll_parser == nullptr) {
RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, _es_scroll_parser));
_es_scroll_parser->set_params(_tuple_desc, &_docvalue_context);

View File

@ -3,13 +3,14 @@
#include "exec/vectorized/except_hash_set.h"
#include "exec/exec_node.h"
#include "exec/vectorized/aggregate/agg_hash_set.h"
#include "runtime/mem_tracker.h"
namespace starrocks::vectorized {
template <typename HashSet>
Status ExceptHashSet<HashSet>::build_set(RuntimeState* state, const ChunkPtr& chunk,
const std::vector<ExprContext*>& exprs, MemPool* pool) {
void ExceptHashSet<HashSet>::build_set(RuntimeState* state, const ChunkPtr& chunk,
const std::vector<ExprContext*>& exprs, MemPool* pool) {
size_t chunk_size = chunk->num_rows();
_slice_sizes.assign(config::vector_chunk_size, 0);
@ -18,9 +19,7 @@ Status ExceptHashSet<HashSet>::build_set(RuntimeState* state, const ChunkPtr& ch
_max_one_row_size = cur_max_one_row_size;
_mem_pool->clear();
_buffer = _mem_pool->allocate(_max_one_row_size * config::vector_chunk_size);
if (UNLIKELY(_buffer == nullptr)) {
return Status::InternalError("Mem usage has exceed the limit of BE");
}
THROW_BAD_ALLOC_IF_NULL(_buffer);
}
_serialize_columns(chunk, exprs, chunk_size);
@ -29,13 +28,11 @@ Status ExceptHashSet<HashSet>::build_set(RuntimeState* state, const ChunkPtr& ch
ExceptSliceFlag key(_buffer + i * _max_one_row_size, _slice_sizes[i]);
_hash_set->lazy_emplace(key, [&](const auto& ctor) {
uint8_t* pos = pool->allocate(key.slice.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL((*_hash_set), pos, key);
memcpy(pos, key.slice.data, key.slice.size);
ctor(pos, key.slice.size);
});
}
RETURN_IF_LIMIT_EXCEEDED(state, "Except, while build hash table.");
return Status::OK();
}
template <typename HashSet>

View File

@ -44,10 +44,15 @@ public:
using Iterator = typename HashSet::iterator;
using KeyVector = std::vector<Slice>;
ExceptHashSet()
: _hash_set(std::make_unique<HashSet>()),
_mem_pool(std::make_unique<MemPool>()),
_buffer(_mem_pool->allocate(_max_one_row_size * config::vector_chunk_size)) {}
ExceptHashSet() = default;
Status init() {
_hash_set = std::make_unique<HashSet>();
_mem_pool = std::make_unique<MemPool>();
_buffer = _mem_pool->allocate(_max_one_row_size * config::vector_chunk_size);
RETURN_IF_UNLIKELY_NULL(_buffer, Status::MemoryAllocFailed("alloc mem of except hash set failed"));
return Status::OK();
}
Iterator begin() { return _hash_set->begin(); }
@ -57,7 +62,7 @@ public:
size_t size() { return _hash_set->size(); }
Status build_set(RuntimeState* state, const ChunkPtr& chunk, const std::vector<ExprContext*>& exprs, MemPool* pool);
void build_set(RuntimeState* state, const ChunkPtr& chunk, const std::vector<ExprContext*>& exprs, MemPool* pool);
Status erase_duplicate_row(RuntimeState* state, const ChunkPtr& chunk, const std::vector<ExprContext*>& exprs);
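
ExceptHashSet moves its buffer allocation out of the constructor into init(), so an allocation failure becomes a Status the caller can check instead of a throw during construction. The two-phase pattern in miniature, with illustrative names:

#include <cstdint>
#include <memory>
#include <new>

// Two-phase construction sketch: trivial constructor, fallible init().
struct BufferedSet {
    std::unique_ptr<uint8_t[]> buf;
    size_t cap = 0;
    bool init(size_t n) {
        buf.reset(new (std::nothrow) uint8_t[n]); // null on exhaustion, no throw
        cap = buf ? n : 0;
        return buf != nullptr; // caller maps false to Status::MemoryAllocFailed
    }
};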

View File

@ -9,6 +9,7 @@
#include "exec/pipeline/set/except_output_source_operator.h"
#include "exec/pipeline/set/except_probe_sink_operator.h"
#include "exprs/expr.h"
#include "runtime/current_thread.h"
#include "runtime/runtime_state.h"
namespace starrocks::vectorized {
@ -81,6 +82,7 @@ Status ExceptNode::open(RuntimeState* state) {
// Initialize the hash table used to remove duplicates.
_hash_set = std::make_unique<ExceptHashSerializeSet>();
RETURN_IF_ERROR(_hash_set->init());
ChunkPtr chunk = nullptr;
RETURN_IF_ERROR(child(0)->open(state));
@ -90,9 +92,8 @@ Status ExceptNode::open(RuntimeState* state) {
RETURN_IF_ERROR(child(0)->get_next(state, &chunk, &eos));
if (!eos) {
ScopedTimer<MonotonicStopWatch> build_timer(_build_set_timer);
RETURN_IF_ERROR(_hash_set->build_set(state, chunk, _child_expr_lists[0], _build_pool.get()));
TRY_CATCH_BAD_ALLOC(_hash_set->build_set(state, chunk, _child_expr_lists[0], _build_pool.get()));
while (true) {
RETURN_IF_ERROR(state->check_mem_limit("ExceptNode"));
RETURN_IF_CANCELLED(state);
build_timer.stop();
RETURN_IF_ERROR(child(0)->get_next(state, &chunk, &eos));
@ -102,7 +103,7 @@ Status ExceptNode::open(RuntimeState* state) {
} else if (chunk->num_rows() == 0) {
continue;
} else {
RETURN_IF_ERROR(_hash_set->build_set(state, chunk, _child_expr_lists[0], _build_pool.get()));
TRY_CATCH_BAD_ALLOC(_hash_set->build_set(state, chunk, _child_expr_lists[0], _build_pool.get()));
}
}
}
@ -132,6 +133,7 @@ Status ExceptNode::open(RuntimeState* state) {
}
_hash_set_iterator = _hash_set->begin();
_mem_tracker->set(_hash_set->mem_usage());
return Status::OK();
}
@ -209,6 +211,10 @@ Status ExceptNode::close(RuntimeState* state) {
_build_pool->free_all();
}
if (_hash_set != nullptr) {
_hash_set.reset();
}
return ExecNode::close(state);
}

View File

@ -17,9 +17,11 @@
#include "exec/vectorized/orc_scanner.h"
#include "exec/vectorized/parquet_scanner.h"
#include "exprs/expr.h"
#include "runtime/current_thread.h"
#include "runtime/exec_env.h"
#include "runtime/row_batch.h"
#include "runtime/runtime_state.h"
#include "util/defer_op.h"
#include "util/runtime_profile.h"
namespace starrocks::vectorized {
@ -273,6 +275,8 @@ Status FileScanNode::scanner_scan(const TBrokerScanRange& scan_range, const std:
}
void FileScanNode::scanner_worker(int start_idx, int length) {
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(_runtime_state->instance_mem_tracker());
// Clone expr context
std::vector<ExprContext*> scanner_expr_ctxs;
auto status = Expr::clone_if_not_exists(_conjunct_ctxs, _runtime_state, &scanner_expr_ctxs);

View File

@ -18,6 +18,7 @@
#include "exprs/vectorized/in_const_predicate.hpp"
#include "exprs/vectorized/runtime_filter_bank.h"
#include "gutil/strings/substitute.h"
#include "runtime/current_thread.h"
#include "runtime/runtime_filter_worker.h"
#include "simd/simd.h"
#include "util/runtime_profile.h"
@ -174,17 +175,15 @@ Status HashJoinNode::open(RuntimeState* state) {
}
{
RETURN_IF_ERROR(state->check_mem_limit("HashJoinNode"));
// copy chunk of right table
SCOPED_TIMER(_copy_right_table_chunk_timer);
RETURN_IF_ERROR(_ht.append_chunk(state, chunk));
TRY_CATCH_BAD_ALLOC(RETURN_IF_ERROR(_ht.append_chunk(state, chunk)));
}
}
{
// build hash table: compute key columns, and then build the hash table.
RETURN_IF_ERROR(_build(state));
RETURN_IF_ERROR(state->check_mem_limit("HashJoinNode"));
TRY_CATCH_BAD_ALLOC(RETURN_IF_ERROR(_build(state)));
COUNTER_SET(_build_rows_counter, static_cast<int64_t>(_ht.get_row_count()));
COUNTER_SET(_build_buckets_counter, static_cast<int64_t>(_ht.get_bucket_size()));
}

View File

@ -2,12 +2,16 @@
#include "exec/vectorized/hdfs_scan_node.h"
#include <atomic>
#include <memory>
#include "env/env_hdfs.h"
#include "exec/vectorized/hdfs_scanner.h"
#include "exprs/expr.h"
#include "exprs/expr_context.h"
#include "exprs/vectorized/runtime_filter.h"
#include "fmt/core.h"
#include "glog/logging.h"
#include "runtime/current_thread.h"
#include "runtime/exec_env.h"
#include "runtime/hdfs/hdfs_fs_cache.h"
@ -18,6 +22,7 @@
#include "util/priority_thread_pool.hpp"
namespace starrocks::vectorized {
HdfsScanNode::HdfsScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
: ScanNode(pool, tnode, descs) {}
@ -160,7 +165,7 @@ Status HdfsScanNode::_start_scan_thread(RuntimeState* state) {
int concurrency = std::min<int>(kMaxConcurrency, _num_scanners);
int chunks = _chunks_per_scanner * concurrency;
_chunk_pool.reserve(chunks);
_fill_chunk_pool(chunks);
TRY_CATCH_BAD_ALLOC(_fill_chunk_pool(chunks));
// start scanner
std::lock_guard<std::mutex> l(_mtx);
@ -189,6 +194,7 @@ Status HdfsScanNode::_create_and_init_scanner(RuntimeState* state, const HdfsFil
scanner_params.min_max_tuple_desc = _min_max_tuple_desc;
scanner_params.hive_column_names = &_hive_column_names;
scanner_params.parent = this;
scanner_params.open_limit = hdfs_file_desc.open_limit;
HdfsScanner* scanner = nullptr;
if (hdfs_file_desc.hdfs_file_format == THdfsFileFormat::PARQUET) {
@ -196,7 +202,7 @@ Status HdfsScanNode::_create_and_init_scanner(RuntimeState* state, const HdfsFil
} else if (hdfs_file_desc.hdfs_file_format == THdfsFileFormat::ORC) {
scanner = _pool->add(new HdfsOrcScanner());
} else {
string msg = "unsupported hdfs file format: " + hdfs_file_desc.hdfs_file_format;
std::string msg = fmt::format("unsupported hdfs file format: {}", hdfs_file_desc.hdfs_file_format);
LOG(WARNING) << msg;
return Status::NotSupported(msg);
}
@ -266,8 +272,58 @@ void HdfsScanNode::_scanner_thread(HdfsScanner* scanner) {
DeferOp op([&] {
tls_thread_status.set_mem_tracker(prev_tracker);
_running_threads.fetch_sub(1, std::memory_order_release);
if (_closed_scanners.load(std::memory_order_acquire) == _num_scanners) {
_result_chunks.shutdown();
}
});
// If the global status is not OK, fail fast.
if (!_get_status().ok()) {
scanner->release_pending_token(&_pending_token);
scanner->close(_runtime_state);
_closed_scanners.fetch_add(1, std::memory_order_release);
_close_pending_scanners();
return;
}
int concurrency_limit = config::max_hdfs_file_handle;
// Once a resource overrun has occurred, the scanners that overran remain in a
// pending state, so even when enough resources later become available they would
// sit idle; therefore we also schedule the scanners that are pending.
if (scanner->has_pending_token()) {
int concurrency = std::min<int>(kMaxConcurrency, _num_scanners);
int need_put = concurrency - _running_threads;
int left_resource = concurrency_limit - scanner->open_limit();
if (left_resource > 0) {
need_put = std::min(left_resource, need_put);
std::lock_guard<std::mutex> l(_mtx);
while (need_put-- > 0 && !_pending_scanners.empty()) {
if (!_submit_scanner(_pending_scanners.pop(), false)) {
break;
}
}
}
}
if (!scanner->has_pending_token()) {
scanner->acquire_pending_token(&_pending_token);
}
// If the number of open files exceeds the limit, the scanner is pushed back to the
// pending list. We can't have all scanners pending: each scan node must keep at
// least one thread runnable.
if (!scanner->is_open() && scanner->open_limit() > concurrency_limit) {
if (!scanner->has_pending_token()) {
std::lock_guard<std::mutex> l(_mtx);
_pending_scanners.push(scanner);
return;
}
}
Status status = scanner->open(_runtime_state);
scanner->set_keep_priority(false);
@ -281,6 +337,7 @@ void HdfsScanNode::_scanner_thread(HdfsScanner* scanner) {
std::lock_guard<std::mutex> l(_mtx);
if (_chunk_pool.empty()) {
scanner->set_keep_priority(true);
scanner->release_pending_token(&_pending_token);
_pending_scanners.push(scanner);
scanner = nullptr;
break;
@ -310,20 +367,23 @@ void HdfsScanNode::_scanner_thread(HdfsScanner* scanner) {
if (status.ok() && resubmit) {
if (!_submit_scanner(scanner, false)) {
std::lock_guard<std::mutex> l(_mtx);
scanner->release_pending_token(&_pending_token);
_pending_scanners.push(scanner);
}
} else if (status.ok()) {
DCHECK(scanner == nullptr);
} else if (status.is_end_of_file()) {
scanner->release_pending_token(&_pending_token);
scanner->close(_runtime_state);
_closed_scanners.fetch_add(1, std::memory_order_release);
std::lock_guard<std::mutex> l(_mtx);
scanner = _pending_scanners.empty() ? nullptr : _pending_scanners.pop();
if (scanner != nullptr && !_submit_scanner(scanner, false)) {
_pending_scanners.push(scanner);
auto nscanner = _pending_scanners.empty() ? nullptr : _pending_scanners.pop();
if (nscanner != nullptr && !_submit_scanner(nscanner, false)) {
_pending_scanners.push(nscanner);
}
} else {
_update_status(status);
scanner->release_pending_token(&_pending_token);
scanner->close(_runtime_state);
_closed_scanners.fetch_add(1, std::memory_order_release);
_close_pending_scanners();
@ -331,15 +391,12 @@ void HdfsScanNode::_scanner_thread(HdfsScanner* scanner) {
} else {
// sometimes state == ok but global_status was not ok
if (scanner != nullptr) {
scanner->release_pending_token(&_pending_token);
scanner->close(_runtime_state);
_closed_scanners.fetch_add(1, std::memory_order_release);
_close_pending_scanners();
}
}
if (_closed_scanners.load(std::memory_order_acquire) == _num_scanners) {
_result_chunks.shutdown();
}
}
void HdfsScanNode::_close_pending_scanners() {
@ -404,7 +461,7 @@ Status HdfsScanNode::get_next(RuntimeState* state, ChunkPtr* chunk, bool* eos) {
}
if (_result_chunks.blocking_get(chunk)) {
_fill_chunk_pool(1);
TRY_CATCH_BAD_ALLOC(_fill_chunk_pool(1));
eval_join_runtime_filters(chunk);
@ -440,9 +497,7 @@ Status HdfsScanNode::close(RuntimeState* state) {
_close_pending_scanners();
for (auto* hdfsFile : _hdfs_files) {
if (hdfsFile->hdfs_fs != nullptr && hdfsFile->hdfs_file != nullptr) {
hdfsCloseFile(hdfsFile->hdfs_fs, hdfsFile->hdfs_file);
}
hdfsFile->fs.reset();
}
Expr::close(_min_max_conjunct_ctxs, state);
@ -546,31 +601,27 @@ Status HdfsScanNode::_find_and_insert_hdfs_file(const THdfsScanRange& scan_range
auto* hdfs_file_desc = _pool->add(new HdfsFileDesc());
hdfs_file_desc->hdfs_fs = nullptr;
hdfs_file_desc->hdfs_file = nullptr;
hdfs_file_desc->fs = std::move(file);
hdfs_file_desc->partition_id = scan_range.partition_id;
hdfs_file_desc->path = scan_range.relative_path;
hdfs_file_desc->file_length = scan_range.file_length;
hdfs_file_desc->splits.emplace_back(&scan_range);
hdfs_file_desc->hdfs_file_format = scan_range.file_format;
hdfs_file_desc->open_limit = nullptr;
_hdfs_files.emplace_back(hdfs_file_desc);
} else {
hdfsFS hdfs;
RETURN_IF_ERROR(HdfsFsCache::instance()->get_connection(namenode, &hdfs));
auto* file = hdfsOpenFile(hdfs, native_file_path.c_str(), O_RDONLY, 0, 0, 0);
if (file == nullptr) {
return Status::InternalError(strings::Substitute("open file failed, file=$0", native_file_path));
}
std::atomic<int32_t>* open_limit = nullptr;
RETURN_IF_ERROR(HdfsFsCache::instance()->get_connection(namenode, &hdfs, &open_limit));
auto* hdfs_file_desc = _pool->add(new HdfsFileDesc());
hdfs_file_desc->hdfs_fs = hdfs;
hdfs_file_desc->hdfs_file = file;
hdfs_file_desc->fs = std::make_shared<HdfsRandomAccessFile>(hdfs, file, native_file_path);
hdfs_file_desc->fs = std::make_shared<HdfsRandomAccessFile>(hdfs, native_file_path);
hdfs_file_desc->partition_id = scan_range.partition_id;
hdfs_file_desc->path = scan_range.relative_path;
hdfs_file_desc->file_length = scan_range.file_length;
hdfs_file_desc->splits.emplace_back(&scan_range);
hdfs_file_desc->hdfs_file_format = scan_range.file_format;
hdfs_file_desc->open_limit = open_limit;
_hdfs_files.emplace_back(hdfs_file_desc);
}
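
Each namenode connection now carries a shared open-file counter: HdfsFsCache::get_connection hands back an atomic<int32_t>* that scanners increment in open() and decrement in close(), and scheduling compares it against config::max_hdfs_file_handle before opening more files. The accounting in miniature, with illustrative names:

#include <atomic>
#include <cstdint>

// One counter per filesystem connection (illustrative).
struct FsOpenLimit {
    std::atomic<int32_t> open_files{0};
};

bool may_open_more(const FsOpenLimit& limit, int32_t max_handles) {
    return limit.open_files.load(std::memory_order_relaxed) < max_handles;
}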

View File

@ -3,6 +3,7 @@
#pragma once
#include <atomic>
#include <memory>
#include "env/env.h"
#include "exec/scan_node.h"
@ -15,14 +16,15 @@ namespace starrocks::vectorized {
struct HdfsFileDesc {
hdfsFS hdfs_fs;
hdfsFile hdfs_file;
THdfsFileFormat::type hdfs_file_format;
std::shared_ptr<RandomAccessFile> fs = nullptr;
std::shared_ptr<RandomAccessFile> fs;
int partition_id = 0;
std::string path;
int64_t file_length = 0;
std::vector<const THdfsScanRange*> splits;
std::atomic<int32_t>* open_limit = nullptr;
};
class HdfsScanNode final : public starrocks::ScanNode {
@ -146,6 +148,8 @@ private:
RuntimeState* _runtime_state = nullptr;
bool _is_hdfs_fs = true;
std::atomic_bool _pending_token = true;
std::atomic<int32_t> _scanner_submit_count = 0;
std::atomic<int32_t> _running_threads = 0;
std::atomic<int32_t> _closed_scanners = 0;

View File

@ -3,9 +3,14 @@
#include "exec/vectorized/hdfs_scanner.h"
#include <hdfs/hdfs.h>
#include <unistd.h>
#include <algorithm>
#include <memory>
#include <mutex>
#include <thread>
#include "common/status.h"
#include "env/env_hdfs.h"
#include "exec/exec_node.h"
#include "exec/parquet/file_reader.h"
@ -95,6 +100,7 @@ void HdfsScanner::_build_file_read_param() {
}
Status HdfsScanner::get_next(RuntimeState* runtime_state, ChunkPtr* chunk) {
RETURN_IF_CANCELLED(_runtime_state);
#ifndef BE_TEST
SCOPED_TIMER(_scanner_params.parent->_scan_timer);
#endif
@ -116,29 +122,40 @@ Status HdfsScanner::open(RuntimeState* runtime_state) {
if (_is_open) {
return Status::OK();
}
#ifndef BE_TEST
RETURN_IF_ERROR(down_cast<HdfsRandomAccessFile*>(_scanner_params.fs.get())->open());
#endif
_build_file_read_param();
auto status = do_open(runtime_state);
if (status.ok()) {
_is_open = true;
#ifndef BE_TEST
(*_scanner_params.open_limit)++;
#endif
LOG(INFO) << "open file success: " << _scanner_params.fs->file_name();
}
return status;
}
Status HdfsScanner::close(RuntimeState* runtime_state) {
void HdfsScanner::close(RuntimeState* runtime_state) noexcept {
DCHECK(!has_pending_token());
if (_is_closed) {
return Status::OK();
return;
}
Expr::close(_conjunct_ctxs, runtime_state);
Expr::close(_min_max_conjunct_ctxs, runtime_state);
for (auto& it : _conjunct_ctxs_by_slot) {
Expr::close(it.second, runtime_state);
}
auto status = do_close(runtime_state);
if (status.ok()) {
_is_closed = true;
do_close(runtime_state);
#ifndef BE_TEST
down_cast<HdfsRandomAccessFile*>(_scanner_params.fs.get())->close();
if (_is_open) {
(*_scanner_params.open_limit)--;
}
return status;
#endif
_scanner_params.fs.reset();
_is_closed = true;
}
#ifndef BE_TEST
@ -168,8 +185,12 @@ static void get_hdfs_statistics(hdfsFile file, HdfsReadStats* stats) {
void HdfsScanner::update_counter() {
#ifndef BE_TEST
// _scanner_params.fs being null means the scanner failed to open
if (_scanner_params.fs == nullptr) return;
HdfsReadStats hdfs_stats;
auto hdfs_file = down_cast<HdfsRandomAccessFile*>(_scanner_params.fs.get())->hdfs_file();
if (hdfs_file == nullptr) return;
// Hdfslib only supports obtaining statistics of the HDFS file system.
// For other systems such as S3, calling this function will crash the BE.
if (_scanner_params.parent->_is_hdfs_fs) {

View File

@ -2,6 +2,7 @@
#pragma once
#include <atomic>
#include <utility>
#include "column/chunk.h"
@ -77,6 +78,8 @@ struct HdfsScannerParams {
std::vector<std::string>* hive_column_names;
HdfsScanNode* parent = nullptr;
std::atomic<int32_t>* open_limit;
};
struct HdfsFileReaderParam {
@ -133,13 +136,20 @@ struct HdfsFileReaderParam {
bool can_use_dict_filter_on_slot(SlotDescriptor* slot) const;
};
// if *lvalue == expect, swap(*lvalue,*rvalue)
inline bool atomic_cas(std::atomic_bool* lvalue, std::atomic_bool* rvalue, bool expect) {
bool res = lvalue->compare_exchange_strong(expect, *rvalue);
if (res) *rvalue = expect;
return res;
}
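The atomic_cas helper above swaps the two flags only when *lvalue currently equals expect, which is how a single scan token is handed between the node and a scanner. A minimal standalone sketch of that handoff; the variable names are illustrative and not part of the patch:

#include <atomic>
#include <cassert>

// same helper as above: if *lvalue == expect, swap(*lvalue, *rvalue)
inline bool atomic_cas(std::atomic_bool* lvalue, std::atomic_bool* rvalue, bool expect) {
    bool res = lvalue->compare_exchange_strong(expect, *rvalue);
    if (res) *rvalue = expect;
    return res;
}

int main() {
    std::atomic_bool node_token = true;     // the node starts out holding the token
    std::atomic_bool scanner_token = false; // a scanner's _pending_token starts false
    // acquire succeeds: node_token was true, so the two values are swapped
    assert(atomic_cas(&node_token, &scanner_token, true));
    assert(!node_token && scanner_token);
    // a second acquire fails: the token has already been handed out
    std::atomic_bool other_scanner = false;
    assert(!atomic_cas(&node_token, &other_scanner, true));
    return 0;
}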
class HdfsScanner {
public:
HdfsScanner() = default;
virtual ~HdfsScanner() = default;
Status open(RuntimeState* runtime_state);
Status close(RuntimeState* runtime_state);
void close(RuntimeState* runtime_state) noexcept;
Status get_next(RuntimeState* runtime_state, ChunkPtr* chunk);
Status init(RuntimeState* runtime_state, const HdfsScannerParams& scanner_params);
@ -150,8 +160,28 @@ public:
RuntimeState* runtime_state() { return _runtime_state; }
int open_limit() { return *_scanner_params.open_limit; }
bool is_open() { return _is_open; }
bool acquire_pending_token(std::atomic_bool* token) {
// acquire resource
return atomic_cas(token, &_pending_token, true);
}
bool release_pending_token(std::atomic_bool* token) {
if (_pending_token) {
_pending_token = false;
*token = true;
return true;
}
return false;
}
bool has_pending_token() { return _pending_token; }
virtual Status do_open(RuntimeState* runtime_state) = 0;
virtual Status do_close(RuntimeState* runtime_state) = 0;
virtual void do_close(RuntimeState* runtime_state) noexcept = 0;
virtual Status do_get_next(RuntimeState* runtime_state, ChunkPtr* chunk) = 0;
virtual Status do_init(RuntimeState* runtime_state, const HdfsScannerParams& scanner_params) = 0;
@ -162,6 +192,8 @@ private:
void _build_file_read_param();
protected:
std::atomic_bool _pending_token = false;
HdfsFileReaderParam _file_read_param;
HdfsScannerParams _scanner_params;
RuntimeState* _runtime_state = nullptr;
@ -183,7 +215,7 @@ public:
void update_counter();
Status do_open(RuntimeState* runtime_state) override;
Status do_close(RuntimeState* runtime_state) override;
void do_close(RuntimeState* runtime_state) noexcept override;
Status do_get_next(RuntimeState* runtime_state, ChunkPtr* chunk) override;
Status do_init(RuntimeState* runtime_state, const HdfsScannerParams& scanner_params) override;

View File

@ -192,6 +192,13 @@ bool OrcRowReaderFilter::filterOnPickRowGroup(size_t rowGroupIdx,
return false;
}
// Hive ORC char type will pad trailing spaces.
// https://docs.cloudera.com/documentation/enterprise/6/6.3/topics/impala_char.html
static inline size_t remove_trailing_spaces(const char* s, size_t size) {
while (size > 0 && s[size - 1] == ' ') size--;
return size;
}
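remove_trailing_spaces only shortens the logical length; it never rewrites the buffer. A quick self-contained check of its behavior (the sample values are illustrative):

#include <cassert>
#include <cstring>

static inline size_t remove_trailing_spaces(const char* s, size_t size) {
    while (size > 0 && s[size - 1] == ' ') size--;
    return size;
}

int main() {
    const char* padded = "yes  ";                    // a CHAR(5) value padded by Hive
    assert(remove_trailing_spaces(padded, strlen(padded)) == 3);
    assert(remove_trailing_spaces("   ", 3) == 0);   // an all-space value becomes empty
    assert(remove_trailing_spaces("a b", 3) == 3);   // interior spaces are preserved
    return 0;
}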
bool OrcRowReaderFilter::filterOnPickStringDictionary(
const std::unordered_map<uint64_t, orc::StringDictionary*>& sdicts) {
if (sdicts.empty()) return false;
@ -223,6 +230,9 @@ bool OrcRowReaderFilter::filterOnPickStringDictionary(
}
// create chunk
orc::StringDictionary* dict = it->second;
if (dict->dictionaryOffset.size() > config::vector_chunk_size) {
continue;
}
vectorized::ChunkPtr dict_value_chunk = std::make_shared<vectorized::Chunk>();
// always assume there may be null values in the ORC column,
// and we always evaluate with null.
@ -241,15 +251,34 @@ bool OrcRowReaderFilter::filterOnPickStringDictionary(
bytes.reserve(content_size);
const uint8_t* start = reinterpret_cast<const uint8_t*>(content_data);
const uint8_t* end = reinterpret_cast<const uint8_t*>(content_data + content_size);
bytes.insert(bytes.end(), start, end);
size_t offset_size = dict->dictionaryOffset.size();
size_t dict_size = offset_size - 1;
const int64_t* offset_data = dict->dictionaryOffset.data();
offsets.resize(offset_size);
// type mismatch, have to use loop to assign.
for (size_t i = 0; i < offset_size; i++) {
offsets[i] = offset_data[i];
if (slot_desc->type().type == TYPE_CHAR) {
// for char type, dict strings are also padded with spaces.
// we also have to strip spaces off. For example
// | hello   | world  | yes    |, we have to compact to
// | hello | world | yes |
size_t total_size = 0;
const char* p_start = reinterpret_cast<const char*>(start);
for (size_t i = 0; i < dict_size; i++) {
const char* s = p_start + offset_data[i];
size_t old_size = offset_data[i + 1] - offset_data[i];
size_t new_size = remove_trailing_spaces(s, old_size);
bytes.insert(bytes.end(), s, s + new_size);
offsets[i] = total_size;
total_size += new_size;
}
offsets[dict_size] = total_size;
} else {
bytes.insert(bytes.end(), start, end);
// type mismatch, have to use loop to assign.
for (size_t i = 0; i < offset_size; i++) {
offsets[i] = offset_data[i];
}
}
// the first (dict_size) items are all non-null
@ -301,9 +330,9 @@ void HdfsOrcScanner::update_counter() {
#endif
}
Status HdfsParquetScanner::do_close(RuntimeState* runtime_state) {
void HdfsParquetScanner::do_close(RuntimeState* runtime_state) noexcept {
update_counter();
return Status::OK();
_reader.reset();
}
Status HdfsOrcScanner::do_open(RuntimeState* runtime_state) {
@ -360,10 +389,9 @@ Status HdfsOrcScanner::do_open(RuntimeState* runtime_state) {
return Status::OK();
}
Status HdfsOrcScanner::do_close(RuntimeState* runtime_state) {
void HdfsOrcScanner::do_close(RuntimeState* runtime_state) noexcept {
_orc_adapter.reset(nullptr);
update_counter();
return Status::OK();
}
Status HdfsOrcScanner::do_get_next(RuntimeState* runtime_state, ChunkPtr* chunk) {

View File

@ -18,7 +18,7 @@ public:
void update_counter();
Status do_open(RuntimeState* runtime_state) override;
Status do_close(RuntimeState* runtime_state) override;
void do_close(RuntimeState* runtime_state) noexcept override;
Status do_get_next(RuntimeState* runtime_state, ChunkPtr* chunk) override;
Status do_init(RuntimeState* runtime_state, const HdfsScannerParams& scanner_params) override;

View File

@ -3,13 +3,14 @@
#include "exec/vectorized/intersect_hash_set.h"
#include "exec/exec_node.h"
#include "exec/vectorized/aggregate/agg_hash_set.h"
#include "util/phmap/phmap_dump.h"
namespace starrocks::vectorized {
template <typename HashSet>
Status IntersectHashSet<HashSet>::build_set(RuntimeState* state, const ChunkPtr& chunkPtr,
const std::vector<ExprContext*>& exprs, MemPool* pool) {
void IntersectHashSet<HashSet>::build_set(RuntimeState* state, const ChunkPtr& chunkPtr,
const std::vector<ExprContext*>& exprs, MemPool* pool) {
size_t chunk_size = chunkPtr->num_rows();
_slice_sizes.assign(config::vector_chunk_size, 0);
@ -18,9 +19,7 @@ Status IntersectHashSet<HashSet>::build_set(RuntimeState* state, const ChunkPtr&
_max_one_row_size = cur_max_one_row_size;
_mem_pool->clear();
_buffer = _mem_pool->allocate(_max_one_row_size * config::vector_chunk_size);
if (UNLIKELY(_buffer == nullptr)) {
return Status::InternalError("Mem usage has exceed the limit of BE");
}
THROW_BAD_ALLOC_IF_NULL(_buffer);
}
_serialize_columns(chunkPtr, exprs, chunk_size);
@ -30,12 +29,11 @@ Status IntersectHashSet<HashSet>::build_set(RuntimeState* state, const ChunkPtr&
_hash_set->lazy_emplace(key, [&](const auto& ctor) {
// we must persist the slice before insert
uint8_t* pos = pool->allocate(key.slice.size);
ERASE_AND_THROW_BAD_ALLOC_IF_NULL((*_hash_set), pos, key);
memcpy(pos, key.slice.data, key.slice.size);
ctor(pos, key.slice.size);
});
}
RETURN_IF_LIMIT_EXCEEDED(state, "Intersect, while build hash table.");
return Status::OK();
}
template <typename HashSet>

View File

@ -42,10 +42,15 @@ public:
using Iterator = typename HashSet::iterator;
using KeyVector = typename std::vector<Slice>;
IntersectHashSet()
: _hash_set(std::make_unique<HashSet>()),
_mem_pool(std::make_unique<MemPool>()),
_buffer(_mem_pool->allocate(_max_one_row_size * config::vector_chunk_size)) {}
IntersectHashSet() = default;
Status init() {
_hash_set = std::make_unique<HashSet>();
_mem_pool = std::make_unique<MemPool>();
_buffer = _mem_pool->allocate(_max_one_row_size * config::vector_chunk_size);
RETURN_IF_UNLIKELY_NULL(_buffer, Status::MemoryAllocFailed("alloc mem for intersect hash set failed"));
return Status::OK();
}
Iterator begin() { return _hash_set->begin(); }
@ -53,8 +58,8 @@ public:
bool empty() { return _hash_set->empty(); }
Status build_set(RuntimeState* state, const ChunkPtr& chunkPtr, const std::vector<ExprContext*>& exprs,
MemPool* pool);
void build_set(RuntimeState* state, const ChunkPtr& chunkPtr, const std::vector<ExprContext*>& exprs,
MemPool* pool);
Status refine_intersect_row(RuntimeState* state, const ChunkPtr& chunkPtr, const std::vector<ExprContext*>& exprs,
int hit_times);
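Moving the allocation out of the constructor into a fallible init() means callers observe allocation failure as a returned error instead of a half-constructed object. A rough sketch of the two-phase-init pattern with stand-in types (HashSetLike is illustrative, not a StarRocks class):

#include <cstddef>
#include <memory>
#include <new>

class HashSetLike {
public:
    HashSetLike() = default;                  // never allocates, cannot fail
    bool init(size_t bytes) {                 // all fallible work happens here
        _buffer.reset(new (std::nothrow) char[bytes]);
        return _buffer != nullptr;            // caller turns this into a Status
    }
private:
    std::unique_ptr<char[]> _buffer;
};

int main() {
    HashSetLike set;
    if (!set.init(size_t(1) << 20)) return 1; // propagate the error, don't crash
    return 0;
}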

View File

@ -11,6 +11,7 @@
#include "exec/pipeline/set/intersect_output_source_operator.h"
#include "exec/pipeline/set/intersect_probe_sink_operator.h"
#include "exprs/expr.h"
#include "runtime/current_thread.h"
#include "runtime/runtime_state.h"
namespace starrocks::vectorized {
@ -84,6 +85,7 @@ Status IntersectNode::open(RuntimeState* state) {
// initially build the hash table used for recording hits.
_hash_set = std::make_unique<IntersectHashSerializeSet>();
RETURN_IF_ERROR(_hash_set->init());
ChunkPtr chunk = nullptr;
RETURN_IF_ERROR(child(0)->open(state));
@ -93,7 +95,7 @@ Status IntersectNode::open(RuntimeState* state) {
RETURN_IF_ERROR(child(0)->get_next(state, &chunk, &eos));
if (!eos) {
ScopedTimer<MonotonicStopWatch> build_timer(_build_set_timer);
RETURN_IF_ERROR(_hash_set->build_set(state, chunk, _child_expr_lists[0], _build_pool.get()));
TRY_CATCH_BAD_ALLOC(_hash_set->build_set(state, chunk, _child_expr_lists[0], _build_pool.get()));
while (true) {
RETURN_IF_ERROR(state->check_mem_limit("IntersectNode"));
RETURN_IF_CANCELLED(state);
@ -107,7 +109,7 @@ Status IntersectNode::open(RuntimeState* state) {
if (chunk->num_rows() == 0) {
continue;
}
RETURN_IF_ERROR(_hash_set->build_set(state, chunk, _child_expr_lists[0], _build_pool.get()));
TRY_CATCH_BAD_ALLOC(_hash_set->build_set(state, chunk, _child_expr_lists[0], _build_pool.get()));
}
}
@ -143,6 +145,7 @@ Status IntersectNode::open(RuntimeState* state) {
}
_hash_set_iterator = _hash_set->begin();
_mem_tracker->set(_hash_set->mem_usage());
return Status::OK();
}
@ -220,6 +223,10 @@ Status IntersectNode::close(RuntimeState* state) {
_build_pool->free_all();
}
if (_hash_set != nullptr) {
_hash_set.reset();
}
return ExecNode::close(state);
}
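TRY_CATCH_BAD_ALLOC pairs with THROW_BAD_ALLOC_IF_NULL in build_set above: the hash-set code throws std::bad_alloc on allocation failure and the node converts it back into a returned Status. The real macro lives in runtime/current_thread.h and also cooperates with the thread-local MemTracker; the following is only a rough sketch of the control flow, not the actual macro body:

#include <new>
#include <string>

// Simplified stand-in for starrocks::Status.
struct Status {
    static Status OK() { return {true, ""}; }
    static Status MemLimitExceeded(std::string m) { return {false, std::move(m)}; }
    bool is_ok;
    std::string msg;
};

// Illustrative only: convert a thrown std::bad_alloc into a returned Status.
#define TRY_CATCH_BAD_ALLOC_SKETCH(stmt)                                             \
    do {                                                                             \
        try {                                                                        \
            { stmt; }                                                                \
        } catch (const std::bad_alloc&) {                                            \
            return Status::MemLimitExceeded("Mem usage has exceed the limit of BE"); \
        }                                                                            \
    } while (0)

Status build_set_caller() {
    TRY_CATCH_BAD_ALLOC_SKETCH(std::string s(1024, 'x'); (void)s);
    return Status::OK();
}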

View File

@ -47,9 +47,7 @@ Status SerializedJoinBuildFunc::construct_hash_table(JoinHashTableItems* table_i
serialize_size += data_column->serialize_size();
}
uint8_t* ptr = table_items->build_pool->allocate(serialize_size);
if (UNLIKELY(ptr == nullptr)) {
return Status::InternalError("Mem usage has exceed the limit of BE");
}
RETURN_IF_UNLIKELY_NULL(ptr, Status::MemoryAllocFailed("alloc mem for hash join build failed"));
// serialize and build hash table
uint32_t quo = row_count / config::vector_chunk_size;
@ -143,9 +141,7 @@ Status SerializedJoinProbeFunc::lookup_init(const JoinHashTableItems& table_item
serialize_size += data_column->serialize_size();
}
uint8_t* ptr = table_items.probe_pool->allocate(serialize_size);
if (UNLIKELY(ptr == nullptr)) {
return Status::InternalError("Mem usage has exceed the limit of BE");
}
RETURN_IF_UNLIKELY_NULL(ptr, Status::MemoryAllocFailed("alloc mem for hash join probe failed"));
// serialize and init search
if (!null_columns.empty()) {
@ -207,51 +203,55 @@ void SerializedJoinProbeFunc::_probe_nullable_column(const JoinHashTableItems& t
JoinHashTable::~JoinHashTable() {}
void JoinHashTable::close() {
_table_items.build_pool.reset();
_table_items.probe_pool.reset();
_table_items.reset();
_probe_state.reset();
}
void JoinHashTable::create(const HashTableParam& param) {
_table_items.row_count = 0;
_table_items.bucket_size = 0;
_table_items.build_chunk = std::make_shared<Chunk>();
_table_items.build_pool = std::make_unique<MemPool>();
_table_items.probe_pool = std::make_unique<MemPool>();
_table_items.with_other_conjunct = param.with_other_conjunct;
_table_items.join_type = param.join_type;
_table_items.row_desc = param.row_desc;
if (_table_items.join_type == TJoinOp::RIGHT_SEMI_JOIN || _table_items.join_type == TJoinOp::RIGHT_ANTI_JOIN ||
_table_items.join_type == TJoinOp::RIGHT_OUTER_JOIN) {
_table_items.left_to_nullable = true;
} else if (_table_items.join_type == TJoinOp::LEFT_SEMI_JOIN || _table_items.join_type == TJoinOp::LEFT_ANTI_JOIN ||
_table_items.join_type == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
_table_items.join_type == TJoinOp::LEFT_OUTER_JOIN) {
_table_items.right_to_nullable = true;
} else if (_table_items.join_type == TJoinOp::FULL_OUTER_JOIN) {
_table_items.left_to_nullable = true;
_table_items.right_to_nullable = true;
_table_items = std::make_unique<JoinHashTableItems>();
_probe_state = std::make_unique<HashTableProbeState>();
_table_items->row_count = 0;
_table_items->bucket_size = 0;
_table_items->build_chunk = std::make_shared<Chunk>();
_table_items->build_pool = std::make_unique<MemPool>();
_table_items->probe_pool = std::make_unique<MemPool>();
_table_items->with_other_conjunct = param.with_other_conjunct;
_table_items->join_type = param.join_type;
_table_items->row_desc = param.row_desc;
if (_table_items->join_type == TJoinOp::RIGHT_SEMI_JOIN || _table_items->join_type == TJoinOp::RIGHT_ANTI_JOIN ||
_table_items->join_type == TJoinOp::RIGHT_OUTER_JOIN) {
_table_items->left_to_nullable = true;
} else if (_table_items->join_type == TJoinOp::LEFT_SEMI_JOIN ||
_table_items->join_type == TJoinOp::LEFT_ANTI_JOIN ||
_table_items->join_type == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN ||
_table_items->join_type == TJoinOp::LEFT_OUTER_JOIN) {
_table_items->right_to_nullable = true;
} else if (_table_items->join_type == TJoinOp::FULL_OUTER_JOIN) {
_table_items->left_to_nullable = true;
_table_items->right_to_nullable = true;
}
_table_items.search_ht_timer = param.search_ht_timer;
_table_items.output_build_column_timer = param.output_build_column_timer;
_table_items.output_probe_column_timer = param.output_probe_column_timer;
_table_items.output_tuple_column_timer = param.output_tuple_column_timer;
_table_items.join_keys = param.join_keys;
_table_items->search_ht_timer = param.search_ht_timer;
_table_items->output_build_column_timer = param.output_build_column_timer;
_table_items->output_probe_column_timer = param.output_probe_column_timer;
_table_items->output_tuple_column_timer = param.output_tuple_column_timer;
_table_items->join_keys = param.join_keys;
const auto& probe_desc = *param.probe_row_desc;
for (const auto& tuple_desc : probe_desc.tuple_descriptors()) {
for (const auto& slot : tuple_desc->slots()) {
_table_items.probe_slots.emplace_back(slot);
_table_items.probe_column_count++;
_table_items->probe_slots.emplace_back(slot);
_table_items->probe_column_count++;
}
if (_table_items.row_desc->get_tuple_idx(tuple_desc->id()) != RowDescriptor::INVALID_IDX) {
_table_items.output_probe_tuple_ids.emplace_back(tuple_desc->id());
if (_table_items->row_desc->get_tuple_idx(tuple_desc->id()) != RowDescriptor::INVALID_IDX) {
_table_items->output_probe_tuple_ids.emplace_back(tuple_desc->id());
}
}
const auto& build_desc = *param.build_row_desc;
for (const auto& tuple_desc : build_desc.tuple_descriptors()) {
for (const auto& slot : tuple_desc->slots()) {
_table_items.build_slots.emplace_back(slot);
_table_items->build_slots.emplace_back(slot);
ColumnPtr column = ColumnHelper::create_column(slot->type(), slot->is_nullable());
if (slot->is_nullable()) {
auto* nullable_column = ColumnHelper::as_raw_column<NullableColumn>(column);
@ -259,35 +259,35 @@ void JoinHashTable::create(const HashTableParam& param) {
} else {
column->append_default();
}
_table_items.build_chunk->append_column(std::move(column), slot->id());
_table_items.build_column_count++;
_table_items->build_chunk->append_column(std::move(column), slot->id());
_table_items->build_column_count++;
}
if (_table_items.row_desc->get_tuple_idx(tuple_desc->id()) != RowDescriptor::INVALID_IDX) {
_table_items.output_build_tuple_ids.emplace_back(tuple_desc->id());
if (_table_items->row_desc->get_tuple_idx(tuple_desc->id()) != RowDescriptor::INVALID_IDX) {
_table_items->output_build_tuple_ids.emplace_back(tuple_desc->id());
}
}
}
Status JoinHashTable::build(RuntimeState* state) {
_hash_map_type = _choose_join_hash_map();
_table_items.bucket_size = JoinHashMapHelper::calc_bucket_size(_table_items.row_count + 1);
_table_items.first.resize(_table_items.bucket_size, 0);
_table_items.next.resize(_table_items.row_count + 1, 0);
if (_table_items.join_type == TJoinOp::RIGHT_OUTER_JOIN || _table_items.join_type == TJoinOp::FULL_OUTER_JOIN ||
_table_items.join_type == TJoinOp::RIGHT_SEMI_JOIN || _table_items.join_type == TJoinOp::RIGHT_ANTI_JOIN) {
_probe_state.build_match_index.resize(_table_items.row_count + 1, 0);
_probe_state.build_match_index[0] = 1;
_table_items->bucket_size = JoinHashMapHelper::calc_bucket_size(_table_items->row_count + 1);
_table_items->first.resize(_table_items->bucket_size, 0);
_table_items->next.resize(_table_items->row_count + 1, 0);
if (_table_items->join_type == TJoinOp::RIGHT_OUTER_JOIN || _table_items->join_type == TJoinOp::FULL_OUTER_JOIN ||
_table_items->join_type == TJoinOp::RIGHT_SEMI_JOIN || _table_items->join_type == TJoinOp::RIGHT_ANTI_JOIN) {
_probe_state->build_match_index.resize(_table_items->row_count + 1, 0);
_probe_state->build_match_index[0] = 1;
}
JoinHashMapHelper::prepare_map_index(&_probe_state);
JoinHashMapHelper::prepare_map_index(_probe_state.get());
switch (_hash_map_type) {
case JoinHashMapType::empty:
break;
#define M(NAME) \
case JoinHashMapType::NAME: \
_##NAME = std::make_unique<typename decltype(_##NAME)::element_type>(&_table_items, &_probe_state); \
RETURN_IF_ERROR(_##NAME->build(state)); \
#define M(NAME) \
case JoinHashMapType::NAME: \
_##NAME = std::make_unique<typename decltype(_##NAME)::element_type>(_table_items.get(), _probe_state.get()); \
RETURN_IF_ERROR(_##NAME->build(state)); \
break;
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
@ -331,11 +331,11 @@ Status JoinHashTable::probe_remain(ChunkPtr* chunk, bool* eos) {
}
Status JoinHashTable::append_chunk(RuntimeState* state, const ChunkPtr& chunk) {
Columns& columns = _table_items.build_chunk->columns();
Columns& columns = _table_items->build_chunk->columns();
size_t chunk_memory_size = 0;
for (size_t i = 0; i < _table_items.build_column_count; i++) {
SlotDescriptor* slot = _table_items.build_slots[i];
for (size_t i = 0; i < _table_items->build_column_count; i++) {
SlotDescriptor* slot = _table_items->build_slots[i];
ColumnPtr& column = chunk->get_column_by_slot_id(slot->id());
chunk_memory_size += column->memory_usage();
@ -352,28 +352,28 @@ Status JoinHashTable::append_chunk(RuntimeState* state, const ChunkPtr& chunk) {
const auto& tuple_id_map = chunk->get_tuple_id_to_index_map();
for (auto iter = tuple_id_map.begin(); iter != tuple_id_map.end(); iter++) {
if (_table_items.row_desc->get_tuple_idx(iter->first) != RowDescriptor::INVALID_IDX) {
if (_table_items.build_chunk->is_tuple_exist(iter->first)) {
if (_table_items->row_desc->get_tuple_idx(iter->first) != RowDescriptor::INVALID_IDX) {
if (_table_items->build_chunk->is_tuple_exist(iter->first)) {
ColumnPtr& src_column = chunk->get_tuple_column_by_id(iter->first);
ColumnPtr& dest_column = _table_items.build_chunk->get_tuple_column_by_id(iter->first);
ColumnPtr& dest_column = _table_items->build_chunk->get_tuple_column_by_id(iter->first);
dest_column->append(*src_column, 0, src_column->size());
chunk_memory_size += src_column->memory_usage();
} else {
ColumnPtr& src_column = chunk->get_tuple_column_by_id(iter->first);
ColumnPtr dest_column = BooleanColumn::create(_table_items.row_count + 1, 1);
ColumnPtr dest_column = BooleanColumn::create(_table_items->row_count + 1, 1);
dest_column->append(*src_column, 0, src_column->size());
_table_items.build_chunk->append_tuple_column(dest_column, iter->first);
_table_items->build_chunk->append_tuple_column(dest_column, iter->first);
chunk_memory_size += src_column->memory_usage();
}
}
}
_table_items.row_count += chunk->num_rows();
_table_items->row_count += chunk->num_rows();
return Status::OK();
}
void JoinHashTable::remove_duplicate_index(Column::Filter* filter) {
switch (_table_items.join_type) {
switch (_table_items->join_type) {
case TJoinOp::LEFT_OUTER_JOIN:
_remove_duplicate_index_for_left_outer_join(filter);
break;
@ -402,17 +402,17 @@ void JoinHashTable::remove_duplicate_index(Column::Filter* filter) {
}
JoinHashMapType JoinHashTable::_choose_join_hash_map() {
size_t size = _table_items.join_keys.size();
size_t size = _table_items->join_keys.size();
DCHECK_GT(size, 0);
for (size_t i = 0; i < _table_items.join_keys.size(); i++) {
if (!_table_items.key_columns[i]->has_null()) {
_table_items.join_keys[i].is_null_safe_equal = false;
for (size_t i = 0; i < _table_items->join_keys.size(); i++) {
if (!_table_items->key_columns[i]->has_null()) {
_table_items->join_keys[i].is_null_safe_equal = false;
}
}
if (size == 1 && !_table_items.join_keys[0].is_null_safe_equal) {
switch (_table_items.join_keys[0].type) {
if (size == 1 && !_table_items->join_keys[0].is_null_safe_equal) {
switch (_table_items->join_keys[0].type) {
case PrimitiveType::TYPE_BOOLEAN:
return JoinHashMapType::keyboolean;
case PrimitiveType::TYPE_TINYINT:
@ -453,7 +453,7 @@ JoinHashMapType JoinHashTable::_choose_join_hash_map() {
size_t total_size_in_byte = 0;
for (auto& join_key : _table_items.join_keys) {
for (auto& join_key : _table_items->join_keys) {
if (join_key.is_null_safe_equal) {
total_size_in_byte += 1;
}
@ -509,12 +509,12 @@ void JoinHashTable::_remove_duplicate_index_for_left_outer_join(Column::Filter*
size_t row_count = filter->size();
for (size_t i = 0; i < row_count; i++) {
if (_probe_state.probe_match_index[_probe_state.probe_index[i]] == 0) {
if (_probe_state->probe_match_index[_probe_state->probe_index[i]] == 0) {
(*filter)[i] = 1;
continue;
}
if (_probe_state.probe_match_index[_probe_state.probe_index[i]] == 1) {
if (_probe_state->probe_match_index[_probe_state->probe_index[i]] == 1) {
if ((*filter)[i] == 0) {
(*filter)[i] = 1;
}
@ -522,7 +522,7 @@ void JoinHashTable::_remove_duplicate_index_for_left_outer_join(Column::Filter*
}
if ((*filter)[i] == 0) {
_probe_state.probe_match_index[_probe_state.probe_index[i]]--;
_probe_state->probe_match_index[_probe_state->probe_index[i]]--;
}
}
}
@ -531,8 +531,8 @@ void JoinHashTable::_remove_duplicate_index_for_left_semi_join(Column::Filter* f
size_t row_count = filter->size();
for (size_t i = 0; i < row_count; i++) {
if ((*filter)[i] == 1) {
if (_probe_state.probe_match_index[_probe_state.probe_index[i]] == 0) {
_probe_state.probe_match_index[_probe_state.probe_index[i]] = 1;
if (_probe_state->probe_match_index[_probe_state->probe_index[i]] == 0) {
_probe_state->probe_match_index[_probe_state->probe_index[i]] = 1;
} else {
(*filter)[i] = 0;
}
@ -543,13 +543,13 @@ void JoinHashTable::_remove_duplicate_index_for_left_semi_join(Column::Filter* f
void JoinHashTable::_remove_duplicate_index_for_left_anti_join(Column::Filter* filter) {
size_t row_count = filter->size();
for (size_t i = 0; i < row_count; i++) {
if (_probe_state.probe_match_index[_probe_state.probe_index[i]] == 0) {
if (_probe_state->probe_match_index[_probe_state->probe_index[i]] == 0) {
(*filter)[i] = 1;
} else if (_probe_state.probe_match_index[_probe_state.probe_index[i]] == 1) {
_probe_state.probe_match_index[_probe_state.probe_index[i]]--;
} else if (_probe_state->probe_match_index[_probe_state->probe_index[i]] == 1) {
_probe_state->probe_match_index[_probe_state->probe_index[i]]--;
(*filter)[i] = !(*filter)[i];
} else if ((*filter)[i] == 0) {
_probe_state.probe_match_index[_probe_state.probe_index[i]]--;
_probe_state->probe_match_index[_probe_state->probe_index[i]]--;
} else {
(*filter)[i] = 0;
}
@ -560,7 +560,7 @@ void JoinHashTable::_remove_duplicate_index_for_right_outer_join(Column::Filter*
size_t row_count = filter->size();
for (size_t i = 0; i < row_count; i++) {
if ((*filter)[i] == 1) {
_probe_state.build_match_index[_probe_state.build_index[i]] = 1;
_probe_state->build_match_index[_probe_state->build_index[i]] = 1;
}
}
}
@ -569,8 +569,8 @@ void JoinHashTable::_remove_duplicate_index_for_right_semi_join(Column::Filter*
size_t row_count = filter->size();
for (size_t i = 0; i < row_count; i++) {
if ((*filter)[i] == 1) {
if (_probe_state.build_match_index[_probe_state.build_index[i]] == 0) {
_probe_state.build_match_index[_probe_state.build_index[i]] = 1;
if (_probe_state->build_match_index[_probe_state->build_index[i]] == 0) {
_probe_state->build_match_index[_probe_state->build_index[i]] = 1;
} else {
(*filter)[i] = 0;
}
@ -582,7 +582,7 @@ void JoinHashTable::_remove_duplicate_index_for_right_anti_join(Column::Filter*
size_t row_count = filter->size();
for (size_t i = 0; i < row_count; i++) {
if ((*filter)[i] == 1) {
_probe_state.build_match_index[_probe_state.build_index[i]] = 1;
_probe_state->build_match_index[_probe_state->build_index[i]] = 1;
}
}
}
@ -590,24 +590,24 @@ void JoinHashTable::_remove_duplicate_index_for_right_anti_join(Column::Filter*
void JoinHashTable::_remove_duplicate_index_for_full_outer_join(Column::Filter* filter) {
size_t row_count = filter->size();
for (size_t i = 0; i < row_count; i++) {
if (_probe_state.probe_match_index[_probe_state.probe_index[i]] == 0) {
if (_probe_state->probe_match_index[_probe_state->probe_index[i]] == 0) {
(*filter)[i] = 1;
continue;
}
if (_probe_state.probe_match_index[_probe_state.probe_index[i]] == 1) {
if (_probe_state->probe_match_index[_probe_state->probe_index[i]] == 1) {
if ((*filter)[i] == 0) {
(*filter)[i] = 1;
} else {
_probe_state.build_match_index[_probe_state.build_index[i]] = 1;
_probe_state->build_match_index[_probe_state->build_index[i]] = 1;
}
continue;
}
if ((*filter)[i] == 0) {
_probe_state.probe_match_index[_probe_state.probe_index[i]]--;
_probe_state->probe_match_index[_probe_state->probe_index[i]]--;
} else {
_probe_state.build_match_index[_probe_state.build_index[i]] = 1;
_probe_state->build_match_index[_probe_state->build_index[i]] = 1;
}
}
}

View File

@ -494,32 +494,32 @@ public:
Status append_chunk(RuntimeState* state, const ChunkPtr& chunk);
const ChunkPtr& get_build_chunk() const { return _table_items.build_chunk; }
Columns& get_key_columns() { return _table_items.key_columns; }
uint32_t get_row_count() const { return _table_items.row_count; }
size_t get_probe_column_count() const { return _table_items.probe_column_count; }
size_t get_build_column_count() const { return _table_items.build_column_count; }
size_t get_bucket_size() const { return _table_items.bucket_size; }
const ChunkPtr& get_build_chunk() const { return _table_items->build_chunk; }
Columns& get_key_columns() { return _table_items->key_columns; }
uint32_t get_row_count() const { return _table_items->row_count; }
size_t get_probe_column_count() const { return _table_items->probe_column_count; }
size_t get_build_column_count() const { return _table_items->build_column_count; }
size_t get_bucket_size() const { return _table_items->bucket_size; }
void remove_duplicate_index(Column::Filter* filter);
int64_t mem_usage() {
int64_t usage = 0;
if (_table_items.build_chunk != nullptr) {
usage += _table_items.build_chunk->memory_usage();
if (_table_items->build_chunk != nullptr) {
usage += _table_items->build_chunk->memory_usage();
}
usage += _table_items.first.capacity() * sizeof(uint32_t);
usage += _table_items.next.capacity() * sizeof(uint32_t);
if (_table_items.build_pool != nullptr) {
usage += _table_items.build_pool->total_reserved_bytes();
usage += _table_items->first.capacity() * sizeof(uint32_t);
usage += _table_items->next.capacity() * sizeof(uint32_t);
if (_table_items->build_pool != nullptr) {
usage += _table_items->build_pool->total_reserved_bytes();
}
if (_table_items.probe_pool != nullptr) {
usage += _table_items.probe_pool->total_reserved_bytes();
if (_table_items->probe_pool != nullptr) {
usage += _table_items->probe_pool->total_reserved_bytes();
}
if (_table_items.build_key_column != nullptr) {
usage += _table_items.build_key_column->memory_usage();
if (_table_items->build_key_column != nullptr) {
usage += _table_items->build_key_column->memory_usage();
}
usage += _table_items.build_slice.size() * sizeof(Slice);
usage += _table_items->build_slice.size() * sizeof(Slice);
return usage;
}
@ -557,8 +557,8 @@ private:
JoinHashMapType _hash_map_type = JoinHashMapType::empty;
JoinHashTableItems _table_items;
HashTableProbeState _probe_state;
std::unique_ptr<JoinHashTableItems> _table_items;
std::unique_ptr<HashTableProbeState> _probe_state;
};
} // namespace starrocks::vectorized
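Turning _table_items and _probe_state from by-value members into unique_ptr (see close() and create() above) lets the join free all hash-table state the moment close() runs, rather than when the owning node is destroyed. A minimal sketch of that pattern with stand-in types:

#include <cstdint>
#include <memory>
#include <vector>

struct TableItemsLike {              // stand-in for JoinHashTableItems
    std::vector<uint32_t> first;
    std::vector<uint32_t> next;
};

class TableLike {
public:
    void create() { _items = std::make_unique<TableItemsLike>(); }
    void close() { _items.reset(); } // frees everything now, not at destruction
private:
    std::unique_ptr<TableItemsLike> _items;
};

int main() {
    TableLike t;
    t.create();
    t.close();                       // memory returns to the allocator here
    return 0;
}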

View File

@ -26,6 +26,9 @@
namespace starrocks::vectorized {
static std::vector<Slice> literal_0_slice_vector{Slice("0")};
static std::vector<Slice> literal_1_slice_vector{Slice("1")};
JsonScanner::JsonScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRange& scan_range,
ScannerCounter* counter)
: FileScanner(state, profile, scan_range.params, counter),
@ -255,7 +258,7 @@ Status JsonReader::close() {
* ------------------
* value1 10
* value2 30
*
*
* Case 2 : Json with JsonPath
* {
* "RECORDS":[
@ -292,7 +295,12 @@ Status JsonReader::read_chunk(Chunk* chunk, int32_t rows_to_read, const std::vec
ColumnPtr& column = chunk->get_column_by_slot_id(slot_desc->id());
const char* column_name = slot_desc->col_name().c_str();
if (!objectValue->IsObject() || !objectValue->HasMember(column_name)) {
column->append_nulls(1);
if (strcmp(column_name, "__op") == 0) {
// special treatment for __op column, fill default value '0' rather than null
column->append_strings(literal_0_slice_vector);
} else {
column->append_nulls(1);
}
} else {
_construct_column((*objectValue)[column_name], column.get(), slot_desc->type());
}
@ -398,11 +406,11 @@ void JsonReader::_construct_column(const rapidjson::Value& objectValue, Column*
break;
}
case rapidjson::Type::kFalseType: {
column->append_strings(std::vector<Slice>{Slice("0")});
column->append_strings(literal_0_slice_vector);
break;
}
case rapidjson::Type::kTrueType: {
column->append_strings(std::vector<Slice>{Slice("1")});
column->append_strings(literal_1_slice_vector);
break;
}
case rapidjson::Type::kNumberType: {

View File

@ -65,9 +65,9 @@ Status OlapScanNode::open(RuntimeState* state) {
OlapScanConjunctsManager::eval_const_conjuncts(_conjunct_ctxs, &status);
_update_status(status);
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_global_dict_map());
DictOptimizeParser::rewrite_descriptor(state, _tuple_desc->slots(), _conjunct_ctxs,
_olap_scan_node.dict_string_id_to_int_ids);
_dict_optimize_parser.set_mutable_dict_maps(state->mutable_query_global_dict_map());
DictOptimizeParser::rewrite_descriptor(state, _conjunct_ctxs, _olap_scan_node.dict_string_id_to_int_ids,
&(_tuple_desc->decoded_slots()));
return Status::OK();
}
@ -134,7 +134,7 @@ Status OlapScanNode::get_next(RuntimeState* state, ChunkPtr* chunk, bool* eos) {
// is the first time of calling `get_next`, pass the second argument of `_fill_chunk_pool` as
// true to ensure that the newly allocated column objects will be returned back into the column
// pool.
_fill_chunk_pool(1, first_call);
TRY_CATCH_BAD_ALLOC(_fill_chunk_pool(1, first_call));
*chunk = std::shared_ptr<Chunk>(ptr);
eval_join_runtime_filters(chunk);
_num_rows_returned += (*chunk)->num_rows();
@ -212,13 +212,17 @@ void OlapScanNode::_scanner_thread(TabletScanner* scanner) {
tls_thread_status.set_mem_tracker(prev_tracker);
_running_threads.fetch_sub(1, std::memory_order_release);
});
tls_thread_status.set_query_id(scanner->runtime_state()->query_id());
Status status = scanner->open(_runtime_state);
if (!status.ok()) {
QUERY_LOG_IF(ERROR, !status.is_end_of_file()) << status;
_update_status(status);
} else {
status = scanner->runtime_state()->check_mem_limit("olap scanner");
if (!status.ok()) {
_update_status(status);
}
}
scanner->set_keep_priority(false);
// Because we use thread pool to scan data from storage. One scanner can't
@ -494,7 +498,7 @@ Status OlapScanNode::_start_scan_thread(RuntimeState* state) {
int concurrency = std::min<int>(kMaxConcurrency, _num_scanners);
int chunks = _chunks_per_scanner * concurrency;
_chunk_pool.reserve(chunks);
_fill_chunk_pool(chunks, true);
TRY_CATCH_BAD_ALLOC(_fill_chunk_pool(chunks, true));
std::lock_guard<std::mutex> l(_mtx);
for (int i = 0; i < concurrency; i++) {
CHECK(_submit_scanner(_pending_scanners.pop(), true));

View File

@ -485,7 +485,7 @@ Status OlapScanConjunctsManager::normalize_conjuncts() {
// TODO(zhuming): if any of the normalized column range is empty, we can know that
// no row will be selected anymore and can return EOF directly.
for (auto& slot : tuple_desc->slots()) {
for (auto& slot : tuple_desc->decoded_slots()) {
const std::string& col_name = slot->col_name();
PrimitiveType type = slot->type().type;
switch (type) {
@ -651,7 +651,7 @@ Status OlapScanConjunctsManager::build_olap_filters() {
for (auto iter : column_value_ranges) {
std::vector<TCondition> filters;
boost::apply_visitor([&](auto&& range) { range.to_olap_filter(filters); }, iter.second);
bool empty_range = boost::apply_visitor([](auto&& range) { return range.empty_range(); }, iter.second);
bool empty_range = boost::apply_visitor([](auto&& range) { return range.is_empty_value_range(); }, iter.second);
if (empty_range) {
return Status::EndOfFile("EOF, Filter by always false condition");
}
@ -727,7 +727,7 @@ void OlapScanConjunctsManager::get_column_predicates(PredicateParser* parser, st
preds->push_back(p);
}
const auto& slots = tuple_desc->slots();
const auto& slots = tuple_desc->decoded_slots();
for (auto& iter : slot_index_to_expr_ctxs) {
int slot_index = iter.first;
auto& expr_ctxs = iter.second;
@ -776,7 +776,7 @@ void OlapScanConjunctsManager::get_not_push_down_conjuncts(std::vector<ExprConte
void OlapScanConjunctsManager::build_column_expr_predicates() {
std::map<SlotId, int> slot_id_to_index;
const auto& slots = tuple_desc->slots();
const auto& slots = tuple_desc->decoded_slots();
for (int i = 0; i < slots.size(); i++) {
const SlotDescriptor* slot_desc = slots[i];
SlotId slot_id = slot_desc->id();
@ -802,6 +802,9 @@ void OlapScanConjunctsManager::build_column_expr_predicates() {
const SlotDescriptor* slot_desc = slots[index];
PrimitiveType ptype = slot_desc->type().type;
if (!is_scalar_primitive_type(ptype)) continue;
// disable on float/double types because min/max values may lose precision.
// The proper fix belongs in the storage layer; this is just a temporary workaround.
if (ptype == PrimitiveType::TYPE_FLOAT || ptype == PrimitiveType::TYPE_DOUBLE) continue;
{
auto iter = slot_index_to_expr_ctxs.find(index);
if (iter == slot_index_to_expr_ctxs.end()) {

View File

@ -166,7 +166,11 @@ ChunkPtr ORCScanner::_transfer_chunk(starrocks::vectorized::ChunkPtr& src) {
if (range.__isset.num_of_columns_from_file) {
for (int i = 0; i < range.columns_from_path.size(); ++i) {
auto slot = _src_slot_descriptors[range.num_of_columns_from_file + i];
cast_chunk->append_column(src->get_column_by_slot_id(slot->id()), slot->id());
// This happens when there are extra fields in the broker load specification
// but those extra fields don't match any fields in the native table.
if (slot != nullptr) {
cast_chunk->append_column(src->get_column_by_slot_id(slot->id()), slot->id());
}
}
}
return cast_chunk;

View File

@ -33,7 +33,7 @@ const static std::unordered_map<orc::TypeKind, PrimitiveType> g_orc_starrocks_ty
{orc::DOUBLE, TYPE_DOUBLE}, {orc::DECIMAL, TYPE_DECIMALV2},
{orc::DATE, TYPE_DATE}, {orc::TIMESTAMP, TYPE_DATETIME},
{orc::STRING, TYPE_VARCHAR}, {orc::BINARY, TYPE_VARCHAR},
{orc::CHAR, TYPE_VARCHAR}, {orc::VARCHAR, TYPE_VARCHAR},
{orc::CHAR, TYPE_CHAR}, {orc::VARCHAR, TYPE_VARCHAR},
};
// NOLINTNEXTLINE
@ -53,6 +53,13 @@ const static std::set<PrimitiveType> g_starrocks_decimal_type = {TYPE_DECIMAL32,
const static cctz::time_point<cctz::sys_seconds> CCTZ_UNIX_EPOCH =
std::chrono::time_point_cast<cctz::sys_seconds>(std::chrono::system_clock::from_time_t(0));
// Hive ORC char type will pad trailing spaces.
// https://docs.cloudera.com/documentation/enterprise/6/6.3/topics/impala_char.html
static inline size_t remove_trailing_spaces(const char* s, size_t size) {
while (size > 0 && s[size - 1] == ' ') size--;
return size;
}
static void fill_boolean_column(orc::ColumnVectorBatch* cvb, ColumnPtr& col, int from, int size,
const TypeDescriptor& type_desc, void* ctx) {
auto* data = down_cast<orc::LongVectorBatch*>(cvb);
@ -542,9 +549,19 @@ static void fill_string_column(orc::ColumnVectorBatch* cvb, ColumnPtr& col, int
auto& vb = values->get_bytes();
auto& vo = values->get_offset();
int pos = from;
for (int i = col_start; i < col_start + size; ++i, ++pos) {
vb.insert(vb.end(), data->data[pos], data->data[pos] + data->length[pos]);
vo.emplace_back(vb.size());
if (type_desc.type == TYPE_CHAR) {
// Possibly there are trailing space padding characters in the value; we have to strip them off.
for (int i = col_start; i < col_start + size; ++i, ++pos) {
size_t str_size = remove_trailing_spaces(data->data[pos], data->length[pos]);
vb.insert(vb.end(), data->data[pos], data->data[pos] + str_size);
vo.emplace_back(vb.size());
}
} else {
for (int i = col_start; i < col_start + size; ++i, ++pos) {
vb.insert(vb.end(), data->data[pos], data->data[pos] + data->length[pos]);
vo.emplace_back(vb.size());
}
}
// col_start == 0 and from == 0 means it's at the top level of the fill chunk, not in the middle of an array
@ -594,26 +611,50 @@ static void fill_string_column_with_null(orc::ColumnVectorBatch* cvb, ColumnPtr&
auto& vb = values->get_bytes();
auto& vo = values->get_offset();
int pos = from;
if (cvb->hasNulls) {
for (int i = col_start; i < col_start + size; ++i, ++from) {
nulls[i] = !cvb->notNull[from];
if (cvb->notNull[from]) {
vb.insert(vb.end(), data->data[from], data->data[from] + data->length[from]);
vo.emplace_back(vb.size());
} else {
vo.emplace_back(vb.size());
if (type_desc.type == TYPE_CHAR) {
// Possibly there are trailing space padding characters in the value; we have to strip them off.
for (int i = col_start; i < col_start + size; ++i, ++pos) {
nulls[i] = !cvb->notNull[pos];
if (cvb->notNull[pos]) {
size_t str_size = remove_trailing_spaces(data->data[pos], data->length[pos]);
vb.insert(vb.end(), data->data[pos], data->data[pos] + str_size);
vo.emplace_back(vb.size());
} else {
vo.emplace_back(vb.size());
}
}
} else {
for (int i = col_start; i < col_start + size; ++i, ++pos) {
nulls[i] = !cvb->notNull[pos];
if (cvb->notNull[pos]) {
vb.insert(vb.end(), data->data[pos], data->data[pos] + data->length[pos]);
vo.emplace_back(vb.size());
} else {
vo.emplace_back(vb.size());
}
}
}
} else {
for (int i = col_start; i < col_start + size; ++i, ++from) {
vb.insert(vb.end(), data->data[from], data->data[from] + data->length[from]);
vo.emplace_back(vb.size());
if (type_desc.type == TYPE_CHAR) {
// Possibly there are trailing space padding characters in the value; we have to strip them off.
for (int i = col_start; i < col_start + size; ++i, ++pos) {
size_t str_size = remove_trailing_spaces(data->data[pos], data->length[pos]);
vb.insert(vb.end(), data->data[pos], data->data[pos] + str_size);
vo.emplace_back(vb.size());
}
} else {
for (int i = col_start; i < col_start + size; ++i, ++pos) {
vb.insert(vb.end(), data->data[pos], data->data[pos] + data->length[pos]);
vo.emplace_back(vb.size());
}
}
}
// col_start == 0 and from == 0 means it's at the top level of the fill chunk, not in the middle of an array
// otherwise `broker_load_filter` does not work.
from -= size; // move back
if (adapter->get_broker_load_mode() && from == 0 && col_start == 0) {
auto* filter = adapter->get_broker_load_fiter()->data();
auto strict_mode = adapter->get_strict_mode();
@ -1443,7 +1484,13 @@ bool OrcScannerAdapter::_ok_to_add_conjunct(const Expr* conjunct) {
ColumnRef* ref = down_cast<ColumnRef*>(c);
SlotId slot_id = ref->slot_id();
// slot can not be found.
if (_slot_id_to_desc.find(slot_id) == _slot_id_to_desc.end()) {
auto iter = _slot_id_to_desc.find(slot_id);
if (iter == _slot_id_to_desc.end()) {
return false;
}
SlotDescriptor* slot_desc = iter->second;
// It's unsafe to do eval on char type because of padding problems.
if (slot_desc->type().type == TYPE_CHAR) {
return false;
}
@ -1840,8 +1887,16 @@ static Status decode_string_min_max(PrimitiveType ptype, const orc::proto::Colum
if (colStats.has_stringstatistics() && colStats.stringstatistics().has_minimum() &&
colStats.stringstatistics().has_maximum()) {
const auto& stats = colStats.stringstatistics();
const Slice& min = Slice(stats.minimum());
const Slice& max = Slice(stats.maximum());
const std::string& min_value = stats.minimum();
const std::string& max_value = stats.maximum();
size_t min_value_size = min_value.size();
size_t max_value_size = max_value.size();
if (ptype == TYPE_CHAR) {
min_value_size = remove_trailing_spaces(min_value.c_str(), min_value_size);
max_value_size = remove_trailing_spaces(max_value.c_str(), max_value_size);
}
const Slice min(min_value.c_str(), min_value_size);
const Slice max(max_value.c_str(), max_value_size);
switch (ptype) {
case PrimitiveType::TYPE_VARCHAR:
DOWN_CAST_ASSIGN_MIN_MAX(PrimitiveType::TYPE_VARCHAR);
@ -2006,7 +2061,7 @@ void OrcScannerAdapter::report_error_message(const std::string& reason, const st
_state->append_error_msg_to_file(error_msg, reason);
}
int OrcScannerAdapter::get_column_id_by_name(const std::string& name) {
int OrcScannerAdapter::get_column_id_by_name(const std::string& name) const {
const auto& it = _name_to_column_id.find(name);
if (it != _name_to_column_id.end()) {
return it->second;

View File

@ -91,7 +91,7 @@ public:
const SlotDescriptor* get_current_slot() const { return _current_slot; }
void set_current_file_name(const std::string& name) { _current_file_name = name; }
void report_error_message(const std::string& reason, const std::string& raw_data);
int get_column_id_by_name(const std::string& name);
int get_column_id_by_name(const std::string& name) const;
private:
bool _ok_to_add_conjunct(const Expr* conjunct);

View File

@ -91,7 +91,7 @@ Status ProjectNode::open(RuntimeState* state) {
RETURN_IF_ERROR(Expr::open(_expr_ctxs, state));
RETURN_IF_ERROR(Expr::open(_common_sub_expr_ctxs, state));
GlobalDictMaps* mdict_maps = state->mutable_global_dict_map();
GlobalDictMaps* mdict_maps = state->mutable_query_global_dict_map();
_dict_optimize_parser.set_mutable_dict_maps(mdict_maps);
auto init_dict_optimize = [&](std::vector<ExprContext*>& expr_ctxs, std::vector<SlotId>& target_slots) {

View File

@ -71,6 +71,8 @@ Status TabletScanner::open([[maybe_unused]] RuntimeState* runtime_state) {
_tablet->full_name(), st.to_string());
st = Status::InternalError(msg);
LOG(WARNING) << st;
} else {
RETURN_IF_ERROR(runtime_state->check_mem_limit("olap scanner open"));
}
return st;
}
@ -194,7 +196,7 @@ Status TabletScanner::_init_return_columns() {
// mapping a slot-column-id to schema-columnid
Status TabletScanner::_init_global_dicts() {
const auto& global_dict_map = _runtime_state->get_global_dict_map();
const auto& global_dict_map = _runtime_state->get_query_global_dict_map();
auto global_dict = _parent->_obj_pool.add(new ColumnIdToGlobalDictMap());
// mapping column id to storage column ids
for (auto slot : _parent->_tuple_desc->slots()) {

View File

@ -13,6 +13,7 @@
#include "exec/vectorized/chunks_sorter_full_sort.h"
#include "exec/vectorized/chunks_sorter_topn.h"
#include "gutil/casts.h"
#include "runtime/current_thread.h"
namespace starrocks::vectorized {
@ -147,7 +148,6 @@ Status TopNNode::_consume_chunks(RuntimeState* state, ExecNode* child) {
bool eos = false;
_chunks_sorter->setup_runtime(runtime_profile(), "ChunksSorter");
do {
RETURN_IF_ERROR(state->check_mem_limit("Sort"));
RETURN_IF_CANCELLED(state);
ChunkPtr chunk;
timer.stop();
@ -158,10 +158,11 @@ Status TopNNode::_consume_chunks(RuntimeState* state, ExecNode* child) {
timer.start();
if (chunk != nullptr && chunk->num_rows() > 0) {
ChunkPtr materialize_chunk = _materialize_chunk_before_sort(chunk.get());
RETURN_IF_ERROR(_chunks_sorter->update(state, materialize_chunk));
TRY_CATCH_BAD_ALLOC(RETURN_IF_ERROR(_chunks_sorter->update(state, materialize_chunk)));
}
} while (!eos);
RETURN_IF_ERROR(_chunks_sorter->done(state));
TRY_CATCH_BAD_ALLOC(RETURN_IF_ERROR(_chunks_sorter->done(state)));
return Status::OK();
}

View File

@ -103,6 +103,7 @@ struct DistinctAggregateState<PT, BinaryPTGuard<PT>> {
KeyType key(raw_key);
set.template lazy_emplace(key, [&](const auto& ctor) {
uint8_t* pos = mem_pool->allocate(key.size);
assert(pos != nullptr);
memcpy(pos, key.data, key.size);
ctor(pos, key.size, key.hash);
ret = phmap::item_serialize_size<SliceHashSet>::value;
@ -115,6 +116,7 @@ struct DistinctAggregateState<PT, BinaryPTGuard<PT>> {
KeyType key(reinterpret_cast<uint8_t*>(raw_key.data), raw_key.size, hash);
set.template lazy_emplace_with_hash(key, hash, [&](const auto& ctor) {
uint8_t* pos = mem_pool->allocate(key.size);
assert(pos != nullptr);
memcpy(pos, key.data, key.size);
ctor(pos, key.size, key.hash);
ret = phmap::item_serialize_size<SliceHashSet>::value;
@ -156,6 +158,7 @@ struct DistinctAggregateState<PT, BinaryPTGuard<PT>> {
// we only memcpy when the key is new
set.template lazy_emplace(key, [&](const auto& ctor) {
uint8_t* pos = mem_pool->allocate(key.size);
assert(pos != nullptr);
memcpy(pos, key.data, key.size);
ctor(pos, key.size, key.hash);
mem_usage += phmap::item_serialize_size<SliceHashSet>::value;

View File

@ -61,8 +61,20 @@ struct ArithmeticBinaryOperator {
} else if constexpr (is_sub_op<Op>) {
return l - r;
} else if constexpr (is_mul_op<Op>) {
// avoid 0 multiplied by a negative number producing -0
if constexpr (std::is_floating_point<LType>::value) {
if (UNLIKELY(l == LType(0) || r == RType(0))) {
return LType(0);
}
}
return l * r;
} else if constexpr (is_div_op<Op>) {
// avoid 0 divided by a negative number producing -0
if constexpr (std::is_floating_point<LType>::value) {
if (UNLIKELY(l == 0)) {
return 0;
}
}
if constexpr (may_cause_fpe<ResultType>) {
if (UNLIKELY(check_fpe_of_min_div_by_minus_one(l, r))) {
return signed_minimum<ResultType>;
@ -101,6 +113,12 @@ struct ArithmeticBinaryOperator<Op, TYPE_DECIMALV2, DivModOpGuard<Op>, guard::Gu
template <typename LType, typename RType, typename ResultType>
static inline ReturnType<TYPE_DECIMALV2, ResultType> apply(const LType& l, const RType& r) {
if constexpr (is_div_op<Op>) {
// avoid 0 divided by a negative number producing -0
if constexpr (std::is_floating_point<LType>::value) {
if (UNLIKELY(l == DecimalV2Value::ZERO)) {
return DecimalV2Value::ZERO;
}
}
return (r == DecimalV2Value::ZERO) ? l : (l / r);
} else if constexpr (is_mod_op<Op>) {
return (r == DecimalV2Value::ZERO) ? l : (l % r);
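These guards exist because IEEE-754 multiplication and division preserve the sign of zero: 0.0 * -3.0 and 0.0 / -3.0 both evaluate to -0.0, which compares equal to 0.0 yet prints as "-0". A small check of the behavior being avoided:

#include <cassert>
#include <cmath>

int main() {
    double z = 0.0 * -3.0;
    assert(z == 0.0);                 // -0.0 compares equal to 0.0 ...
    assert(std::signbit(z));          // ... but still carries a negative sign bit
    assert(std::signbit(0.0 / -3.0)); // same for division
    double guarded = 0.0;             // what the patched operator returns instead
    assert(!std::signbit(guarded));
    return 0;
}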

View File

@ -250,9 +250,9 @@ private:
size_t size = when_columns[0]->size();
builder.reserve(size);
bool columns_has_null = false;
bool when_columns_has_null = false;
for (ColumnPtr& column : when_columns) {
columns_has_null |= column->has_null();
when_columns_has_null |= column->has_null();
}
// max number of cases for which the SIMD CASE WHEN implementation is used
@ -260,8 +260,13 @@ private:
// optimization for non-nullable arithmetic types
if constexpr (isArithmeticPT<ResultType>) {
bool then_columns_has_null = false;
for (const auto& column : then_columns) {
then_columns_has_null |= column->has_null();
}
bool check_could_use_multi_simd_selector =
!columns_has_null && when_columns.size() <= max_simd_case_when_size && _has_else_expr;
!when_columns_has_null && when_columns.size() <= max_simd_case_when_size && !then_columns_has_null;
if (check_could_use_multi_simd_selector) {
int then_column_size = then_columns.size();
@ -292,6 +297,12 @@ private:
}
auto res = RunTimeColumnType<ResultType>::create();
if constexpr (pt_is_decimal<ResultType>) {
res->set_scale(this->type().scale);
res->set_precision(this->type().precision);
}
auto& container = res->get_data();
container.resize(size);
SIMD_muti_selector<ResultType>::multi_select_if(select_vec, when_column_size, container, select_list,
@ -301,7 +312,7 @@ private:
}
size_t view_size = when_viewers.size();
if (!columns_has_null) {
if (!when_columns_has_null) {
for (int row = 0; row < size; ++row) {
int i = 0;
while (i < view_size && !(when_viewers[i].value(row))) {

View File

@ -298,6 +298,7 @@ public:
CASE_TYPE(TYPE_DOUBLE, CLASS); \
CASE_TYPE(TYPE_CHAR, CLASS); \
CASE_TYPE(TYPE_VARCHAR, CLASS); \
CASE_TYPE(TYPE_TIME, CLASS); \
CASE_TYPE(TYPE_DATE, CLASS); \
CASE_TYPE(TYPE_DATETIME, CLASS); \
CASE_TYPE(TYPE_DECIMALV2, CLASS); \

View File

@ -195,7 +195,7 @@ public:
ColumnPtr evaluate(ExprContext* context, vectorized::Chunk* ptr) override {
ColumnPtr lhs = _children[0]->evaluate(context, ptr);
if (ColumnHelper::count_nulls(lhs) == lhs->size()) {
if (!_eq_null && ColumnHelper::count_nulls(lhs) == lhs->size()) {
return ColumnHelper::create_const_null_column(lhs->size());
}

View File

@ -2,6 +2,7 @@
#include "exprs/vectorized/time_functions.h"
#include "column/column_helper.h"
#include "exprs/vectorized/binary_function.h"
#include "exprs/vectorized/unary_function.h"
#include "runtime/runtime_state.h"
@ -1125,6 +1126,15 @@ Status TimeFunctions::str_to_date_close(starrocks_udf::FunctionContext* context,
return Status::OK();
}
DEFINE_UNARY_FN_WITH_IMPL(TimestampToDate, value) {
return DateValue{timestamp::to_julian(value._timestamp)};
}
ColumnPtr TimeFunctions::str2date(FunctionContext* context, const Columns& columns) {
ColumnPtr datetime = str_to_date(context, columns);
return VectorizedStrictUnaryFunction<TimestampToDate>::evaluate<TYPE_DATETIME, TYPE_DATE>(datetime);
}
Status TimeFunctions::format_prepare(starrocks_udf::FunctionContext* context,
starrocks_udf::FunctionContext::FunctionStateScope scope) {
if (scope != FunctionContext::FRAGMENT_LOCAL) {
@ -1192,9 +1202,11 @@ ColumnPtr date_format_func(const Columns& cols, size_t patten_size) {
ColumnBuilder<TYPE_VARCHAR> builder;
ColumnViewer<Type> viewer(cols[0]);
builder.data_column()->reserve(viewer.size(), viewer.size() * patten_size);
size_t num_rows = viewer.size();
for (int i = 0; i < viewer.size(); ++i) {
builder.data_column()->reserve(num_rows, num_rows * patten_size);
for (int i = 0; i < num_rows; ++i) {
if (viewer.is_null(i)) {
builder.append_null();
continue;
@ -1203,7 +1215,7 @@ ColumnPtr date_format_func(const Columns& cols, size_t patten_size) {
builder.append(OP::template apply<RunTimeCppType<Type>, RunTimeCppType<TYPE_VARCHAR>>(viewer.value(i)));
}
return builder.build(cols[0]->is_constant());
return builder.build(ColumnHelper::is_all_const(cols));
}
std::string format_for_yyyyMMdd(const DateValue& date_value) {
@ -1393,13 +1405,18 @@ ColumnPtr TimeFunctions::datetime_format(FunctionContext* context, const Columns
if (fc != nullptr && fc->is_valid) {
return do_format<TYPE_DATETIME>(fc, columns);
} else {
bool all_const = ColumnHelper::is_all_const(columns);
ColumnBuilder<TYPE_VARCHAR> builder;
ColumnViewer<TYPE_DATETIME> viewer_date(columns[0]);
ColumnViewer<TYPE_VARCHAR> viewer_format(columns[1]);
// when all_const is true, viewer_date.size() will return 1,
// which avoids unnecessary calculations
size_t num_rows = all_const ? viewer_date.size() : columns[0]->size();
builder.reserve(columns[0]->size());
for (int i = 0; i < viewer_date.size(); ++i) {
for (int i = 0; i < num_rows; ++i) {
if (viewer_date.is_null(i)) {
builder.append_null();
continue;
@ -1408,7 +1425,7 @@ ColumnPtr TimeFunctions::datetime_format(FunctionContext* context, const Columns
common_format_process(&viewer_date, &viewer_format, &builder, i);
}
return builder.build(columns[0]->is_constant());
return builder.build(all_const);
}
}
@ -1421,13 +1438,14 @@ ColumnPtr TimeFunctions::date_format(FunctionContext* context, const Columns& co
if (fc != nullptr && fc->is_valid) {
return do_format<TYPE_DATE>(fc, columns);
} else {
int num_rows = columns[0]->size();
ColumnBuilder<TYPE_VARCHAR> builder;
ColumnViewer<TYPE_DATE> viewer_date(columns[0]);
ColumnViewer<TYPE_VARCHAR> viewer_format(columns[1]);
builder.reserve(columns[0]->size());
for (int i = 0; i < viewer_date.size(); ++i) {
for (int i = 0; i < num_rows; ++i) {
if (viewer_date.is_null(i)) {
builder.append_null();
continue;
@ -1436,7 +1454,7 @@ ColumnPtr TimeFunctions::date_format(FunctionContext* context, const Columns& co
common_format_process(&viewer_date, &viewer_format, &builder, i);
}
return builder.build(columns[0]->is_constant());
return builder.build(ColumnHelper::is_all_const(columns));
}
}
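The switch from columns[0]->is_constant() to ColumnHelper::is_all_const(columns) matters when only the date column is constant but the format column varies per row; marking the result constant then would be wrong. A toy illustration of the check, with stand-in types (the real ColumnBuilder/ColumnHelper live in be/src/column):

#include <cassert>
#include <string>
#include <vector>

struct ColLike { bool is_const; std::vector<std::string> rows; };

static bool is_all_const(const std::vector<ColLike>& cols) {
    for (const auto& c : cols)
        if (!c.is_const) return false;
    return true;
}

int main() {
    ColLike date{true, {"2021-12-31"}};  // constant first column
    ColLike fmt{false, {"%Y", "%m"}};    // per-row format column
    // checking only cols[0] would wrongly claim a constant result:
    assert(date.is_const);
    assert(!is_all_const({date, fmt}));
    return 0;
}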

View File

@ -364,12 +364,20 @@ public:
static ColumnPtr str_to_date_uncommon(FunctionContext* context, const starrocks::vectorized::Columns& columns);
/**
*
* cast string to datetime
* @param context
* @param columns [BinaryColumn of TYPE_VARCHAR, BinaryColumn of TYPE_VARCHAR] The first column holds the datetime string, the second column holds the format.
* @return TimestampColumn
*/
DEFINE_VECTORIZED_FN(str_to_date);
/**
*
* cast string to date; this function is called by the FE's getStrToDateFunction and is invisible to users
*
*/
DEFINE_VECTORIZED_FN(str2date);
static bool is_date_format(const Slice& slice, char** start);
static bool is_datetime_format(const Slice& slice, char** start);

View File

@ -119,10 +119,16 @@ void ChecksumAction::handle(HttpRequest* req) {
int64_t ChecksumAction::do_checksum(int64_t tablet_id, int64_t version, int64_t version_hash, int32_t schema_hash,
HttpRequest* req) {
MemTracker* mem_tracker = ExecEnv::GetInstance()->consistency_mem_tracker();
Status check_limit_st = mem_tracker->check_mem_limit("Start consistency check.");
if (!check_limit_st.ok()) {
LOG(WARNING) << "checksum failed: " << check_limit_st.message();
return -1L;
}
OLAPStatus res = OLAP_SUCCESS;
uint32_t checksum;
EngineChecksumTask engine_task(ExecEnv::GetInstance()->consistency_mem_tracker(), tablet_id, schema_hash, version,
version_hash, &checksum);
EngineChecksumTask engine_task(mem_tracker, tablet_id, schema_hash, version, version_hash, &checksum);
res = engine_task.execute();
if (res != OLAP_SUCCESS) {
LOG(WARNING) << "checksum failed. status: " << res << ", signature: " << tablet_id;

View File

@ -59,7 +59,7 @@ Status MetaAction::_handle_header(HttpRequest* req, std::string* json_meta) {
LOG(WARNING) << "no tablet for tablet_id:" << tablet_id << " schema hash:" << schema_hash;
return Status::InternalError("no tablet exist");
}
TabletMetaSharedPtr tablet_meta(new TabletMeta());
auto tablet_meta = TabletMeta::create(StorageEngine::instance()->tablet_meta_mem_tracker());
tablet->generate_tablet_meta_copy(tablet_meta);
json2pb::Pb2JsonOptions json_options;
json_options.pretty_json = true;

View File

@ -44,6 +44,7 @@
#include "http/http_response.h"
#include "http/utils.h"
#include "runtime/client_cache.h"
#include "runtime/current_thread.h"
#include "runtime/exec_env.h"
#include "runtime/fragment_mgr.h"
#include "runtime/load_path_mgr.h"
@ -54,6 +55,7 @@
#include "runtime/stream_load/stream_load_pipe.h"
#include "util/byte_buffer.h"
#include "util/debug_util.h"
#include "util/defer_op.h"
#include "util/json_util.h"
#include "util/metrics.h"
#include "util/starrocks_metrics.h"
@ -289,13 +291,21 @@ void StreamLoadAction::on_chunk_data(HttpRequest* req) {
return;
}
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(ctx->instance_mem_tracker.get());
struct evhttp_request* ev_req = req->get_evhttp_request();
auto evbuf = evhttp_request_get_input_buffer(ev_req);
int64_t start_read_data_time = MonotonicNanos();
while (evbuffer_get_length(evbuf) > 0) {
auto bb = ByteBuffer::allocate(4096);
auto remove_bytes = evbuffer_remove(evbuf, bb->ptr, bb->capacity);
ByteBufferPtr bb = ByteBuffer::allocate(4096);
int remove_bytes;
{
// The memory is allocated in the http server thread,
// so the release of this memory must be recorded in the ProcessMemTracker
SCOPED_THREAD_LOCAL_MEM_TRACKER_SETTER(nullptr);
remove_bytes = evbuffer_remove(evbuf, bb->ptr, bb->capacity);
}
bb->pos = remove_bytes;
bb->flip();
auto st = ctx->body_sink->append(bb);

View File

@ -26,6 +26,7 @@
#include "gen_cpp/PlanNodes_types.h"
#include "http/http_handler.h"
#include "runtime/client_cache.h"
#include "runtime/mem_tracker.h"
#include "runtime/message_body_sink.h"
namespace starrocks {

View File

@ -137,9 +137,6 @@ void mem_tracker_handler(MemTracker* mem_tracker, const WebPageHandler::Argument
} else if (iter->second == "chunk_allocator") {
start_mem_tracker = ExecEnv::GetInstance()->chunk_allocator_mem_tracker();
cur_level = 2;
} else if (iter->second == "clone") {
start_mem_tracker = ExecEnv::GetInstance()->clone_mem_tracker();
cur_level = 2;
} else if (iter->second == "consistency") {
start_mem_tracker = ExecEnv::GetInstance()->consistency_mem_tracker();
cur_level = 2;
@ -154,8 +151,32 @@ void mem_tracker_handler(MemTracker* mem_tracker, const WebPageHandler::Argument
std::vector<MemTracker::SimpleItem> items;
// Metadata memory statistics use the old memory framework
// and are not in the RootMemTrackerTree, so they need to be added here
MemTracker* meta_mem_tracker = ExecEnv::GetInstance()->tablet_meta_mem_tracker();
MemTracker::SimpleItem meta_item{"tablet_meta",
"process",
2,
meta_mem_tracker->limit(),
meta_mem_tracker->consumption(),
meta_mem_tracker->peak_consumption()};
// Update memory statistics use the old memory framework
// and are not in the RootMemTrackerTree, so they need to be added here
MemTracker* update_mem_tracker = ExecEnv::GetInstance()->update_mem_tracker();
MemTracker::SimpleItem update_item{"update",
"process",
2,
update_mem_tracker->limit(),
update_mem_tracker->consumption(),
update_mem_tracker->peak_consumption()};
if (start_mem_tracker != nullptr) {
start_mem_tracker->list_mem_usage(&items, cur_level, upper_level);
if (start_mem_tracker == ExecEnv::GetInstance()->process_mem_tracker()) {
items.emplace_back(meta_item);
items.emplace_back(update_item);
}
for (const auto& item : items) {
std::string level_str = ItoaKMGT(item.level);

View File

@ -92,6 +92,7 @@ set(RUNTIME_FILES
hdfs/hdfs_fs_cache.cpp
runtime_filter_worker.cpp
global_dicts.cpp
current_thread.cpp
)
set(RUNTIME_FILES ${RUNTIME_FILES}

Some files were not shown because too many files have changed in this diff.