Signed-off-by: Kevin Cai <kevin.cai@celerdata.com> Co-authored-by: Kevin Cai <caixiaohua@starrocks.com> Co-authored-by: Kevin Cai <kevin.cai@celerdata.com>
This commit is contained in:
parent
5f1aa6a870
commit
8f3acf1d0c
|
|
@ -923,6 +923,9 @@ CONF_mInt64(tablet_internal_parallel_min_scan_dop, "4");
|
|||
// A lake tablet can be split only if its number of rows is less than lake_tablet_rows_splitted_ratio * splitted_scan_rows.
|
||||
CONF_mDouble(lake_tablet_rows_splitted_ratio, "1.5");
|
||||
|
||||
// Allow skipping invalid delete predicates so that the segment data can still be read back and corrected manually.
|
||||
CONF_mBool(lake_tablet_ignore_invalid_delete_predicate, "false");
|
||||
|
||||
// The bitmap serialize version.
|
||||
CONF_Int16(bitmap_serialize_version, "1");
|
||||
// The max hdfs file handle.
|
||||
|
|
|
|||
|
|
@ -34,6 +34,9 @@ Status ConjunctivePredicates::evaluate_or(const Chunk* chunk, uint8_t* selection
|
|||
|
||||
Status ConjunctivePredicates::evaluate(const Chunk* chunk, uint8_t* selection, uint16_t from, uint16_t to) const {
|
||||
FAIL_POINT_TRIGGER_RETURN_ERROR(random_error);
|
||||
if (empty()) {
|
||||
return Status::OK();
|
||||
}
|
||||
DCHECK_LE(to, chunk->num_rows());
|
||||
if (!_vec_preds.empty()) {
|
||||
const ColumnPredicate* pred = _vec_preds[0];
|
||||
|
|
@ -50,6 +53,9 @@ Status ConjunctivePredicates::evaluate(const Chunk* chunk, uint8_t* selection, u
|
|||
|
||||
Status ConjunctivePredicates::evaluate_or(const Chunk* chunk, uint8_t* selection, uint16_t from, uint16_t to) const {
|
||||
DCHECK_LE(to, chunk->num_rows());
|
||||
if (empty()) {
|
||||
return Status::OK();
|
||||
}
|
||||
std::unique_ptr<uint8_t[]> buff(new uint8_t[chunk->num_rows()]);
|
||||
RETURN_IF_ERROR(evaluate(chunk, buff.get(), from, to));
|
||||
const uint8_t* p = buff.get();
|
||||
|
|
|
|||
|
|
@ -22,6 +22,9 @@ namespace starrocks {
|
|||
|
||||
void DeletePredicates::add(int32_t version, ConjunctivePredicates preds) {
|
||||
// fast path.
|
||||
if (preds.empty()) {
|
||||
return;
|
||||
}
|
||||
if (_version_predicates.empty() || version > _version_predicates.back()._version) {
|
||||
_version_predicates.emplace_back(version, std::move(preds));
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -454,13 +454,24 @@ Status TabletReader::init_delete_predicates(const TabletReaderParams& params, De
|
|||
|
||||
ConjunctivePredicates conjunctions;
|
||||
for (const auto& cond : conds) {
|
||||
ASSIGN_OR_RETURN(ColumnPredicate * pred, pred_parser.parse_thrift_cond(cond));
|
||||
conjunctions.add(pred);
|
||||
auto pred_or = pred_parser.parse_thrift_cond(cond);
|
||||
if (!pred_or.ok()) {
|
||||
if (LIKELY(!config::lake_tablet_ignore_invalid_delete_predicate)) {
|
||||
return pred_or.status();
|
||||
} else {
|
||||
LOG(WARNING) << "failed to parse delete condition.column_name[" << cond.column_name
|
||||
<< "], condition_op[" << cond.condition_op << "], condition_values["
|
||||
<< (cond.condition_values.empty() ? "<empty>" : cond.condition_values[0]) << "].";
|
||||
continue;
|
||||
}
|
||||
}
|
||||
conjunctions.add(pred_or.value());
|
||||
// save for memory release.
|
||||
_predicate_free_list.emplace_back(pred);
|
||||
_predicate_free_list.emplace_back(pred_or.value());
|
||||
}
|
||||
if (!conjunctions.empty()) {
|
||||
dels->add(index, conjunctions);
|
||||
}
|
||||
|
||||
dels->add(index, conjunctions);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
|||
|
|
@ -1607,6 +1607,8 @@ Status SegmentIterator::_do_get_next(Chunk* result, vector<rowid_t>* rowid) {
|
|||
if (chunk_size > 0 && chunk->delete_state() != DEL_NOT_SATISFIED && !_opts.delete_predicates.empty()) {
|
||||
SCOPED_RAW_TIMER(&_opts.stats->del_filter_ns);
|
||||
size_t old_sz = chunk->num_rows();
|
||||
// NOTE: risk of using _selection.data() without initialization
|
||||
// if the delete_predicates do nothing to the selection.
|
||||
RETURN_IF_ERROR(_opts.delete_predicates.evaluate(chunk, _selection.data()));
|
||||
size_t deletes = SIMD::count_nonzero(_selection.data(), old_sz);
|
||||
if (deletes == old_sz) {
|
||||
|
|
|
|||
|
|
@ -267,6 +267,7 @@ set(EXEC_FILES
|
|||
./storage/utils_test.cpp
|
||||
./storage/del_vector_test.cpp
|
||||
./storage/delete_handler_test.cpp
|
||||
./storage/delete_predicates_test.cpp
|
||||
./storage/delta_column_group_test.cpp
|
||||
./storage/fast_schema_evolution_test.cpp
|
||||
./storage/file_utils_test.cpp
|
||||
|
|
|
|||
|
|
@ -171,6 +171,42 @@ TEST(ConjunctivePredicatesTest, test_evaluate) {
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that a ConjunctivePredicates holding no predicates leaves the
// caller-supplied selection buffer unchanged for evaluate(), evaluate_or()
// and evaluate_and().
TEST(ConjunctivePredicatesTest, test_empty_predicates) {
    // Schema with a single INT column named "c0".
    SchemaPtr test_schema(new Schema());
    auto int_field = std::make_shared<Field>(0, "c0", TYPE_INT, true);
    test_schema->append(int_field);

    // Column contents, in order: NULL, 1, 2, 3.
    auto int_column = ChunkHelper::column_from_field(*int_field);
    int_column->append_datum(Datum());
    for (int value = 1; value <= 3; ++value) {
        int_column->append_datum(Datum(value));
    }

    ChunkPtr input_chunk = std::make_shared<Chunk>(Columns{std::move(int_column)}, test_schema);

    // Pre-filled selection; each evaluation below is expected to leave it as is.
    const char* const untouched = "1,0,1,0";
    std::vector<uint8_t> sel = {1, 0, 1, 0};
    EXPECT_EQ(untouched, to_string(sel));

    ConjunctivePredicates no_preds;

    no_preds.evaluate(input_chunk.get(), sel.data());
    EXPECT_EQ(untouched, to_string(sel));

    no_preds.evaluate_or(input_chunk.get(), sel.data());
    EXPECT_EQ(untouched, to_string(sel));

    no_preds.evaluate_and(input_chunk.get(), sel.data());
    EXPECT_EQ(untouched, to_string(sel));
}
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
TEST(ConjunctivePredicatesTest, test_evaluate_and) {
|
||||
SchemaPtr schema(new Schema());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "storage/delete_predicates.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "storage/column_predicate.h"
|
||||
#include "storage/conjunctive_predicates.h"
|
||||
|
||||
namespace starrocks {
|
||||
|
||||
using PredicatePtr = std::unique_ptr<ColumnPredicate>;
|
||||
|
||||
// Checks that adding an empty ConjunctivePredicates to DeletePredicates is a
// no-op, while adding a non-empty one makes it visible through
// get_predicates().
TEST(DeletePredicatesTest, test_add_empty_preds) {
    DeletePredicates del_preds;
    EXPECT_TRUE(del_preds.get_predicates(0).empty());

    {
        // An empty conjunction must not be recorded.
        ConjunctivePredicates empty_conjuncts;
        del_preds.add(1, empty_conjuncts);
        EXPECT_TRUE(del_preds.get_predicates(0).empty());
    }

    {
        // A conjunction holding a single predicate is recorded as one entry.
        PredicatePtr null_pred(new_column_null_predicate(get_type_info(TYPE_INT), 0, false));
        ConjunctivePredicates one_conjunct;
        one_conjunct.add(null_pred.get());
        del_preds.add(1, one_conjunct);

        auto collected = del_preds.get_predicates(0);
        auto& conjunct_list = collected.predicate_list();
        EXPECT_EQ(1U, conjunct_list.size());

        // The stored conjunction exposes exactly the predicate pointer that was added.
        auto& first_conjunct = conjunct_list[0];
        EXPECT_EQ(1U, first_conjunct.vec_preds().size());
        EXPECT_EQ(null_pred.get(), first_conjunct.vec_preds()[0]);
    }
}
|
||||
|
||||
} // namespace starrocks
|
||||
|
|
@ -365,6 +365,16 @@ TEST_F(LakeDuplicateTabletReaderWithDeleteTest, test_read_success) {
|
|||
writer->close();
|
||||
}
|
||||
|
||||
{ // Add empty delete_predicate, won't affect anything
|
||||
auto* rowset = _tablet_metadata->add_rowsets();
|
||||
rowset->set_overlapped(false);
|
||||
rowset->set_num_rows(0);
|
||||
rowset->set_data_size(0);
|
||||
|
||||
auto* empty_delete_predicate = rowset->mutable_delete_predicate();
|
||||
empty_delete_predicate->set_version(-1);
|
||||
}
|
||||
|
||||
{
|
||||
auto* rowset = _tablet_metadata->add_rowsets();
|
||||
rowset->set_overlapped(false);
|
||||
|
|
@ -385,12 +395,33 @@ TEST_F(LakeDuplicateTabletReaderWithDeleteTest, test_read_success) {
|
|||
in_predicate->add_values("44");
|
||||
in_predicate->add_values("0");
|
||||
in_predicate->add_values("1");
|
||||
|
||||
// This is to simulate the bug where a delete predicate references a non-existent column.
|
||||
auto* invalid_binary_predicate = delete_predicate->add_binary_predicates();
|
||||
invalid_binary_predicate->set_column_name("c0c"); // column name doesn't exist
|
||||
invalid_binary_predicate->set_op("=");
|
||||
invalid_binary_predicate->set_value("30");
|
||||
}
|
||||
|
||||
// write tablet metadata
|
||||
_tablet_metadata->set_version(3);
|
||||
CHECK_OK(_tablet_mgr->put_tablet_metadata(*_tablet_metadata));
|
||||
|
||||
bool original_ignore_config_val = config::lake_tablet_ignore_invalid_delete_predicate;
|
||||
config::lake_tablet_ignore_invalid_delete_predicate = false;
|
||||
|
||||
{ // test reader open failed due to invalid delete_predicate
|
||||
auto reader = std::make_shared<TabletReader>(_tablet_mgr.get(), _tablet_metadata, *_schema);
|
||||
ASSERT_OK(reader->prepare());
|
||||
TabletReaderParams params;
|
||||
auto st = reader->open(params);
|
||||
EXPECT_FALSE(st.ok());
|
||||
EXPECT_TRUE(st.is_unknown()) << st;
|
||||
EXPECT_EQ("unknown column c0c", st.message());
|
||||
}
|
||||
|
||||
config::lake_tablet_ignore_invalid_delete_predicate = true;
|
||||
|
||||
// test reader
|
||||
auto reader = std::make_shared<TabletReader>(_tablet_mgr.get(), _tablet_metadata, *_schema);
|
||||
ASSERT_OK(reader->prepare());
|
||||
|
|
@ -421,6 +452,8 @@ TEST_F(LakeDuplicateTabletReaderWithDeleteTest, test_read_success) {
|
|||
ASSERT_TRUE(reader->get_next(read_chunk_ptr.get()).is_end_of_file());
|
||||
|
||||
reader->close();
|
||||
|
||||
config::lake_tablet_ignore_invalid_delete_predicate = original_ignore_config_val;
|
||||
}
|
||||
|
||||
class LakeDuplicateTabletReaderWithDeleteNotInOneValueTest : public TestBase {
|
||||
|
|
|
|||
|
|
@ -2630,6 +2630,15 @@ When this value is set to less than `0`, the system uses the product of its abso
|
|||
- Introduced in: -
|
||||
-->
|
||||
|
||||
##### lake_tablet_ignore_invalid_delete_predicate
|
||||
|
||||
- Default: false
|
||||
- Type: Boolean
|
||||
- Unit: -
|
||||
- Is mutable: Yes
|
||||
- Description: Whether to ignore invalid delete predicates in tablet rowset metadata. Such predicates may be introduced by a logical deletion on a Duplicate Key table after a column has been renamed.
|
||||
- Introduced in: v4.0
|
||||
|
||||
<!--
|
||||
##### bitmap_serialize_version
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue