[Enhancement] Optimize accessing non-existent JSON field (#62003)
This commit is contained in:
parent
6db1949309
commit
aa4adc5a3d
|
|
@ -199,6 +199,9 @@ public:
|
|||
|
||||
bool has_remain_json() const {
    // Plain read-only accessor: exposes the `_has_remain` flag unchanged.
    return _has_remain;
}
|
||||
|
||||
// Return the pointer to the remain filter if it exists, otherwise return nullptr.
|
||||
const BloomFilter* get_remain_filter() const {
    // No filter held: report that explicitly with a null pointer.
    if (!_remain_filter) {
        return nullptr;
    }
    // Hand out a non-owning pointer to the held filter.
    return _remain_filter.get();
}
|
||||
|
||||
private:
|
||||
StatusOr<std::unique_ptr<ColumnIterator>> _new_json_iterator(ColumnAccessPath* path = nullptr,
|
||||
const TabletColumn* column = nullptr);
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@
|
|||
#include "column/schema.h"
|
||||
#include "common/logging.h"
|
||||
#include "fs/key_cache.h"
|
||||
#include "gutil/strings/split.h"
|
||||
#include "gutil/strings/substitute.h"
|
||||
#include "segment_iterator.h"
|
||||
#include "segment_options.h"
|
||||
|
|
@ -524,7 +525,25 @@ StatusOr<ColumnIteratorUPtr> Segment::_new_extended_column_iterator(const Tablet
|
|||
}
|
||||
}
|
||||
|
||||
// Build a regular ColumnIterator to read it
|
||||
// case 3: check if this segment contains the specific field
|
||||
auto& column_reader = _column_readers[source_id];
|
||||
bool may_contains = column_reader->has_remain_json();
|
||||
if (may_contains && column_reader->get_remain_filter() != nullptr) {
|
||||
std::vector<std::string> paths = strings::Split(full_path, ".");
|
||||
std::string_view leaf = paths.back();
|
||||
may_contains = column_reader->get_remain_filter()->test_bytes(leaf.data(), leaf.size());
|
||||
}
|
||||
if (!may_contains) {
|
||||
// Create an iterator that always returns NULL for fields that don't exist in this segment.
|
||||
auto default_null_iter = std::make_unique<DefaultValueColumnIterator>(false, "", true, get_type_info(column),
|
||||
column.length(), num_rows());
|
||||
ColumnIteratorOptions iter_opts;
|
||||
RETURN_IF_ERROR(default_null_iter->init(iter_opts));
|
||||
VLOG(2) << "json field " << full_path << " not found in segment, return NULL directly";
|
||||
return default_null_iter;
|
||||
}
|
||||
|
||||
// Build a regular JsonExtractIterator to read it
|
||||
auto& source_reader = _column_readers[source_id];
|
||||
ASSIGN_OR_RETURN(auto source_iter, source_reader->new_iterator(path, &column));
|
||||
return create_json_extract_iterator(std::move(source_iter), source_reader->is_nullable(), std::string(field_name),
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@
|
|||
#include "exprs/column_ref.h"
|
||||
#include "exprs/expr_context.h"
|
||||
#include "gutil/casts.h"
|
||||
#include "gutil/strings/split.h"
|
||||
#include "runtime/types.h"
|
||||
#include "storage/rowset/column_reader.h"
|
||||
#include "types/logical_type.h"
|
||||
|
|
|
|||
|
|
@ -2491,7 +2491,7 @@ TEST_F(FlatJsonColumnRWTest, testSegmentWriterIteratorWithMixedDataTypes) {
|
|||
ASSIGN_OR_ABORT(auto column_iter, segment->new_column_iterator_or_default(col, path.get()));
|
||||
ASSERT_OK(column_iter->init(column_opts));
|
||||
ASSERT_OK(column_iter->seek_to_first());
|
||||
size_t count = 4096;
|
||||
size_t count = 3;
|
||||
auto column = ColumnHelper::create_column(TypeDescriptor(field_type), true);
|
||||
ASSERT_OK(column_iter->next_batch(&count, column.get()));
|
||||
ASSERT_EQ(column->size(), json_strings.size());
|
||||
|
|
|
|||
|
|
@ -167,8 +167,6 @@ select get_json_int(j1, '$.f1') from js2 where get_json_int(j1, '$.f1') = 1;
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 1
|
||||
- AccessPathHits: 1
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select get_json_int(j1, '$.f2') from js2 where get_json_int(j1, '$.f1') = 1;
|
||||
|
|
@ -177,8 +175,6 @@ None
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 2
|
||||
- AccessPathHits: 2
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select get_json_int(j1, '$.f1') from js2 where get_json_int(j1, '$.f2') = 1;
|
||||
|
|
@ -187,8 +183,6 @@ None
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 2
|
||||
- AccessPathHits: 2
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select get_json_int(j1, '$.f2'), j1 from js2 where get_json_int(j1, '$.f1') = 1;
|
||||
|
|
@ -197,8 +191,7 @@ None {"f1": 1}
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 2
|
||||
- AccessPathHits: 4
|
||||
- AccessPathHits: 2
|
||||
- PushdownAccessPaths: 2
|
||||
-- !result
|
||||
select get_json_int(j1, '$.f2'), j1 from js2 where get_json_int(j1, '$.f3') = 1;
|
||||
|
|
@ -206,8 +199,7 @@ select get_json_int(j1, '$.f2'), j1 from js2 where get_json_int(j1, '$.f3') = 1;
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 3
|
||||
- AccessPathHits: 5
|
||||
- AccessPathHits: 2
|
||||
- PushdownAccessPaths: 2
|
||||
-- !result
|
||||
select get_json_int(j1, '$.f2'), get_json_int(j1, '$.f2') from js2 where get_json_int(j1, '$.f1') = 1;
|
||||
|
|
@ -216,8 +208,6 @@ None None
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 2
|
||||
- AccessPathHits: 2
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select count(get_json_int(j1, '$.f2')) from js2 ;
|
||||
|
|
@ -226,8 +216,6 @@ select count(get_json_int(j1, '$.f2')) from js2 ;
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 1
|
||||
- AccessPathHits: 1
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select count(get_json_int(j1, '$.f3')) from js2 ;
|
||||
|
|
@ -236,8 +224,6 @@ select count(get_json_int(j1, '$.f3')) from js2 ;
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 2
|
||||
- AccessPathHits: 2
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select * from js2 where get_json_int(j1, '$.f1') = -1;
|
||||
|
|
@ -507,8 +493,6 @@ select count(*) from js3 where get_json_double(j1, 'f_bool') = 1.0;
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 1
|
||||
- AccessPathHits: 1
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select count(*) from js3 where get_json_int(j1, 'f_int') < 500;
|
||||
|
|
@ -529,8 +513,6 @@ select count(*) from js3 where get_json_double(j1, 'f_int') < 500;
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 1
|
||||
- AccessPathHits: 1
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select count(*) from js3 where get_json_double(j1, 'f_double') < 500.0;
|
||||
|
|
@ -551,8 +533,6 @@ select count(*) from js3 where get_json_string(j1, 'f_double') < '500';
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 1
|
||||
- AccessPathHits: 1
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
select count(*) from js3 where get_json_int(j1, 'f_none') IS NULL;
|
||||
|
|
@ -581,7 +561,5 @@ select count(*) from js3 where get_json_double(j1, 'f_none') < 500;
|
|||
-- !result
|
||||
select * from profile_access_path;
|
||||
-- result:
|
||||
- AccessPathExtract: 2
|
||||
- AccessPathHits: 2
|
||||
- PushdownAccessPaths: 0
|
||||
-- !result
|
||||
Loading…
Reference in New Issue