[Feature] implement json_remove (#61394)

Signed-off-by: Murphy <mofei@starrocks.com>
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
This commit is contained in:
Murphy 2025-08-07 14:50:13 +08:00 committed by GitHub
parent e338f7ac00
commit 0a9618db4c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 954 additions and 33 deletions

View File

@ -1156,6 +1156,238 @@ StatusOr<ColumnPtr> JsonFunctions::_json_keys_without_path(FunctionContext* cont
return result.build(ColumnHelper::is_all_const(columns));
}
static void build_path_lookup_structures(const std::vector<JsonPath>& valid_paths,
std::unordered_set<std::string>& exact_paths_to_remove,
std::unordered_set<std::string>& prefix_paths_to_remove);
static StatusOr<JsonValue> _remove_json_paths_core(JsonValue* json_value,
const std::unordered_set<std::string>& exact_paths_to_remove,
const std::unordered_set<std::string>& prefix_paths_to_remove,
vpack::Builder* builder);
StatusOr<ColumnPtr> JsonFunctions::json_remove(FunctionContext* context, const Columns& columns) {
RETURN_IF_COLUMNS_ONLY_NULL(columns);
RETURN_IF(columns.size() < 2,
Status::InvalidArgument("json_remove requires at least 2 arguments: json_doc and path"));
size_t rows = columns[0]->size();
ColumnBuilder<TYPE_JSON> result(rows);
ColumnViewer<TYPE_JSON> json_viewer(columns[0]);
// Get all path arguments
std::vector<ColumnViewer<TYPE_VARCHAR>> path_viewers;
for (size_t i = 1; i < columns.size(); i++) {
path_viewers.emplace_back(columns[i]);
}
// Parse all valid paths once at the beginning to amortize parsing overhead
std::vector<JsonPath> valid_paths;
bool all_paths_constant = true;
for (size_t path_idx = 0; path_idx < path_viewers.size(); path_idx++) {
// Check if this path column is constant
bool is_constant = columns[path_idx + 1]->is_constant();
if (is_constant) {
// For constant paths, parse once and reuse
if (!path_viewers[path_idx].is_null(0)) {
Slice path_str = path_viewers[path_idx].value(0);
auto jsonpath = JsonPath::parse(path_str);
if (jsonpath.ok()) {
valid_paths.emplace_back(jsonpath.value());
}
}
} else {
all_paths_constant = false;
}
}
// Build lookup structures once for constant paths to amortize construction overhead
std::unordered_set<std::string> exact_paths_to_remove;
std::unordered_set<std::string> prefix_paths_to_remove;
build_path_lookup_structures(valid_paths, exact_paths_to_remove, prefix_paths_to_remove);
for (size_t row = 0; row < rows; row++) {
if (json_viewer.is_null(row) || json_viewer.value(row) == nullptr) {
result.append_null();
continue;
}
JsonValue* json_value = json_viewer.value(row);
// Create new JSON with paths removed
vpack::Builder builder;
if (all_paths_constant) {
// All paths are constant, use pre-built lookup structures directly
ASSIGN_OR_RETURN(auto removed_json, _remove_json_paths_core(json_value, exact_paths_to_remove,
prefix_paths_to_remove, &builder));
result.append(std::move(removed_json));
} else {
// Some paths are non-constant, parse them for each row
std::vector<JsonPath> row_paths = valid_paths; // Start with pre-parsed constant paths
for (size_t path_idx = 0; path_idx < path_viewers.size(); path_idx++) {
bool is_constant = columns[path_idx + 1]->is_constant();
if (!is_constant) {
// Parse non-constant paths for each row
if (!path_viewers[path_idx].is_null(row)) {
Slice path_str = path_viewers[path_idx].value(row);
auto jsonpath = JsonPath::parse(path_str);
if (jsonpath.ok()) {
row_paths.emplace_back(jsonpath.value());
}
}
}
}
// Build lookup structures for this row (including non-constant paths)
std::unordered_set<std::string> row_exact_paths = exact_paths_to_remove;
std::unordered_set<std::string> row_prefix_paths = prefix_paths_to_remove;
// Add lookup structures for non-constant paths
std::vector<JsonPath> non_constant_paths;
for (size_t i = valid_paths.size(); i < row_paths.size(); i++) {
non_constant_paths.push_back(row_paths[i]);
}
build_path_lookup_structures(non_constant_paths, row_exact_paths, row_prefix_paths);
ASSIGN_OR_RETURN(auto removed_json,
_remove_json_paths_core(json_value, row_exact_paths, row_prefix_paths, &builder));
result.append(std::move(removed_json));
}
}
return result.build(ColumnHelper::is_all_const(columns));
}
// Helper function to build lookup structures for path removal
static void build_path_lookup_structures(const std::vector<JsonPath>& valid_paths,
std::unordered_set<std::string>& exact_paths_to_remove,
std::unordered_set<std::string>& prefix_paths_to_remove) {
for (const auto& remove_path : valid_paths) {
std::string path_str = remove_path.to_string();
exact_paths_to_remove.insert(path_str);
// Build prefix paths for quick recursion decision by iterating JsonPath::paths
// instead of string operations to handle escaped dots correctly
std::string current_prefix = "$";
for (size_t i = 0; i < remove_path.paths.size(); i++) {
const auto& piece = remove_path.paths[i];
// Add the key part (skip the root "$" piece)
if (!piece.key.empty() && piece.key != "$") {
current_prefix += "." + piece.key;
}
// Add the path before array selector
prefix_paths_to_remove.insert(current_prefix);
// Add array selector if present
if (piece.array_selector) {
current_prefix += piece.array_selector->to_string();
prefix_paths_to_remove.insert(current_prefix);
}
}
}
}
// Core function that performs the actual JSON removal work
static StatusOr<JsonValue> _remove_json_paths_core(JsonValue* json_value,
const std::unordered_set<std::string>& exact_paths_to_remove,
const std::unordered_set<std::string>& prefix_paths_to_remove,
vpack::Builder* builder) {
namespace vpack = arangodb::velocypack;
vpack::Slice original_slice = json_value->to_vslice();
// Recursive function with optimized path checking
std::function<vpack::Slice(vpack::Slice, const std::string&)> remove_paths_recursive =
[&](vpack::Slice slice, const std::string& current_path) -> vpack::Slice {
if (slice.isObject()) {
vpack::Builder obj_builder;
{
vpack::ObjectBuilder builder(&obj_builder);
// Iterate the object directly without collecting and sorting keys
for (auto it : vpack::ObjectIterator(slice)) {
auto key = it.key.copyString();
std::string child_path = current_path.empty() ? ("$." + key) : (current_path + "." + key);
// 1. Check if this is the target level (exact match)
if (exact_paths_to_remove.find(child_path) != exact_paths_to_remove.end()) {
// This is the target level, skip it
continue;
}
vpack::Slice value = it.value;
if (value.isNone()) {
continue;
}
// 2. Check if recursion is needed (prefix match)
bool needs_recursion = false;
if (value.isObject() || value.isArray()) {
needs_recursion = (prefix_paths_to_remove.find(child_path) != prefix_paths_to_remove.end());
}
if (needs_recursion) {
vpack::Slice processed_value = remove_paths_recursive(value, child_path);
builder->add(key, processed_value);
} else {
builder->add(key, value);
}
}
} // ObjectBuilder automatically closes here
return obj_builder.slice();
} else if (slice.isArray()) {
vpack::Builder arr_builder;
{
vpack::ArrayBuilder builder(&arr_builder);
size_t array_size = slice.length();
for (size_t index = 0; index < array_size; index++) {
std::string child_path = current_path + "[" + std::to_string(index) + "]";
// 1. Check if this is the target level (exact match)
if (exact_paths_to_remove.find(child_path) != exact_paths_to_remove.end()) {
continue;
}
vpack::Slice element = slice.at(index);
if (element.isNone()) {
continue; // Index out of bounds
}
// 2. Check if recursion is needed (prefix match)
bool needs_recursion = false;
if (element.isObject() || element.isArray()) {
// Check if current path is a prefix of any removal path
needs_recursion = (prefix_paths_to_remove.find(child_path) != prefix_paths_to_remove.end());
}
if (needs_recursion) {
vpack::Slice processed_element = remove_paths_recursive(element, child_path);
builder->add(processed_element);
} else {
builder->add(element);
}
}
} // ArrayBuilder automatically closes here
return arr_builder.slice();
} else {
// Primitive value, return as is
return slice;
}
};
vpack::Slice result = remove_paths_recursive(original_slice, "$");
builder->add(result);
return JsonValue(builder->slice());
}
StatusOr<ColumnPtr> JsonFunctions::to_json(FunctionContext* context, const Columns& columns) {
RETURN_IF_COLUMNS_ONLY_NULL(columns);
return cast_nested_to_json(columns[0], context->allow_throw_exception());

View File

@ -27,6 +27,9 @@
namespace starrocks {
// Forward declarations
struct JsonPath;
extern const re2::RE2 SIMPLE_JSONPATH_PATTERN;
struct SimpleJsonPath {
@ -152,6 +155,13 @@ public:
*/
DEFINE_VECTORIZED_FN(json_keys);
/**
* Remove data from a JSON document at one or more specified JSON paths
* @param JSON, JSONPath, [JSONPath, ...]
* @return JSON with specified paths removed
*/
DEFINE_VECTORIZED_FN(json_remove);
/**
* Return json built from struct/map
*/

View File

@ -255,38 +255,6 @@ void JsonPath::reset(JsonPath&& rhs) {
paths = std::move(rhs.paths);
}
std::string JsonPath::to_string() const {
std::string result = "$";
for (size_t i = 0; i < paths.size(); i++) {
const auto& piece = paths[i];
if (!piece.key.empty() && piece.key != "$") {
result += "." + piece.key;
}
if (piece.array_selector) {
switch (piece.array_selector->type) {
case ArraySelectorType::SINGLE: {
auto* single = down_cast<ArraySelectorSingle*>(piece.array_selector.get());
result += "[" + std::to_string(single->index) + "]";
break;
}
case ArraySelectorType::WILDCARD:
result += "[*]";
break;
case ArraySelectorType::SLICE: {
auto* slice = down_cast<ArraySelectorSlice*>(piece.array_selector.get());
result += "[" + std::to_string(slice->left) + ":" + std::to_string(slice->right) + "]";
break;
}
case ArraySelectorType::NONE:
case ArraySelectorType::INVALID:
default:
break;
}
}
}
return result;
}
StatusOr<JsonPath> JsonPath::parse(Slice path_string) {
std::vector<JsonPathPiece> pieces;
RETURN_IF_ERROR(JsonPathPiece::parse(path_string.to_string(), &pieces));
@ -297,6 +265,20 @@ vpack::Slice JsonPath::extract(const JsonValue* json, const JsonPath& jsonpath,
return JsonPathPiece::extract(json, jsonpath.paths, b);
}
std::string JsonPath::to_string() const {
std::string result = "$";
for (size_t i = 0; i < paths.size(); i++) {
const auto& piece = paths[i];
if (!piece.key.empty() && piece.key != "$") {
result += "." + piece.key;
}
if (piece.array_selector) {
result += piece.array_selector->to_string();
}
}
return result;
}
bool JsonPath::starts_with(const JsonPath* other) const {
if (other->paths.size() > paths.size()) {
// this: a.b, other: a.b.c.d

View File

@ -49,12 +49,17 @@ struct ArraySelector {
virtual void iterate(vpack::Slice array_slice, std::function<void(vpack::Slice)> callback) = 0;
virtual bool match(const ArraySelector& other) const { return type == other.type; };
// Convert array selector to string representation
virtual std::string to_string() const = 0;
};
struct ArraySelectorNone final : public ArraySelector {
ArraySelectorNone() { type = NONE; }
void iterate(vpack::Slice array_slice, std::function<void(vpack::Slice)> callback) override { return; }
std::string to_string() const override { return ""; }
};
struct ArraySelectorSingle final : public ArraySelector {
@ -72,6 +77,8 @@ struct ArraySelectorSingle final : public ArraySelector {
}
return index == down_cast<const ArraySelectorSingle*>(&other)->index;
};
std::string to_string() const override { return "[" + std::to_string(index) + "]"; }
};
struct ArraySelectorWildcard final : public ArraySelector {
@ -80,6 +87,8 @@ struct ArraySelectorWildcard final : public ArraySelector {
static bool match(const std::string& input);
void iterate(vpack::Slice array_slice, std::function<void(vpack::Slice)> callback) override;
std::string to_string() const override { return "[*]"; }
};
struct ArraySelectorSlice final : public ArraySelector {
@ -98,6 +107,8 @@ struct ArraySelectorSlice final : public ArraySelector {
auto* ass = down_cast<const ArraySelectorSlice*>(&other);
return left == ass->left && right == ass->right;
};
std::string to_string() const override { return "[" + std::to_string(left) + ":" + std::to_string(right) + "]"; }
};
// JsonPath implement that support array building
@ -140,7 +151,6 @@ struct JsonPath {
bool is_empty() const { return paths.empty(); }
// Convert JsonPath to string representation
std::string to_string() const;
static StatusOr<JsonPath> parse(Slice path_string);

View File

@ -1665,4 +1665,193 @@ TEST_F(JsonFunctionsTest, query_json_obj) {
ASSERT_EQ(result->debug_string(), "[0]");
}
// Test parameters for json_remove function
// Note: Implementation supports:
// - Top-level object key removal (e.g., $.key)
// - Nested path removal (e.g., $.outer.inner1)
// - Array element removal (e.g., $.arr[2])
// - Mixed path removal from nested structures
struct JsonRemoveTestParam {
std::string json_input;
std::vector<std::string> paths_to_remove;
std::string expected_result;
std::string description;
};
class JsonRemoveTestFixture : public ::testing::TestWithParam<JsonRemoveTestParam> {};
TEST_P(JsonRemoveTestFixture, json_remove) {
std::unique_ptr<FunctionContext> ctx(FunctionContext::create_test_context());
auto param = GetParam();
// Create JSON column
auto json_column = ColumnHelper::cast_to_nullable_column(JsonColumn::create());
if (param.json_input == "null") {
json_column->append_nulls(1);
} else {
auto json = JsonValue::parse(param.json_input);
ASSERT_TRUE(json.ok()) << "Failed to parse JSON: " << param.json_input;
json_column->append_datum(Datum(&json.value()));
}
// Create columns with JSON and all paths
Columns columns{json_column};
for (const auto& path : param.paths_to_remove) {
auto path_column = ColumnHelper::cast_to_nullable_column(BinaryColumn::create());
if (path == "null") {
path_column->append_nulls(1);
} else {
path_column->append_datum(Datum(Slice(path)));
}
columns.emplace_back(path_column);
}
// Prepare JSON path context
ASSERT_TRUE(JsonFunctions::native_json_path_prepare(
ctx.get(), FunctionContext::FunctionContext::FunctionStateScope::FRAGMENT_LOCAL)
.ok());
// Execute json_remove function
ColumnPtr result = JsonFunctions::json_remove(ctx.get(), columns).value();
ASSERT_TRUE(!!result) << "json_remove returned null result for: " << param.description;
// Verify result
Datum datum = result->get(0);
if (param.expected_result == "null") {
ASSERT_TRUE(datum.is_null()) << "Expected null result for: " << param.description;
} else {
ASSERT_FALSE(datum.is_null()) << "Result should not be null for: " << param.description;
std::string json_str = datum.get_json()->to_string().value();
// Parse and re-serialize both JSON strings to normalize key ordering
auto expected_json = JsonValue::parse(param.expected_result);
auto actual_json = JsonValue::parse(json_str);
ASSERT_TRUE(expected_json.ok()) << "Failed to parse expected JSON: " << param.expected_result;
ASSERT_TRUE(actual_json.ok()) << "Failed to parse actual JSON: " << json_str;
std::string normalized_expected = expected_json->to_string().value();
std::string normalized_actual = actual_json->to_string().value();
ASSERT_EQ(normalized_expected, normalized_actual)
<< "Test: " << param.description << "\nExpected: " << normalized_expected
<< "\nActual: " << normalized_actual;
}
// Clean up JSON path context
ASSERT_TRUE(JsonFunctions::native_json_path_close(
ctx.get(), FunctionContext::FunctionContext::FunctionStateScope::FRAGMENT_LOCAL)
.ok());
}
// clang-format off
INSTANTIATE_TEST_SUITE_P(
JsonRemoveTests, JsonRemoveTestFixture,
::testing::Values(
JsonRemoveTestParam{
"null", // JSON input is null
{"$.a"}, // Any path (should not matter)
"null", // Expected result
"Null input JSON should result in null output"},
JsonRemoveTestParam{
R"({"foo": 123, "bar": 456})", // JSON input
{"null"}, // paths_to_remove is null
R"({"foo": 123, "bar": 456})", // Expected result (no change)
"No paths to remove: output should be identical to input"
},
JsonRemoveTestParam{
R"({"a": 1, "b": [10, 20, 30]})",
{"$.a"},
R"({"b": [10, 20, 30]})", // Expected result
"Remove single key from object"},
JsonRemoveTestParam{
R"({"a": 1, "b": [10, 20, 30], "c": "test"})",
{"$.a", "$.c"},
R"({"b": [10, 20, 30]})", // Expected result
"Remove multiple keys from object"},
JsonRemoveTestParam{
R"({"a": 1, "b": 2})",
{"invalid_path"},
R"({"a": 1, "b": 2})", // Expected result (invalid path ignored)
"Invalid path should be ignored"},
JsonRemoveTestParam{
R"({"x": 100, "y": 200, "z": 300})",
{"$.y"},
R"({"x": 100, "z": 300})", // Expected result
"Remove middle key from object"},
JsonRemoveTestParam{
R"({"single": "value"})",
{"$.single"},
R"({})", // Expected result (empty object)
"Remove single key from single-key object"},
// TODO
JsonRemoveTestParam{
R"([1, 2, 3, {"a": 10, "b": 20}])", // JSON input is an array
{"$[0]", "$[1]"}, // Try to remove array elements
R"([3, {"a": 10, "b": 20}])", // Expected result
"Remove array elements from array"},
JsonRemoveTestParam{
R"({"outer": {"inner1": 1, "inner2": 2}, "keep": 42})",
{"$.outer.inner1"},
R"({"keep": 42, "outer": {"inner2": 2}})", // Expected result
"Remove nested key from nested object"},
JsonRemoveTestParam{
R"({"deep": {"level1": {"level2": {"level3": "value"}}}})",
{"$.deep.level1.level2"},
R"({"deep": {"level1": {}}})", // Expected result
"Remove deeply nested object"},
JsonRemoveTestParam{
R"({"arr": [0, 1, 2, 3, 4]})",
{"$.arr[2]"},
R"({"arr": [0, 1, 3, 4]})", // Expected result
"Remove middle element from array"},
JsonRemoveTestParam{
R"({"mixed": {"obj": {"key": "value"}, "arr": [1, 2, 3]}})",
{"$.mixed.obj.key", "$.mixed.arr[1]"},
R"({"mixed": {"obj": {}, "arr": [1, 3]}})", // Expected result
"Remove mixed paths from nested structure"},
JsonRemoveTestParam{
R"({"top_level": "value", "nested": {"inner": "data"}})",
{"$.top_level"},
R"({"nested": {"inner": "data"}})", // Expected result
"Remove top-level key from object"},
JsonRemoveTestParam{
R"({"key1": "value1", "key2": "value2", "key3": "value3"})",
{"$.key1", "$.key3"},
R"({"key2": "value2"})", // Expected result
"Remove multiple top-level keys from object"},
JsonRemoveTestParam{
R"({"nested": {"a": 1, "b": 2, "c": 3}, "other": "value"})",
{"$.nested.b", "$.nested.c"},
R"({"nested": {"a": 1}, "other": "value"})", // Expected result
"Remove multiple nested keys from object"},
JsonRemoveTestParam{
R"({"array": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}, {"id": 3, "name": "Charlie"}]})",
{"$.array[1]"},
R"({"array": [{"id": 1, "name": "Alice"}, {"id": 3, "name": "Charlie"}]})", // Expected result
"Remove object from array"},
JsonRemoveTestParam{
R"({"complex": {"users": [{"id": 1}, {"id": 2}], "settings": {"theme": "dark", "lang": "en"}}})",
{"$.complex.users[0]", "$.complex.settings.theme"},
R"({"complex": {"users": [{"id": 2}], "settings": {"lang": "en"}}})", // Expected result
"Remove mixed paths from complex nested structure"},
JsonRemoveTestParam{
R"({"a.b": {"c": 1}, "d": 2})",
{"$.\"a.b\""},
R"({"d": 2})", // Expected result
"Remove key from object where path contains a dot character"},
JsonRemoveTestParam{
R"({"a.b": {"c": 1}, "d": 2})",
{"$.\"a.b\".c"},
R"({"a.b": {}, "d": 2})", // Expected result
"Remove key from object where path contains a dot character"},
JsonRemoveTestParam{
R"({"outer": {"a.b": {"x": 10, "y": 20}}, "other": 5})",
{"$.outer.\"a.b\".y"},
R"({"outer": {"a.b": {"x": 10}}, "other": 5})", // Expected result
"Remove nested key where intermediate path contains a dot character"}
));
// clang-format on
} // namespace starrocks

View File

@ -0,0 +1,88 @@
---
displayed_sidebar: docs
---
# json_remove
Removes data from a JSON document at one or more specified JSON paths and returns the modified JSON document.
:::tip
All of the JSON functions and operators are listed in the navigation and on the [overview page](../overview-of-json-functions-and-operators.md)
:::
## Syntax
```Haskell
json_remove(json_object_expr, json_path[, json_path] ...)
```
## Parameters
- `json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON.
- `json_path`: one or more expressions that represent the paths to elements in the JSON object that should be removed. The value of each parameter is a string. For information about the JSON path syntax that is supported by StarRocks, see [Overview of JSON functions and operators](../overview-of-json-functions-and-operators.md).
## Return value
Returns a JSON document with the specified paths removed.
> - If a path does not exist in the JSON document, it is ignored.
> - If an invalid path is provided, it is ignored.
> - If all paths are invalid or non-existent, the original JSON document is returned unchanged.
## Examples
Example 1: Remove a single key from a JSON object.
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.a');
-> {"b": [10, 20, 30]}
```
Example 2: Remove multiple keys from a JSON object.
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30], "c": "test"}', '$.a', '$.c');
-> {"b": [10, 20, 30]}
```
Example 3: Remove array elements from a JSON object.
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.b[1]');
-> {"a": 1, "b": [10, 30]}
```
Example 4: Remove nested object properties.
```plaintext
mysql> SELECT json_remove('{"a": {"x": 1, "y": 2}, "b": 3}', '$.a.x');
-> {"a": {"y": 2}, "b": 3}
```
Example 5: Attempt to remove non-existent paths (ignored).
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": 2}', '$.c', '$.d');
-> {"a": 1, "b": 2}
```
Example 6: Remove multiple paths including non-existent ones.
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a', '$.nonexistent', '$.c');
-> {"b": 2}
```
## Usage notes
- The `json_remove` function follows MySQL-compatible behavior.
- Invalid JSON paths are silently ignored rather than causing errors.
- The function supports removing multiple paths in a single operation, which is more efficient than multiple separate operations.
- Currently, the function supports simple object key removal (e.g., `$.key`). Support for complex nested paths and array element removal may be limited in the current implementation.

View File

@ -32,6 +32,7 @@ JSON query functions and processing functions are used to query and process JSON
| [get_json_int](./json-query-and-processing-functions/get_json_int.md) | Analyzes and gets the integer value from a specified path in a JSON string. | `SELECT get_json_int('{"k1":1, "k2":"2"}', "$.k1");` | `1` |
| [get_json_string](./json-query-and-processing-functions/get_json_string.md) | Analyzes and gets the strings from a specified path in a JSON string. | `SELECT get_json_string('{"k1":"v1", "k2":"v2"}', "$.k1");` | `v1` |
| [json_query](./json-query-and-processing-functions/json_query.md) | Queries the value of an element that can be located by a path expression in a JSON object. | `SELECT JSON_QUERY('{"a": 1}', '$.a');` | `1` |
| [json_remove](./json-query-and-processing-functions/json_remove.md) | Removes data from a JSON document at one or more specified JSON paths. | `SELECT JSON_REMOVE('{"a": 1, "b": [10, 20, 30]}', '$.a', '$.b[1]');` | `{"b": [10, 30]}` |
| [json_each](./json-query-and-processing-functions/json_each.md) | Expands the top-level elements of a JSON object into key-value pairs. | `SELECT * FROM tj_test, LATERAL JSON_EACH(j);` | `!`[json_each](../../../_assets/json_each.png) |
| [json_exists](./json-query-and-processing-functions/json_exists.md) | Checks whether a JSON object contains an element that can be located by a path expression. If the element exists, this function returns 1. If the element does not exist, the function returns 0. | `SELECT JSON_EXISTS('{"a": 1}', '$.a'); ` | `1` |
| [json_keys](./json-query-and-processing-functions/json_keys.md) | Returns the top-level keys from a JSON object as a JSON array, or, if a path is specified, the top-level keys from the path. | `SELECT JSON_KEYS('{"a": 1, "b": 2, "c": 3}');` | `["a", "b", "c"]`|

View File

@ -0,0 +1,88 @@
---
displayed_sidebar: docs
---
# json_remove
指定されたJSONパスからデータを削除し、修正されたJSONドキュメントを返します。
:::tip
すべてのJSON関数と演算子は、ナビゲーションと[概要ページ](../overview-of-json-functions-and-operators.md)に記載されています
:::
## 構文
```Haskell
json_remove(json_object_expr, json_path[, json_path] ...)
```
## パラメータ
- `json_object_expr`: JSONオブジェクトを表す式。オブジェクトはJSONカラム、またはPARSE_JSONなどのJSONコンストラクタ関数によって生成されたJSONオブジェクトです。
- `json_path`: JSONオブジェクト内の削除すべき要素へのパスを表す1つ以上の式。各パラメータの値は文字列です。StarRocksでサポートされているJSONパス構文については、[JSON関数と演算子の概要](../overview-of-json-functions-and-operators.md)を参照してください。
## 戻り値
指定されたパスが削除されたJSONドキュメントを返します。
> - パスがJSONドキュメントに存在しない場合、無視されます。
> - 無効なパスが提供された場合、無視されます。
> - すべてのパスが無効または存在しない場合、元のJSONドキュメントが変更されずに返されます。
## 例
例1: JSONオブジェクトから単一のキーを削除します。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.a');
-> {"b": [10, 20, 30]}
```
例2: JSONオブジェクトから複数のキーを削除します。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30], "c": "test"}', '$.a', '$.c');
-> {"b": [10, 20, 30]}
```
例3: JSONオブジェクトから配列要素を削除します。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.b[1]');
-> {"a": 1, "b": [10, 30]}
```
例4: ネストされたオブジェクトのプロパティを削除します。
```plaintext
mysql> SELECT json_remove('{"a": {"x": 1, "y": 2}, "b": 3}', '$.a.x');
-> {"a": {"y": 2}, "b": 3}
```
例5: 存在しないパスの削除を試行します(無視されます)。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": 2}', '$.c', '$.d');
-> {"a": 1, "b": 2}
```
例6: 存在しないパスを含む複数のパスを削除します。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a', '$.nonexistent', '$.c');
-> {"b": 2}
```
## 使用上の注意
- `json_remove`関数はMySQL互換の動作に従います。
- 無効なJSONパスはエラーを発生させることなく静かに無視されます。
- この関数は単一の操作で複数のパスを削除することをサポートしており、複数の個別操作よりも効率的です。
- 現在、この関数は単純なオブジェクトキーの削除(例:`$.key`)をサポートしています。複雑なネストされたパスや配列要素の削除のサポートは、現在の実装では制限される場合があります。

View File

@ -0,0 +1,88 @@
---
displayed_sidebar: docs
---
# json_remove
从JSON文档中删除一个或多个指定JSON路径的数据并返回修改后的JSON文档。
:::tip
所有JSON函数和运算符都列在导航和[概述页面](../overview-of-json-functions-and-operators.md)中
:::
## 语法
```Haskell
json_remove(json_object_expr, json_path[, json_path] ...)
```
## 参数
- `json_object_expr`: 表示JSON对象的表达式。对象可以是JSON列或由JSON构造函数如PARSE_JSON生成的JSON对象。
- `json_path`: 一个或多个表示JSON对象中应删除元素路径的表达式。每个参数的值都是字符串。有关StarRocks支持的JSON路径语法信息请参阅[JSON函数和运算符概述](../overview-of-json-functions-and-operators.md)。
## 返回值
返回删除了指定路径的JSON文档。
> - 如果路径在JSON文档中不存在则会被忽略。
> - 如果提供了无效路径,则会被忽略。
> - 如果所有路径都无效或不存在则返回未更改的原始JSON文档。
## 示例
示例1: 从JSON对象中删除单个键。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.a');
-> {"b": [10, 20, 30]}
```
示例2: 从JSON对象中删除多个键。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30], "c": "test"}', '$.a', '$.c');
-> {"b": [10, 20, 30]}
```
示例3: 从JSON对象中删除数组元素。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.b[1]');
-> {"a": 1, "b": [10, 30]}
```
示例4: 删除嵌套对象属性。
```plaintext
mysql> SELECT json_remove('{"a": {"x": 1, "y": 2}, "b": 3}', '$.a.x');
-> {"a": {"y": 2}, "b": 3}
```
示例5: 尝试删除不存在的路径(被忽略)。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": 2}', '$.c', '$.d');
-> {"a": 1, "b": 2}
```
示例6: 删除多个路径,包括不存在的路径。
```plaintext
mysql> SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a', '$.nonexistent', '$.c');
-> {"b": 2}
```
## 使用说明
- `json_remove`函数遵循MySQL兼容的行为。
- 无效的JSON路径会被静默忽略而不是导致错误。
- 该函数支持在单个操作中删除多个路径,这比多个单独操作更高效。
- 目前,该函数支持简单的对象键删除(例如,`$.key`)。复杂嵌套路径和数组元素删除的支持在当前实现中可能有限。

View File

@ -236,6 +236,7 @@ public class FunctionSet {
public static final String GET_JSON_STRING = "get_json_string";
public static final String GET_JSON_OBJECT = "get_json_object";
public static final String JSON_LENGTH = "json_length";
public static final String JSON_REMOVE = "json_remove";
// Matching functions:
public static final String ILIKE = "ilike";

View File

@ -54,6 +54,7 @@ public class PruneSubfieldRule extends TransformationRule {
.add(FunctionSet.JSON_QUERY)
.add(FunctionSet.JSON_EXISTS)
.add(FunctionSet.JSON_LENGTH)
.add(FunctionSet.JSON_REMOVE)
.build();
public static final List<String> PRUNE_FUNCTIONS = ImmutableList.<String>builder()

View File

@ -872,6 +872,8 @@ vectorized_functions = [
[110018, "json_keys", False, False, "JSON", ["JSON"], "JsonFunctions::json_keys"],
[110019, "json_keys", False, False, "JSON", ["JSON", "VARCHAR"], "JsonFunctions::json_keys",
"JsonFunctions::native_json_path_prepare", "JsonFunctions::native_json_path_close"],
[110024, "json_remove", False, False, "JSON", ["JSON", "VARCHAR", "..."], "JsonFunctions::json_remove",
"JsonFunctions::native_json_path_prepare", "JsonFunctions::native_json_path_close"],
[110100, "to_json", False, False, "JSON", ["ANY_MAP"], "JsonFunctions::to_json"],
[110101, "to_json", False, False, "JSON", ["ANY_STRUCT"], "JsonFunctions::to_json"],

View File

@ -0,0 +1,148 @@
-- name: test_json_remove
CREATE DATABASE test_json_remove;
-- result:
-- !result
USE test_json_remove;
-- result:
-- !result
SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a') as remove_single_key;
-- result:
{"b": 2, "c": 3}
-- !result
SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a', '$.c') as remove_multiple_keys;
-- result:
{"b": 2}
-- !result
SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.b[1]') as remove_array_element;
-- result:
{"a": 1, "b": [10, 30]}
-- !result
SELECT json_remove('{"a": {"x": 1, "y": 2}, "b": 3}', '$.a.x') as remove_nested_key;
-- result:
{"a": {"y": 2}, "b": 3}
-- !result
SELECT json_remove('{"level1": {"level2": {"level3": "value"}}}', '$.level1.level2') as remove_nested_object;
-- result:
{"level1": {}}
-- !result
SELECT json_remove('{"a": 1, "b": 2}', '$.nonexistent') as remove_nonexistent_key;
-- result:
{"a": 1, "b": 2}
-- !result
SELECT json_remove('{"a": 1, "b": 2}', 'invalid_path') as remove_invalid_path;
-- result:
{"a": 1, "b": 2}
-- !result
SELECT json_remove('{"a": 1, "b": 2}', '$.a', '$.nonexistent', '$.b') as remove_mixed_paths;
-- result:
{}
-- !result
SELECT json_remove('{}', '$.a') as remove_from_empty_object;
-- result:
{}
-- !result
SELECT json_remove('{"a": 1}', '$.a') as remove_all_keys;
-- result:
{}
-- !result
SELECT json_remove('{"a": []}', '$.a') as remove_empty_array;
-- result:
{}
-- !result
SELECT json_remove('{"str": "hello", "num": 42, "bool": true, "null": null}', '$.str', '$.bool') as remove_various_types;
-- result:
{"null": null, "num": 42}
-- !result
SELECT json_remove('{"users": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}], "count": 2}', '$.count') as remove_from_complex;
-- result:
{"users": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}
-- !result
CREATE TABLE json_test_table (
id INT,
json_data JSON
) DUPLICATE KEY(id);
-- result:
-- !result
INSERT INTO json_test_table VALUES
(1, parse_json('{"name": "Alice", "age": 30, "city": "New York"}')),
(2, parse_json('{"name": "Bob", "age": 25, "city": "San Francisco", "hobbies": ["reading", "gaming"]}')),
(3, parse_json('{"product": "laptop", "price": 999.99, "specs": {"cpu": "Intel", "ram": "16GB"}}')),
(4, parse_json('{"empty": {}, "array": [1, 2, 3, 4, 5]}')),
(5, parse_json('null'));
-- result:
-- !result
SELECT id, json_remove(json_data, '$.age') as without_age FROM json_test_table WHERE id = 1;
-- result:
1 {"city": "New York", "name": "Alice"}
-- !result
SELECT id, json_remove(json_data, '$.city', '$.hobbies') as filtered_data FROM json_test_table WHERE id = 2;
-- result:
2 {"age": 25, "name": "Bob"}
-- !result
SELECT id, json_remove(json_data, '$.specs.ram') as without_ram FROM json_test_table WHERE id = 3;
-- result:
3 {"price": 999.99, "product": "laptop", "specs": {"cpu": "Intel"}}
-- !result
SELECT id, json_remove(json_data, '$.array[2]', '$.array[3]') as removed_elements FROM json_test_table WHERE id = 4;
-- result:
4 {"array": [1, 2, 5], "empty": {}}
-- !result
SELECT id, json_remove(json_data, '$.any') as null_result FROM json_test_table WHERE id = 5;
-- result:
5 "null"
-- !result
SELECT id, json_remove(json_data, '$.name', '$.city') as minimal_data FROM json_test_table WHERE id <= 2 ORDER BY id;
-- result:
1 {"age": 30}
2 {"age": 25, "hobbies": ["reading", "gaming"]}
-- !result
SELECT json_length(json_remove('{"a": 1, "b": 2, "c": 3}', '$.b')) as length_after_remove;
-- result:
2
-- !result
SELECT json_keys(json_remove('{"x": 1, "y": 2, "z": 3}', '$.y')) as keys_after_remove;
-- result:
["x", "z"]
-- !result
SELECT json_exists(json_remove('{"a": 1, "b": 2}', '$.a'), '$.a') as exists_after_remove;
-- result:
0
-- !result
SELECT json_remove(parse_json('{"temp": "data", "keep": "this"}'), '$.temp') as parsed_and_removed;
-- result:
{"keep": "this"}
-- !result
SELECT json_remove('{"a": 1}', '$') as root_path;
-- result:
{"a": 1}
-- !result
SELECT json_remove('{"a": 1}', '$.') as dot_path;
-- result:
{"a": 1}
-- !result
SELECT json_remove('{"a": {"b": {"c": {"d": {"e": "deep"}}}}}', '$.a.b.c.d') as remove_deep_nested;
-- result:
{"a": {"b": {"c": {}}}}
-- !result
SELECT json_remove('{"arr": [0, 1, 2, 3, 4]}', '$.arr[0]') as remove_first_element;
-- result:
{"arr": [1, 2, 3, 4]}
-- !result
SELECT json_remove('{"arr": [0, 1, 2, 3, 4]}', '$.arr[4]') as remove_last_element;
-- result:
{"arr": [0, 1, 2, 3]}
-- !result
SELECT json_remove('{"arr": [0, 1, 2, 3, 4]}', '$.arr[2]') as remove_middle_element;
-- result:
{"arr": [0, 1, 3, 4]}
-- !result
SELECT json_remove('{"key with spaces": "value", "normal": "data"}', '$."key with spaces"') as remove_special_key;
-- result:
{"normal": "data"}
-- !result
DROP TABLE json_test_table;
-- result:
-- !result
DROP DATABASE test_json_remove;
-- result:
-- !result

View File

@ -0,0 +1,81 @@
-- name: test_json_remove
CREATE DATABASE test_json_remove;
USE test_json_remove;
-- Test basic json_remove functionality with literal JSON strings
SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a') as remove_single_key;
SELECT json_remove('{"a": 1, "b": 2, "c": 3}', '$.a', '$.c') as remove_multiple_keys;
SELECT json_remove('{"a": 1, "b": [10, 20, 30]}', '$.b[1]') as remove_array_element;
-- Test with nested objects
SELECT json_remove('{"a": {"x": 1, "y": 2}, "b": 3}', '$.a.x') as remove_nested_key;
SELECT json_remove('{"level1": {"level2": {"level3": "value"}}}', '$.level1.level2') as remove_nested_object;
-- Test edge cases
SELECT json_remove('{"a": 1, "b": 2}', '$.nonexistent') as remove_nonexistent_key;
SELECT json_remove('{"a": 1, "b": 2}', 'invalid_path') as remove_invalid_path;
SELECT json_remove('{"a": 1, "b": 2}', '$.a', '$.nonexistent', '$.b') as remove_mixed_paths;
-- Test with empty objects and arrays
SELECT json_remove('{}', '$.a') as remove_from_empty_object;
SELECT json_remove('{"a": 1}', '$.a') as remove_all_keys;
SELECT json_remove('{"a": []}', '$.a') as remove_empty_array;
-- Test with various JSON data types
SELECT json_remove('{"str": "hello", "num": 42, "bool": true, "null": null}', '$.str', '$.bool') as remove_various_types;
-- Test with complex nested structures
SELECT json_remove('{"users": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}], "count": 2}', '$.count') as remove_from_complex;
-- Create a table to test with JSON columns
CREATE TABLE json_test_table (
id INT,
json_data JSON
) DUPLICATE KEY(id);
-- Insert test data
INSERT INTO json_test_table VALUES
(1, parse_json('{"name": "Alice", "age": 30, "city": "New York"}')),
(2, parse_json('{"name": "Bob", "age": 25, "city": "San Francisco", "hobbies": ["reading", "gaming"]}')),
(3, parse_json('{"product": "laptop", "price": 999.99, "specs": {"cpu": "Intel", "ram": "16GB"}}')),
(4, parse_json('{"empty": {}, "array": [1, 2, 3, 4, 5]}')),
(5, parse_json('null'));
-- Test json_remove with table data
SELECT id, json_remove(json_data, '$.age') as without_age FROM json_test_table WHERE id = 1;
SELECT id, json_remove(json_data, '$.city', '$.hobbies') as filtered_data FROM json_test_table WHERE id = 2;
SELECT id, json_remove(json_data, '$.specs.ram') as without_ram FROM json_test_table WHERE id = 3;
SELECT id, json_remove(json_data, '$.array[2]', '$.array[3]') as removed_elements FROM json_test_table WHERE id = 4;
-- Test with NULL JSON data
SELECT id, json_remove(json_data, '$.any') as null_result FROM json_test_table WHERE id = 5;
-- Test json_remove with multiple paths on table data
SELECT id, json_remove(json_data, '$.name', '$.city') as minimal_data FROM json_test_table WHERE id <= 2 ORDER BY id;
-- Test combining json_remove with other JSON functions
SELECT json_length(json_remove('{"a": 1, "b": 2, "c": 3}', '$.b')) as length_after_remove;
SELECT json_keys(json_remove('{"x": 1, "y": 2, "z": 3}', '$.y')) as keys_after_remove;
SELECT json_exists(json_remove('{"a": 1, "b": 2}', '$.a'), '$.a') as exists_after_remove;
-- Test with parse_json
SELECT json_remove(parse_json('{"temp": "data", "keep": "this"}'), '$.temp') as parsed_and_removed;
-- Test error handling and edge cases
SELECT json_remove('{"a": 1}', '$') as root_path;
SELECT json_remove('{"a": 1}', '$.') as dot_path;
-- Test with deeply nested structure
SELECT json_remove('{"a": {"b": {"c": {"d": {"e": "deep"}}}}}', '$.a.b.c.d') as remove_deep_nested;
-- Test removing from arrays at different positions
SELECT json_remove('{"arr": [0, 1, 2, 3, 4]}', '$.arr[0]') as remove_first_element;
SELECT json_remove('{"arr": [0, 1, 2, 3, 4]}', '$.arr[4]') as remove_last_element;
SELECT json_remove('{"arr": [0, 1, 2, 3, 4]}', '$.arr[2]') as remove_middle_element;
-- Test with string values containing special characters
SELECT json_remove('{"key with spaces": "value", "normal": "data"}', '$."key with spaces"') as remove_special_key;
-- Clean up
DROP TABLE json_test_table;
DROP DATABASE test_json_remove;