Compare commits

...

1 Commits

Author SHA1 Message Date
Cursor Agent efb4671148 Add json_update function to JSON manipulation capabilities
Co-authored-by: huanmingwong <huanmingwong@gmail.com>
2025-07-31 02:40:09 +00:00
7 changed files with 450 additions and 0 deletions

View File

@ -1156,6 +1156,194 @@ StatusOr<ColumnPtr> JsonFunctions::_json_keys_without_path(FunctionContext* cont
return result.build(ColumnHelper::is_all_const(columns));
}
// json_update(json, path, value): for every row, produce a copy of the JSON document in which the
// element addressed by `path` is replaced by `value`.
//
// Per-row outcomes:
//   - NULL when any of the three arguments is NULL, when the path is empty, when the path fails
//     to parse, or when rebuilding the document throws a std::exception;
//   - `value` itself when the path is exactly "$" (the root);
//   - otherwise the document rebuilt by _update_json_at_path().
//
// A leading "$." or "$" is stripped and the remainder is split on '.'. Empty segments are dropped,
// so "a..b" is handled like "a.b". Bracketed indices such as "arr[1]" stay inside their segment
// and are handed to _get_parsed_paths() unchanged.
//
// NOTE(review): the arguments are down_cast straight to JsonColumn/BinaryColumn and the row count
// is taken from the first argument only -- presumably callers never pass const-wrapped or
// differently sized columns here; confirm against the expression framework.
StatusOr<ColumnPtr> JsonFunctions::json_update(FunctionContext* context, const Columns& columns) {
    RETURN_IF_COLUMNS_ONLY_NULL(columns);
    if (columns.size() != 3) {
        return Status::InvalidArgument("json_update function requires exactly 3 arguments");
    }

    auto docs = down_cast<const JsonColumn*>(columns[0].get());
    auto path_texts = down_cast<const BinaryColumn*>(columns[1].get());
    auto values = down_cast<const JsonColumn*>(columns[2].get());
    if (!docs || !path_texts || !values) {
        return Status::InvalidArgument("Invalid column types for json_update function");
    }

    JsonColumn::Builder result;
    const size_t row_count = docs->size();
    for (size_t row = 0; row < row_count; ++row) {
        if (docs->is_null(row) || path_texts->is_null(row) || values->is_null(row)) {
            result.append_null();
            continue;
        }

        JsonValue document = docs->get_object(row);
        std::string path_text = path_texts->get_slice(row).to_string();
        JsonValue new_value = values->get_object(row);

        // An empty path addresses nothing.
        if (path_text.empty()) {
            result.append_null();
            continue;
        }

        // Accept both "$.a.b" and "a.b": drop the root marker, or short-circuit when the path is
        // the root itself.
        if (path_text.front() == '$') {
            if (path_text.size() == 1) {
                result.append(new_value);
                continue;
            }
            path_text.erase(0, path_text[1] == '.' ? 2 : 1);
        }

        // Cut the path into its non-empty, dot-separated segments.
        std::vector<std::string> path_exprs;
        for (size_t begin = 0; begin <= path_text.size();) {
            size_t end = path_text.find('.', begin);
            if (end == std::string::npos) {
                end = path_text.size();
            }
            if (end > begin) {
                path_exprs.push_back(path_text.substr(begin, end - begin));
            }
            begin = end + 1;
        }

        std::vector<SimpleJsonPath> parsed_paths;
        if (!_get_parsed_paths(path_exprs, &parsed_paths).ok()) {
            result.append_null();
            continue;
        }

        // Rebuild the document with the addressed element replaced; any failure while doing so
        // degrades to a NULL row instead of failing the whole query.
        try {
            vpack::Builder rebuilt;
            _update_json_at_path(document.to_vslice(), parsed_paths, 0, new_value.to_vslice(), rebuilt);
            result.append(JsonValue(rebuilt.slice()));
        } catch (const std::exception&) {
            result.append_null();
        }
    }

    return result.build(ColumnHelper::is_all_const(columns));
}
// Writes into `builder` a copy of `original` in which the value addressed by
// paths[path_index..] is replaced by `new_value`.
//
// @param original    the JSON value the current path element is applied to
// @param paths       the parsed path; each element may carry a key, an array index (idx >= 0), or both
// @param path_index  position of the element of `paths` to apply to `original`
// @param new_value   the replacement value
// @param builder     receives the rebuilt value (exactly one value is added per call)
//
// Rules, per path element:
//   - past the end of the path: `new_value` is written;
//   - object + key, no index: the matching member is replaced (or recursed into). A missing key is
//     created, with empty intermediate objects for the remaining elements;
//   - object + key + index (e.g. a segment such as "arr[1]"): the matching member must be an array
//     and the index is applied to it. A non-array member, or a missing key, leaves the object
//     unchanged;
//   - array + index: the element is replaced (or recursed into). If the index is past the end and
//     this is the last path element, the array is padded with nulls and `new_value` is appended;
//   - anything else is a type mismatch and `original` is copied through untouched.
void JsonFunctions::_update_json_at_path(const vpack::Slice& original,
                                         const std::vector<SimpleJsonPath>& paths,
                                         size_t path_index,
                                         const vpack::Slice& new_value,
                                         vpack::Builder& builder) {
    if (path_index >= paths.size()) {
        // Reached the end of the path, set the new value
        builder.add(new_value);
        return;
    }

    const SimpleJsonPath& current_path = paths[path_index];
    const bool is_last = (path_index + 1 == paths.size());
    const bool has_index = current_path.idx >= 0;

    // Writes what belongs at the position addressed by the current element: the new value when the
    // path ends here, otherwise the existing value rebuilt according to the rest of the path.
    auto write_target = [&](const vpack::Slice& existing) {
        if (is_last) {
            builder.add(new_value);
        } else {
            _update_json_at_path(existing, paths, path_index + 1, new_value, builder);
        }
    };

    // Copies `array`, substituting the element at current_path.idx.
    auto write_array = [&](const vpack::Slice& array) {
        const auto target = static_cast<size_t>(current_path.idx);
        builder.openArray();
        size_t index = 0;
        for (vpack::ArrayIterator array_it(array); array_it.valid(); array_it.next(), ++index) {
            if (index == target) {
                write_target(array_it.value());
            } else {
                builder.add(array_it.value());
            }
        }
        // Index past the end: only the final path element may grow the array.
        // NOTE(review): the padding is bounded only by the index in the path, so a very large
        // index produces a very large array -- consider enforcing an upper limit.
        if (is_last && target >= index) {
            for (; index < target; ++index) {
                builder.add(vpack::Value(vpack::ValueType::Null));
            }
            builder.add(new_value);
        }
        builder.close();
    };

    if (original.isObject()) {
        builder.openObject();
        bool key_found = false;
        for (auto it : vpack::ObjectIterator(original)) {
            std::string key = it.key.copyString();
            builder.add(vpack::Value(key));
            if (key != current_path.key) {
                // Copy existing key-value pair
                builder.add(it.value);
                continue;
            }
            key_found = true;
            if (!has_index) {
                write_target(it.value);
            } else if (it.value.isArray()) {
                // The element names both a key and an index: apply the index to the member
                // instead of overwriting the whole member.
                write_array(it.value);
            } else {
                // Index requested on a non-array member: type mismatch, keep the member as is.
                builder.add(it.value);
            }
        }
        // A missing key is only created for plain (un-indexed) elements.
        if (!key_found && !has_index) {
            builder.add(vpack::Value(current_path.key));
            if (is_last) {
                builder.add(new_value);
            } else {
                // Not at the end of the path yet - continue from an empty intermediate object
                vpack::Builder intermediate_builder;
                intermediate_builder.openObject();
                intermediate_builder.close();
                _update_json_at_path(intermediate_builder.slice(), paths, path_index + 1, new_value, builder);
            }
        }
        builder.close();
    } else if (original.isArray() && has_index) {
        write_array(original);
    } else {
        // Invalid path or type mismatch, return original
        builder.add(original);
    }
}
StatusOr<ColumnPtr> JsonFunctions::to_json(FunctionContext* context, const Columns& columns) {
RETURN_IF_COLUMNS_ONLY_NULL(columns);
return cast_nested_to_json(columns[0], context->allow_throw_exception());

View File

@ -157,6 +157,14 @@ public:
*/
DEFINE_VECTORIZED_FN(to_json);
/**
* Return a copy of a JSON document in which the value at the specified path is replaced by a
* new value. A row yields NULL when any argument is NULL, or when the path is empty or cannot
* be parsed. The path "$" addresses the root, so the result is the new value itself.
* @param: [json_object, json_path, new_value]
* @paramType: [JsonColumn, BinaryColumn, JsonColumn]
* @return: JsonColumn
*/
DEFINE_VECTORIZED_FN(json_update);
static Status native_json_path_prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope);
static Status native_json_path_close(FunctionContext* context, FunctionContext::FunctionStateScope scope);
@ -223,6 +231,12 @@ private:
static Status _get_parsed_paths(const std::vector<std::string>& path_exprs,
std::vector<SimpleJsonPath>* parsed_paths);
// Recursive helper of json_update: adds to `builder` a copy of `original` in which the value
// addressed by paths[path_index..] is replaced by `new_value`. When `path_index` is past the end
// of `paths`, `new_value` itself is added.
static void _update_json_at_path(const vpack::Slice& original,
const std::vector<SimpleJsonPath>& paths,
size_t path_index,
const vpack::Slice& new_value,
vpack::Builder& builder);
};
} // namespace starrocks

View File

@ -1665,4 +1665,125 @@ TEST_F(JsonFunctionsTest, query_json_obj) {
ASSERT_EQ(result->debug_string(), "[0]");
}
// Test cases for json_update function
TEST_F(JsonFunctionsTest, json_update_basic) {
    // Replacing an existing top-level key must leave the sibling keys alone.
    auto docs = JsonColumn::create();
    docs->append(JsonValue::parse(R"({"a": 1, "b": 2})").value());
    auto paths = BinaryColumn::create();
    paths->append("a");
    auto values = JsonColumn::create();
    values->append(JsonValue::parse("42").value());

    auto status_or_column = JsonFunctions::json_update(_ctx.get(), {docs, paths, values});
    ASSERT_TRUE(status_or_column.ok());

    auto updated = std::static_pointer_cast<JsonColumn>(status_or_column.value());
    ASSERT_EQ(updated->size(), 1);
    EXPECT_EQ(updated->get_object(0).to_string(), R"({"a":42,"b":2})");
}
TEST_F(JsonFunctionsTest, json_update_nested_object) {
    // A dotted path reaches into a nested object; everything outside the target is preserved.
    auto docs = JsonColumn::create();
    docs->append(JsonValue::parse(R"({"a": {"b": 1, "c": 2}, "d": 3})").value());
    auto paths = BinaryColumn::create();
    paths->append("a.b");
    auto values = JsonColumn::create();
    values->append(JsonValue::parse("99").value());

    auto status_or_column = JsonFunctions::json_update(_ctx.get(), {docs, paths, values});
    ASSERT_TRUE(status_or_column.ok());

    auto updated = std::static_pointer_cast<JsonColumn>(status_or_column.value());
    ASSERT_EQ(updated->size(), 1);
    EXPECT_EQ(updated->get_object(0).to_string(), R"({"a":{"b":99,"c":2},"d":3})");
}
TEST_F(JsonFunctionsTest, json_update_array) {
    // A bracketed index replaces a single array element and keeps its neighbours.
    auto docs = JsonColumn::create();
    docs->append(JsonValue::parse(R"({"arr": [1, 2, 3]})").value());
    auto paths = BinaryColumn::create();
    paths->append("arr[1]");
    auto values = JsonColumn::create();
    values->append(JsonValue::parse("99").value());

    auto status_or_column = JsonFunctions::json_update(_ctx.get(), {docs, paths, values});
    ASSERT_TRUE(status_or_column.ok());

    auto updated = std::static_pointer_cast<JsonColumn>(status_or_column.value());
    ASSERT_EQ(updated->size(), 1);
    EXPECT_EQ(updated->get_object(0).to_string(), R"({"arr":[1,99,3]})");
}
TEST_F(JsonFunctionsTest, json_update_new_key) {
    // Updating a key that does not exist yet appends it to the object.
    auto docs = JsonColumn::create();
    docs->append(JsonValue::parse(R"({"a": 1})").value());
    auto paths = BinaryColumn::create();
    paths->append("b");
    auto values = JsonColumn::create();
    values->append(JsonValue::parse("\"new_value\"").value());

    auto status_or_column = JsonFunctions::json_update(_ctx.get(), {docs, paths, values});
    ASSERT_TRUE(status_or_column.ok());

    auto updated = std::static_pointer_cast<JsonColumn>(status_or_column.value());
    ASSERT_EQ(updated->size(), 1);
    EXPECT_EQ(updated->get_object(0).to_string(), R"({"a":1,"b":"new_value"})");
}
TEST_F(JsonFunctionsTest, json_update_null_handling) {
    // A NULL document must produce a NULL result row, whatever the path and value are.
    auto docs = JsonColumn::create();
    docs->append_null();
    auto paths = BinaryColumn::create();
    paths->append("a");
    auto values = JsonColumn::create();
    values->append(JsonValue::parse("1").value());

    auto status_or_column = JsonFunctions::json_update(_ctx.get(), {docs, paths, values});
    ASSERT_TRUE(status_or_column.ok());

    auto updated = std::static_pointer_cast<JsonColumn>(status_or_column.value());
    ASSERT_EQ(updated->size(), 1);
    ASSERT_TRUE(updated->is_null(0));
}
TEST_F(JsonFunctionsTest, json_update_invalid_path) {
    // A path whose keys do not exist yet is not an error: json_update creates the missing
    // intermediate objects and stores the value at the end of the path, as the function's
    // documentation states. The previous expectation (document returned unchanged) contradicted
    // both the implementation and the documentation.
    auto json_col = JsonColumn::create();
    auto path_col = BinaryColumn::create();
    auto value_col = JsonColumn::create();
    json_col->append(JsonValue::parse(R"({"a": 1})").value());
    path_col->append("b.c.d"); // None of b, b.c or b.c.d exists in the document
    value_col->append(JsonValue::parse("99").value());

    auto result = JsonFunctions::json_update(_ctx.get(), {json_col, path_col, value_col});
    ASSERT_TRUE(result.ok());
    auto result_col = std::static_pointer_cast<JsonColumn>(result.value());
    ASSERT_EQ(result_col->size(), 1);

    // The existing key is preserved and the nested structure b -> c -> d is created.
    auto result_json = result_col->get_object(0);
    EXPECT_EQ(result_json.to_string(), R"({"a":1,"b":{"c":{"d":99}}})");
}
} // namespace starrocks

View File

@ -0,0 +1,78 @@
# json_update
Updates a JSON object by setting the value at the specified path and returns the modified JSON object.
tip
All of the JSON functions and operators are listed in the navigation and on the overview page
Accelerate your queries with generated columns
## Syntax
```
json_update(json_object_expr, json_path, new_value)
```
## Parameters
* `json_object_expr`: the expression that represents the JSON object. The object can be a JSON column, or a JSON object that is produced by a JSON constructor function such as PARSE_JSON.
* `json_path`: the expression that represents the path to an element in the JSON object. The value of this parameter is a string. For information about the JSON path syntax that is supported by StarRocks, see Overview of JSON functions and operators.
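* `new_value`: the new value to set at the specified path. It must be an expression of the JSON type, for example a JSON column or the result of PARSE_JSON; any valid JSON value (object, array, string, number, boolean, or null) is accepted.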
## Return value
Returns a JSON object with the value at the specified path updated to the new value.
> If the path does not exist, the function will create the necessary structure to set the value at that path.
> If any of the input parameters is NULL, or if `json_path` is an empty string or cannot be parsed, the function returns NULL.
## Examples
Example 1: Update a simple key in a JSON object.
```sql
mysql> SELECT json_update(PARSE_JSON('{"a": 1, "b": 2}'), 'a', PARSE_JSON('42'));
-> {"a": 42, "b": 2}
```
Example 2: Update a nested value in a JSON object.
```sql
mysql> SELECT json_update(PARSE_JSON('{"a": {"b": 1, "c": 2}, "d": 3}'), 'a.b', PARSE_JSON('99'));
-> {"a": {"b": 99, "c": 2}, "d": 3}
```
Example 3: Update an array element.
```sql
mysql> SELECT json_update(PARSE_JSON('{"arr": [1, 2, 3]}'), 'arr[1]', PARSE_JSON('99'));
-> {"arr": [1, 99, 3]}
```
Example 4: Add a new key to a JSON object.
```sql
mysql> SELECT json_update(PARSE_JSON('{"a": 1}'), 'b', PARSE_JSON('"new_value"'));
-> {"a": 1, "b": "new_value"}
```
Example 5: Update using a JSON path with root notation.
```sql
mysql> SELECT json_update(PARSE_JSON('{"a": 1, "b": 2}'), '$.a', PARSE_JSON('100'));
-> {"a": 100, "b": 2}
```
## Usage notes
* The function supports both simple path notation (e.g., `'a.b'`) and JSON path notation with the root symbol (e.g., `'$.a.b'`). A path of just `'$'` refers to the root, so the whole document is replaced by `new_value`.
* Array indices are specified using square brackets (e.g., `'arr[0]'`).
* If the specified path doesn't exist, the function will create the necessary intermediate objects.
* The function preserves the original structure of the JSON object, only modifying the specified path.
* This function is useful for updating specific fields in JSON documents stored in tables without having to reconstruct the entire JSON object.

View File

@ -37,6 +37,7 @@ JSON query functions and processing functions are used to query and process JSON
| [json_keys](./json-query-and-processing-functions/json_keys.md) | Returns the top-level keys from a JSON object as a JSON array, or, if a path is specified, the top-level keys from the path. | `SELECT JSON_KEYS('{"a": 1, "b": 2, "c": 3}');` | `["a", "b", "c"]`|
| [json_length](./json-query-and-processing-functions/json_length.md) | Returns the length of a JSON document. | `SELECT json_length('{"Name": "Alice"}');` | `1` |
| [json_string](./json-query-and-processing-functions/json_string.md) | Converts the JSON object to a JSON string | `SELECT json_string(parse_json('{"Name": "Alice"}'));` | `{"Name": "Alice"}` |
| [json_update](./json-query-and-processing-functions/json_update.md) | Updates a JSON object by setting the value at the specified path | `SELECT json_update(parse_json('{"a": 1}'), 'a', parse_json('2'));` | `{"a": 2}` |
## JSON operators

View File

@ -872,6 +872,8 @@ vectorized_functions = [
"JsonFunctions::native_json_path_prepare", "JsonFunctions::native_json_path_close"],
[110100, "to_json", False, False, "JSON", ["ANY_MAP"], "JsonFunctions::to_json"],
[110101, "to_json", False, False, "JSON", ["ANY_STRUCT"], "JsonFunctions::to_json"],
[110102, "json_update", False, False, "JSON", ["JSON", "VARCHAR", "JSON"], "JsonFunctions::json_update",
"JsonFunctions::native_json_path_prepare", "JsonFunctions::native_json_path_close"],
# aes and base64 function
[120100, "aes_encrypt", False, False, "VARCHAR", ["VARCHAR", "VARCHAR"], "EncryptionFunctions::aes_encrypt"],

View File

@ -0,0 +1,46 @@
-- json_update demonstration queries.
-- Every statement is followed by the result its author expects from it.

-- Test 1: replace the value of an existing top-level key
SELECT json_update(PARSE_JSON('{"a": 1, "b": 2}'), 'a', PARSE_JSON('42')) AS test1;
-- Expected: {"a": 42, "b": 2}

-- Test 2: replace a value inside a nested object
SELECT json_update(PARSE_JSON('{"a": {"b": 1, "c": 2}, "d": 3}'), 'a.b', PARSE_JSON('99')) AS test2;
-- Expected: {"a": {"b": 99, "c": 2}, "d": 3}

-- Test 3: replace one element of an array
SELECT json_update(PARSE_JSON('{"arr": [1, 2, 3]}'), 'arr[1]', PARSE_JSON('99')) AS test3;
-- Expected: {"arr": [1, 99, 3]}

-- Test 4: add a key that is not present yet
SELECT json_update(PARSE_JSON('{"a": 1}'), 'b', PARSE_JSON('"new_value"')) AS test4;
-- Expected: {"a": 1, "b": "new_value"}

-- Test 5: path written with the root symbol
SELECT json_update(PARSE_JSON('{"a": 1, "b": 2}'), '$.a', PARSE_JSON('100')) AS test5;
-- Expected: {"a": 100, "b": 2}

-- Test 6: new value is a string
SELECT json_update(PARSE_JSON('{"name": "John", "age": 30}'), 'name', PARSE_JSON('"Jane"')) AS test6;
-- Expected: {"name": "Jane", "age": 30}

-- Test 7: new value is a boolean
SELECT json_update(PARSE_JSON('{"active": false, "count": 5}'), 'active', PARSE_JSON('true')) AS test7;
-- Expected: {"active": true, "count": 5}

-- Test 8: new value is JSON null
SELECT json_update(PARSE_JSON('{"data": "value"}'), 'data', PARSE_JSON('null')) AS test8;
-- Expected: {"data": null}

-- Test 9: deeply nested path
SELECT json_update(
    PARSE_JSON('{"user": {"profile": {"name": "John", "settings": {"theme": "dark"}}}}'),
    'user.profile.settings.theme',
    PARSE_JSON('"light"')
) AS test9;
-- Expected: {"user": {"profile": {"name": "John", "settings": {"theme": "light"}}}}

-- Test 10: a SQL NULL document
SELECT json_update(NULL, 'a', PARSE_JSON('1')) AS test10;
-- Expected: NULL