starrocks/thirdparty/patches/arrow-16.1.0-parquet-map-ke...

34 lines
1.3 KiB
Diff

diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index ec3890a41..f615b3f0a 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -564,10 +564,24 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo current_levels,
return Status::Invalid("Key-value map node must have 1 or 2 child elements. Found: ",
key_value.field_count());
}
- const Node& key_node = *key_value.field(0);
- if (!key_node.is_required()) {
- return Status::Invalid("Map keys must be annotated as required.");
- }
+
+ // Hive may write map keys as optional, so the required-key check is disabled. E.g.:
+ //
+ // required group field_id=-1 hive_schema {
+ // optional int32 field_id=-1 col_int;
+ // optional group field_id=-1 col_map (Map) {
+ // repeated group field_id=-1 map (Map) {
+ // optional byte_array field_id=-1 key (String);
+ // optional byte_array field_id=-1 value (String);
+ // }
+ // }
+ // }
+ //
+ // const Node& key_node = *key_value.field(0);
+ // if (!key_node.is_required()) {
+ // return Status::Invalid("Map keys must be annotated as required.");
+ // }
+
// Arrow doesn't support 1 column maps (i.e. Sets). The options are to either
// make the values column nullable, or process the map as a list. We choose the latter
// as it is simpler.