From 865b915962a784fd4b81a5df3537a34de991979e Mon Sep 17 00:00:00 2001 From: miomiocat <284487410@qq.com> Date: Fri, 28 Jul 2023 14:50:17 +0800 Subject: [PATCH] [Enhancement] improve error message handling of JNI connector and support char type of paimon connector (#28044) Fixes #27992 - fail fast for unsupported types of paimon table - output error massage of JNI to client terminal - support char type of paimon connector - some minor refactor Signed-off-by: miomiocat <284487410@qq.com> --- be/src/connector/hive_connector.cpp | 4 + be/src/exec/hdfs_scanner.h | 1 + be/src/exec/jni_scanner.cpp | 38 +++++---- be/src/exec/jni_scanner.h | 5 +- .../connector/ColumnTypeConverter.java | 55 +++++++------ .../paimon/PaimonColumnConverterTest.java | 79 +++++++++++++++---- .../starrocks/jni/connector/ColumnType.java | 2 +- .../paimon/reader/PaimonTypeUtils.java | 12 ++- 8 files changed, 139 insertions(+), 57 deletions(-) diff --git a/be/src/connector/hive_connector.cpp b/be/src/connector/hive_connector.cpp index 56174d20168..91e60e53779 100644 --- a/be/src/connector/hive_connector.cpp +++ b/be/src/connector/hive_connector.cpp @@ -523,6 +523,10 @@ Status HiveDataSource::_init_scanner(RuntimeState* state) { RETURN_IF_ERROR(scanner->init(state, scanner_params)); Status st = scanner->open(state); if (!st.ok()) { + if (scanner->is_jni_scanner()) { + return st; + } + auto msg = fmt::format("file = {}", native_file_path); // After catching the AWS 404 file not found error and returning it to the FE, diff --git a/be/src/exec/hdfs_scanner.h b/be/src/exec/hdfs_scanner.h index c4b85b03597..2f86a4f4b02 100644 --- a/be/src/exec/hdfs_scanner.h +++ b/be/src/exec/hdfs_scanner.h @@ -310,6 +310,7 @@ public: virtual Status do_get_next(RuntimeState* runtime_state, ChunkPtr* chunk) = 0; virtual Status do_init(RuntimeState* runtime_state, const HdfsScannerParams& scanner_params) = 0; virtual void do_update_counter(HdfsScanProfile* profile); + virtual bool is_jni_scanner() { return false; } void enter_pending_queue(); // how long it stays inside pending queue. diff --git a/be/src/exec/jni_scanner.cpp b/be/src/exec/jni_scanner.cpp index 21839421f26..d34e621793f 100644 --- a/be/src/exec/jni_scanner.cpp +++ b/be/src/exec/jni_scanner.cpp @@ -25,10 +25,12 @@ namespace starrocks { Status JniScanner::_check_jni_exception(JNIEnv* _jni_env, const std::string& message) { - if (_jni_env->ExceptionCheck()) { + if (jthrowable thr = _jni_env->ExceptionOccurred(); thr) { + std::string jni_error_message = JVMFunctionHelper::getInstance().dumpExceptionString(thr); _jni_env->ExceptionDescribe(); _jni_env->ExceptionClear(); - return Status::InternalError(message); + _jni_env->DeleteLocalRef(thr); + return Status::InternalError(message + " java exception details: " + jni_error_message); } return Status::OK(); } @@ -139,10 +141,11 @@ Status JniScanner::_get_next_chunk(JNIEnv* _jni_env, long* chunk_meta) { return Status::OK(); } -template +template Status JniScanner::_append_primitive_data(const FillColumnArgs& args) { char* column_ptr = static_cast(next_chunk_meta_as_ptr()); using ColumnType = typename starrocks::RunTimeColumnType; + using CppType = typename starrocks::RunTimeCppType; auto* runtime_column = down_cast(args.column); runtime_column->resize_uninitialized(args.num_rows); memcpy(runtime_column->get_data().data(), column_ptr, args.num_rows * sizeof(CppType)); @@ -169,12 +172,13 @@ Status JniScanner::_append_string_data(const FillColumnArgs& args) { return Status::OK(); } -template +template Status JniScanner::_append_decimal_data(const FillColumnArgs& args) { int* offset_ptr = static_cast(next_chunk_meta_as_ptr()); char* column_ptr = static_cast(next_chunk_meta_as_ptr()); using ColumnType = typename starrocks::RunTimeColumnType; + using CppType = typename starrocks::RunTimeCppType; auto* runtime_column = down_cast(args.column); runtime_column->resize_uninitialized(args.num_rows); CppType* runtime_data = runtime_column->get_data().data(); @@ -365,37 +369,41 @@ Status JniScanner::_fill_column(FillColumnArgs* pargs) { pargs->column = data_column; pargs->nulls = null_data.data(); } else { - // otherwise we skil this chunk meta, because in Java side - // we assume every column starswith `null_column`. + // otherwise we skip this chunk meta, because in Java side + // we assume every column starts with `null_column`. } LogicalType column_type = args.slot_type.type; if (column_type == LogicalType::TYPE_BOOLEAN) { - RETURN_IF_ERROR((_append_primitive_data(args))); + RETURN_IF_ERROR((_append_primitive_data(args))); + } else if (column_type == LogicalType::TYPE_TINYINT) { + RETURN_IF_ERROR((_append_primitive_data(args))); } else if (column_type == LogicalType::TYPE_SMALLINT) { - RETURN_IF_ERROR((_append_primitive_data(args))); + RETURN_IF_ERROR((_append_primitive_data(args))); } else if (column_type == LogicalType::TYPE_INT) { - RETURN_IF_ERROR((_append_primitive_data(args))); + RETURN_IF_ERROR((_append_primitive_data(args))); } else if (column_type == LogicalType::TYPE_FLOAT) { - RETURN_IF_ERROR((_append_primitive_data(args))); + RETURN_IF_ERROR((_append_primitive_data(args))); } else if (column_type == LogicalType::TYPE_BIGINT) { - RETURN_IF_ERROR((_append_primitive_data(args))); + RETURN_IF_ERROR((_append_primitive_data(args))); } else if (column_type == LogicalType::TYPE_DOUBLE) { - RETURN_IF_ERROR((_append_primitive_data(args))); + RETURN_IF_ERROR((_append_primitive_data(args))); } else if (column_type == LogicalType::TYPE_VARCHAR) { RETURN_IF_ERROR((_append_string_data(args))); } else if (column_type == LogicalType::TYPE_CHAR) { RETURN_IF_ERROR((_append_string_data(args))); + } else if (column_type == LogicalType::TYPE_VARBINARY) { + RETURN_IF_ERROR((_append_string_data(args))); } else if (column_type == LogicalType::TYPE_DATE) { RETURN_IF_ERROR((_append_date_data(args))); } else if (column_type == LogicalType::TYPE_DATETIME) { RETURN_IF_ERROR((_append_datetime_data(args))); } else if (column_type == LogicalType::TYPE_DECIMAL32) { - RETURN_IF_ERROR((_append_decimal_data(args))); + RETURN_IF_ERROR((_append_decimal_data(args))); } else if (column_type == LogicalType::TYPE_DECIMAL64) { - RETURN_IF_ERROR((_append_decimal_data(args))); + RETURN_IF_ERROR((_append_decimal_data(args))); } else if (column_type == LogicalType::TYPE_DECIMAL128) { - RETURN_IF_ERROR((_append_decimal_data(args))); + RETURN_IF_ERROR((_append_decimal_data(args))); } else if (column_type == LogicalType::TYPE_ARRAY) { RETURN_IF_ERROR((_append_array_data(args))); } else if (column_type == LogicalType::TYPE_MAP) { diff --git a/be/src/exec/jni_scanner.h b/be/src/exec/jni_scanner.h index 25e5cbe9ddc..7b05da68a94 100644 --- a/be/src/exec/jni_scanner.h +++ b/be/src/exec/jni_scanner.h @@ -35,6 +35,7 @@ public: void do_close(RuntimeState* runtime_state) noexcept override; Status do_get_next(RuntimeState* runtime_state, ChunkPtr* chunk) override; Status do_init(RuntimeState* runtime_state, const HdfsScannerParams& scanner_params) override; + bool is_jni_scanner() override { return true; } private: struct FillColumnArgs { @@ -57,10 +58,10 @@ private: Status _get_next_chunk(JNIEnv* _jni_env, long* chunk_meta); - template + template Status _append_primitive_data(const FillColumnArgs& args); - template + template Status _append_decimal_data(const FillColumnArgs& args); template diff --git a/fe/fe-core/src/main/java/com/starrocks/connector/ColumnTypeConverter.java b/fe/fe-core/src/main/java/com/starrocks/connector/ColumnTypeConverter.java index b8953b12b6f..711a5e04e98 100644 --- a/fe/fe-core/src/main/java/com/starrocks/connector/ColumnTypeConverter.java +++ b/fe/fe-core/src/main/java/com/starrocks/connector/ColumnTypeConverter.java @@ -32,18 +32,17 @@ import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.iceberg.types.Types; import org.apache.paimon.types.BigIntType; +import org.apache.paimon.types.BinaryType; import org.apache.paimon.types.BooleanType; -import org.apache.paimon.types.DataField; +import org.apache.paimon.types.CharType; import org.apache.paimon.types.DataTypeDefaultVisitor; import org.apache.paimon.types.DateType; import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.DoubleType; import org.apache.paimon.types.FloatType; import org.apache.paimon.types.IntType; -import org.apache.paimon.types.RowType; import org.apache.paimon.types.SmallIntType; import org.apache.paimon.types.TimestampType; -import org.apache.paimon.types.TinyIntType; import org.apache.paimon.types.VarCharType; import java.util.ArrayList; @@ -394,6 +393,14 @@ public class ColumnTypeConverter { private static final PaimonToHiveTypeVisitor INSTANCE = new PaimonToHiveTypeVisitor(); + public Type visit(BinaryType binaryType) { + return ScalarType.createType(PrimitiveType.VARBINARY); + } + + public Type visit(CharType charType) { + return ScalarType.createCharType(charType.getLength()); + } + public Type visit(VarCharType varCharType) { return ScalarType.createDefaultExternalTableString(); } @@ -406,9 +413,10 @@ public class ColumnTypeConverter { return ScalarType.createUnifiedDecimalType(decimalType.getPrecision(), decimalType.getScale()); } - public Type visit(TinyIntType tinyIntType) { - return ScalarType.createType(PrimitiveType.TINYINT); - } + // TODO: uncomment this and unit test case when this type is supported in paimon connector + //public Type visit(TinyIntType tinyIntType) { + // return ScalarType.createType(PrimitiveType.TINYINT); + //} public Type visit(SmallIntType smallIntType) { return ScalarType.createType(PrimitiveType.SMALLINT); @@ -438,24 +446,27 @@ public class ColumnTypeConverter { return ScalarType.createType(PrimitiveType.DATETIME); } - public Type visit(org.apache.paimon.types.ArrayType arrayType) { - return new ArrayType(fromPaimonType(arrayType.getElementType())); - } + // TODO: uncomment this and unit test case when this type is supported in paimon connector + //public Type visit(org.apache.paimon.types.ArrayType arrayType) { + // return new ArrayType(fromPaimonType(arrayType.getElementType())); + //} - public Type visit(org.apache.paimon.types.MapType mapType) { - return new MapType(fromPaimonType(mapType.getKeyType()), fromPaimonType(mapType.getValueType())); - } + // TODO: uncomment this and unit test case when this type is supported in paimon connector + //public Type visit(org.apache.paimon.types.MapType mapType) { + // return new MapType(fromPaimonType(mapType.getKeyType()), fromPaimonType(mapType.getValueType())); + //} - public Type visit(RowType rowType) { - List fields = rowType.getFields(); - ArrayList structFields = new ArrayList<>(fields.size()); - for (DataField field : fields) { - String fieldName = field.name(); - Type fieldType = fromPaimonType(field.type()); - structFields.add(new StructField(fieldName, fieldType)); - } - return new StructType(structFields); - } + // TODO: uncomment this and unit test case when this type is supported in paimon connector + //public Type visit(RowType rowType) { + // List fields = rowType.getFields(); + // ArrayList structFields = new ArrayList<>(fields.size()); + // for (DataField field : fields) { + // String fieldName = field.name(); + // Type fieldType = fromPaimonType(field.type()); + // structFields.add(new StructField(fieldName, fieldType)); + // } + // return new StructType(structFields); + //} @Override protected Type defaultMethod(org.apache.paimon.types.DataType dataType) { diff --git a/fe/fe-core/src/test/java/com/starrocks/connector/paimon/PaimonColumnConverterTest.java b/fe/fe-core/src/test/java/com/starrocks/connector/paimon/PaimonColumnConverterTest.java index 84f24d2a697..ce99fb58414 100644 --- a/fe/fe-core/src/test/java/com/starrocks/connector/paimon/PaimonColumnConverterTest.java +++ b/fe/fe-core/src/test/java/com/starrocks/connector/paimon/PaimonColumnConverterTest.java @@ -20,7 +20,9 @@ import com.starrocks.catalog.Type; import com.starrocks.connector.ColumnTypeConverter; import org.apache.paimon.types.ArrayType; import org.apache.paimon.types.BigIntType; +import org.apache.paimon.types.BinaryType; import org.apache.paimon.types.BooleanType; +import org.apache.paimon.types.CharType; import org.apache.paimon.types.DataField; import org.apache.paimon.types.DateType; import org.apache.paimon.types.DecimalType; @@ -34,6 +36,7 @@ import org.apache.paimon.types.TimestampType; import org.apache.paimon.types.TinyIntType; import org.apache.paimon.types.VarCharType; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import java.util.Arrays; @@ -42,7 +45,22 @@ import java.util.List; public class PaimonColumnConverterTest { @Test - public void testConvertString() { + public void testConvertBinary() { + BinaryType paimonType = new BinaryType(); + Type result = ColumnTypeConverter.fromPaimonType(paimonType); + Assert.assertEquals(result, Type.VARBINARY); + } + + @Test + public void testConvertChar() { + CharType paimonType = new CharType(10); + Type result = ColumnTypeConverter.fromPaimonType(paimonType); + Type srType = ScalarType.createCharType(10); + Assert.assertEquals(result, srType); + } + + @Test + public void testConvertVarchar() { VarCharType paimonType = new VarCharType(); Type result = ColumnTypeConverter.fromPaimonType(paimonType); Type srType = ScalarType.createDefaultExternalTableString(); @@ -56,6 +74,31 @@ public class PaimonColumnConverterTest { Assert.assertEquals(result, Type.BOOLEAN); } + @Test + public void testConvertDecimal() { + int precision = 9; + int scale = 5; + DecimalType paimonType = new DecimalType(precision, scale); + Type result = ColumnTypeConverter.fromPaimonType(paimonType); + Type srType = ScalarType.createUnifiedDecimalType(precision, scale); + Assert.assertEquals(result, srType); + } + + + @Ignore + public void testConvertTinyInt() { + TinyIntType paimonType = new TinyIntType(); + Type result = ColumnTypeConverter.fromPaimonType(paimonType); + Assert.assertEquals(result, Type.TINYINT); + } + + @Test + public void testConvertSmallint() { + SmallIntType paimonType = new SmallIntType(); + Type result = ColumnTypeConverter.fromPaimonType(paimonType); + Assert.assertEquals(result, Type.SMALLINT); + } + @Test public void testConvertInt() { IntType paimonType = new IntType(); @@ -63,6 +106,20 @@ public class PaimonColumnConverterTest { Assert.assertEquals(result, Type.INT); } + @Test + public void testConvertBigint() { + BigIntType paimonType = new BigIntType(); + Type result = ColumnTypeConverter.fromPaimonType(paimonType); + Assert.assertEquals(result, Type.BIGINT); + } + + @Test + public void testConvertFlout() { + FloatType paimonType = new FloatType(); + Type result = ColumnTypeConverter.fromPaimonType(paimonType); + Assert.assertEquals(result, Type.FLOAT); + } + @Test public void testConvertDouble() { DoubleType paimonType = new DoubleType(); @@ -84,17 +141,7 @@ public class PaimonColumnConverterTest { Assert.assertEquals(result, Type.DATETIME); } - @Test - public void testConvertDecimal() { - int precision = 9; - int scale = 5; - DecimalType paimonType = new DecimalType(precision, scale); - Type result = ColumnTypeConverter.fromPaimonType(paimonType); - Type srType = ScalarType.createUnifiedDecimalType(precision, scale); - Assert.assertEquals(result, srType); - } - - @Test + @Ignore public void testConvertArray() { ArrayType paimonType = new ArrayType(new SmallIntType()); Type result = ColumnTypeConverter.fromPaimonType(paimonType); @@ -103,7 +150,7 @@ public class PaimonColumnConverterTest { Assert.assertEquals(Type.SMALLINT, srType.getItemType()); } - @Test + @Ignore public void testConvertMap() { MapType paimonType = new MapType(new VarCharType(20), new TimestampType()); Type result = ColumnTypeConverter.fromPaimonType(paimonType); @@ -113,11 +160,11 @@ public class PaimonColumnConverterTest { Assert.assertEquals(Type.DATETIME, srType.getValueType()); } - @Test + @Ignore public void testConvertStruct() { List fields = Arrays.asList( - new DataField(0, "f0", new TinyIntType()), + new DataField(0, "f0", new BinaryType()), new DataField(1, "f1", new BigIntType()), new DataField(2, "f2", new FloatType())); RowType paimonType = new RowType(fields); @@ -125,7 +172,7 @@ public class PaimonColumnConverterTest { Assert.assertTrue(result instanceof StructType); StructType srType = (StructType) result; Assert.assertEquals(3, srType.getFields().size()); - Assert.assertEquals(Type.TINYINT, srType.getField("f0").getType()); + Assert.assertEquals(Type.VARBINARY, srType.getField("f0").getType()); Assert.assertEquals(Type.BIGINT, srType.getField("f1").getType()); Assert.assertEquals(Type.FLOAT, srType.getField("f2").getType()); } diff --git a/java-extensions/jni-connector/src/main/java/com/starrocks/jni/connector/ColumnType.java b/java-extensions/jni-connector/src/main/java/com/starrocks/jni/connector/ColumnType.java index 3246d0a49f7..92dee3dd3e7 100644 --- a/java-extensions/jni-connector/src/main/java/com/starrocks/jni/connector/ColumnType.java +++ b/java-extensions/jni-connector/src/main/java/com/starrocks/jni/connector/ColumnType.java @@ -198,7 +198,7 @@ public class ColumnType { } if (typeValue == null) { - throw new RuntimeException("Unknown type: " + t); + throw new RuntimeException("Unsupported type: " + t); } } diff --git a/java-extensions/paimon-reader/src/main/java/com/starrocks/paimon/reader/PaimonTypeUtils.java b/java-extensions/paimon-reader/src/main/java/com/starrocks/paimon/reader/PaimonTypeUtils.java index de410ed3afb..9a8bec16e6d 100644 --- a/java-extensions/paimon-reader/src/main/java/com/starrocks/paimon/reader/PaimonTypeUtils.java +++ b/java-extensions/paimon-reader/src/main/java/com/starrocks/paimon/reader/PaimonTypeUtils.java @@ -18,6 +18,7 @@ import org.apache.paimon.types.ArrayType; import org.apache.paimon.types.BigIntType; import org.apache.paimon.types.BinaryType; import org.apache.paimon.types.BooleanType; +import org.apache.paimon.types.CharType; import org.apache.paimon.types.DataType; import org.apache.paimon.types.DataTypeDefaultVisitor; import org.apache.paimon.types.DateType; @@ -29,6 +30,7 @@ import org.apache.paimon.types.MapType; import org.apache.paimon.types.RowType; import org.apache.paimon.types.SmallIntType; import org.apache.paimon.types.TimestampType; +import org.apache.paimon.types.TinyIntType; import org.apache.paimon.types.VarCharType; import java.util.stream.Collectors; @@ -45,6 +47,10 @@ public class PaimonTypeUtils { private static final PaimonToHiveTypeVisitor INSTANCE = new PaimonToHiveTypeVisitor(); + public String visit(CharType charType) { + return "string"; + } + public String visit(VarCharType varCharType) { return "string"; } @@ -61,6 +67,10 @@ public class PaimonTypeUtils { return String.format("decimal(%d,%d)", decimalType.getPrecision(), decimalType.getScale()); } + public String visit(TinyIntType tinyIntType) { + return "tinyint"; + } + public String visit(SmallIntType smallIntType) { return "short"; } @@ -107,7 +117,7 @@ public class PaimonTypeUtils { @Override protected String defaultMethod(DataType dataType) { - return "unsupported_type"; + return dataType.getTypeRoot().name(); } } }