[BugFix] Fix BE crash in tracer when jaeger_endpoint is invalid (#63257)

## Why I'm doing:
When BE config jaeger_endpoint is invalid, BE will crash in tracer:
```
*** Aborted at 1757906071 (unix time) try "date -d @1757906071" if you are using GNU date ***
PC: @          0xbe8d0af starrocks::Tracer::start_trace_or_add_span(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)
*** SIGSEGV (@0x0) received by PID 3328297 (TID 0x7f30f7d2c640) LWP(3328810) from PID 0; stack trace: ***
    @     0x7f31e6d41ee8 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x99ee7)
    @         0x109a4ae8 google::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*)
    @     0x7f31e6cea520 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x4251f)
    @          0xbe8d0af starrocks::Tracer::start_trace_or_add_span(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)
    @          0xcc1383c starrocks::OlapTableSink::init(starrocks::TDataSink const&, starrocks::RuntimeState*)
    @          0xcc09965 starrocks::DataSink::create_data_sink(starrocks::RuntimeState*, starrocks::TDataSink const&, std::vector<starrocks::TExpr, std::allocator<starrocks::TExpr> > const&, starrocks::TPlanFragmentExecParams const&, int, starrocks::RowDescriptor const&, std::uniq@^A
    @          0xcc5475f starrocks::pipeline::FragmentExecutor::_prepare_pipeline_driver(starrocks::ExecEnv*, starrocks::pipeline::UnifiedExecPlanFragmentParams const&)
    @          0xcc55a96 starrocks::pipeline::FragmentExecutor::prepare(starrocks::ExecEnv*, starrocks::TExecPlanFragmentParams const&, starrocks::TExecPlanFragmentParams const&)
    @          0xcdea3f5 starrocks::PInternalServiceImplBase<starrocks::PInternalService>::_exec_plan_fragment_by_pipeline(starrocks::TExecPlanFragmentParams const&, starrocks::TExecPlanFragmentParams const&)
    @          0xcde9873 starrocks::PInternalServiceImplBase<starrocks::PInternalService>::_exec_plan_fragment(brpc::Controller*, starrocks::PExecPlanFragmentRequest const*)
    @          0xcde9403 starrocks::PInternalServiceImplBase<starrocks::PInternalService>::_exec_plan_fragment(google::protobuf::RpcController*, starrocks::PExecPlanFragmentRequest const*, starrocks::PExecPlanFragmentResult*, google::protobuf::Closure*)
    @          0xcb1e547 starrocks::PriorityThreadPool::work_thread(int)
    @         0x1095100b thread_proxy
    @     0x7f31e6d3cac3 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x94ac2)
    @     0x7f31e6dce850 (/usr/lib/x86_64-linux-gnu/libc.so.6+0x12684f)
```

Invalid jaeger_endpoint example: 
`jaeger_endpoint = http://localhost:14268`

Valid jaeger_endpoint example:
`jaeger_endpoint = localhost:14268 `

## What I'm doing:
Create a no-op tracer to protect BE from crash when jaeger_endpoint is invalid.

Signed-off-by: xiangguangyxg <xiangguangyxg@gmail.com>
This commit is contained in:
xiangguangyxg 2025-09-19 10:02:46 +08:00 committed by GitHub
parent c014cdb711
commit 2fc227d19a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 1 deletions

View File

@ -1094,6 +1094,8 @@ CONF_Int64(rpc_connect_timeout_ms, "30000");
CONF_Int32(max_batch_publish_latency_ms, "100");
// Config for opentelemetry tracing.
// Valid example: jaeger_endpoint = localhost:14268
// Invalid example: jaeger_endpoint = http://localhost:14268
CONF_String(jaeger_endpoint, "");
// Config for query debug trace

View File

@ -40,12 +40,17 @@ void Tracer::release_instance() {
Instance().shutdown();
}
static inline opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer> create_no_op_tracer() {
return opentelemetry::trace::Provider::GetTracerProvider()->GetTracer("no-op", OPENTELEMETRY_SDK_VERSION);
}
void Tracer::init(const std::string& service_name) {
if (!config::jaeger_endpoint.empty()) {
opentelemetry::exporter::jaeger::JaegerExporterOptions opts;
vector<string> host_port = strings::Split(config::jaeger_endpoint, ":");
if (host_port.size() != 2) {
LOG(WARNING) << "bad jaeger_endpoint " << config::jaeger_endpoint;
_tracer = create_no_op_tracer();
return;
}
opts.endpoint = host_port[0];
@ -63,7 +68,7 @@ void Tracer::init(const std::string& service_name) {
new opentelemetry::sdk::trace::TracerProvider(std::move(processor), jaeger_resource));
_tracer = provider->GetTracer(service_name, OPENTELEMETRY_SDK_VERSION);
} else {
_tracer = opentelemetry::trace::Provider::GetTracerProvider()->GetTracer("no-op", OPENTELEMETRY_SDK_VERSION);
_tracer = create_no_op_tracer();
}
}