831 lines
40 KiB
C++
831 lines
40 KiB
C++
// Copyright 2021-present StarRocks, Inc. All rights reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// This file is based on code available under the Apache license here:
|
|
// https://github.com/apache/incubator-doris/blob/master/be/src/agent/agent_server.cpp
|
|
|
|
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "agent/agent_server.h"
|
|
|
|
#include <thrift/protocol/TDebugProtocol.h>
|
|
|
|
#include <filesystem>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "agent/agent_task.h"
|
|
#include "agent/master_info.h"
|
|
#include "agent/task_signatures_manager.h"
|
|
#include "agent/task_worker_pool.h"
|
|
#include "common/config.h"
|
|
#include "common/logging.h"
|
|
#include "common/status.h"
|
|
#include "gutil/strings/substitute.h"
|
|
#include "runtime/exec_env.h"
|
|
#include "storage/snapshot_manager.h"
|
|
#include "testutil/sync_point.h"
|
|
#include "util/phmap/phmap.h"
|
|
#include "util/threadpool.h"
|
|
|
|
namespace starrocks {
|
|
|
|
namespace {
// Queue capacity used for the dynamic thread pools that are not given an unbounded queue
// (the clone pool, and the publish_version pool in BE_TEST builds).
constexpr size_t DEFAULT_DYNAMIC_THREAD_POOL_QUEUE_SIZE{2048};
// Lower bound on the maximum thread count of the clone thread pool.
constexpr size_t MIN_CLONE_TASK_THREADS_IN_POOL{2};
// Multiplier handed to calc_real_num_threads() for the remote/replicate snapshot pools;
// it only takes effect when the configured thread count is 0.
constexpr int32_t REPLICATION_CPU_CORES_MULTIPLIER{4};
} // namespace
|
|
|
|
// Hash functor for the enum-keyed maps in submit_tasks(): hashes the enum's underlying integer type.
using TTaskTypeHash = std::hash<std::underlying_type_t<TTaskType::type>>;
|
|
|
|
#ifndef BE_TEST
// Number of workers for each periodic report worker pool started by AgentServer::Impl::init().
// Not needed under BE_TEST, where CREATE_AND_START_POOL expands to nothing.
const uint32_t REPORT_TASK_WORKER_COUNT{1};
const uint32_t REPORT_DISK_STATE_WORKER_COUNT{1};
const uint32_t REPORT_OLAP_TABLE_WORKER_COUNT{1};
const uint32_t REPORT_WORKGROUP_WORKER_COUNT{1};
const uint32_t REPORT_RESOURCE_USAGE_WORKER_COUNT{1};
const uint32_t REPORT_DATACACHE_METRICS_WORKER_COUNT{1};
#endif
|
|
|
|
/* calculate real num threads
|
|
* if num_threads > 0, return num_threads
|
|
* if num_threads < 0, return -num_threads * cpu_cores
|
|
* if num_threads == 0, return cpu_cores_multiplier * cpu_cores
|
|
*/
|
|
static int32_t calc_real_num_threads(int32_t num_threads, int32_t cpu_cores_multiplier = 1) {
|
|
if (num_threads == 0) {
|
|
num_threads = -cpu_cores_multiplier;
|
|
}
|
|
if (num_threads < 0) {
|
|
num_threads = -num_threads;
|
|
num_threads *= CpuInfo::num_cores();
|
|
}
|
|
if (num_threads < 1) {
|
|
num_threads = 1;
|
|
}
|
|
return num_threads;
|
|
}
|
|
|
|
// Private implementation of AgentServer (pimpl). Owns the thread pools and worker pools that
// execute agent tasks; all of them are created in init().
class AgentServer::Impl {
public:
    explicit Impl(ExecEnv* exec_env, bool is_compute_node) : _exec_env(exec_env), _is_compute_node(is_compute_node) {}

    ~Impl();

    // Builds the thread pools and starts the worker pools.
    Status init();

    // Shuts down the thread pools and stops the worker pools.
    void stop();

    // Validates a batch of tasks and dispatches each one to its pool; the outcome is written
    // into agent_result.status.
    void submit_tasks(TAgentResult& agent_result, const std::vector<TAgentTaskRequest>& tasks);

    void make_snapshot(TAgentResult& agent_result, const TSnapshotRequest& snapshot_request);

    void release_snapshot(TAgentResult& agent_result, const std::string& snapshot_path);

    // Deprecated: always reports NotSupported.
    void publish_cluster_state(TAgentResult& agent_result, const TAgentPublishRequest& request);

    // `type` is a TTaskType value selecting the thread pool whose max thread count is updated.
    void update_max_thread_by_type(int type, int new_val);

    // Returns the thread pool serving the given TTaskType value, or nullptr if there is none.
    ThreadPool* get_thread_pool(int type) const;

    void stop_task_worker_pool(TaskWorkerType type) const;

    DISALLOW_COPY_AND_MOVE(Impl);

private:
    ExecEnv* _exec_env;

    // Dynamic thread pools, one per task type. init() builds them only when this is not a compute node.
    std::unique_ptr<ThreadPool> _thread_pool_publish_version;
    std::unique_ptr<ThreadPool> _thread_pool_clone;
    std::unique_ptr<ThreadPool> _thread_pool_drop;
    std::unique_ptr<ThreadPool> _thread_pool_create_tablet;
    std::unique_ptr<ThreadPool> _thread_pool_alter_tablet;
    std::unique_ptr<ThreadPool> _thread_pool_clear_transaction;
    std::unique_ptr<ThreadPool> _thread_pool_storage_medium_migrate;
    std::unique_ptr<ThreadPool> _thread_pool_check_consistency;
    std::unique_ptr<ThreadPool> _thread_pool_compaction;
    std::unique_ptr<ThreadPool> _thread_pool_compaction_control;
    std::unique_ptr<ThreadPool> _thread_pool_update_schema;

    std::unique_ptr<ThreadPool> _thread_pool_upload;
    std::unique_ptr<ThreadPool> _thread_pool_download;
    std::unique_ptr<ThreadPool> _thread_pool_make_snapshot;
    std::unique_ptr<ThreadPool> _thread_pool_release_snapshot;
    std::unique_ptr<ThreadPool> _thread_pool_move_dir;
    std::unique_ptr<ThreadPool> _thread_pool_update_tablet_meta_info;
    std::unique_ptr<ThreadPool> _thread_pool_drop_auto_increment_map;
    std::unique_ptr<ThreadPool> _thread_pool_remote_snapshot;
    std::unique_ptr<ThreadPool> _thread_pool_replicate_snapshot;

    // Worker pools that receive tasks from submit_tasks().
    std::unique_ptr<PushTaskWorkerPool> _push_workers;
    std::unique_ptr<PublishVersionTaskWorkerPool> _publish_version_workers;
    std::unique_ptr<DeleteTaskWorkerPool> _delete_workers;

    // The report worker pools below do not accept tasks from FE.
    // They are self triggered periodically and report to the FE master.
    std::unique_ptr<ReportTaskWorkerPool> _report_task_workers;
    std::unique_ptr<ReportDiskStateTaskWorkerPool> _report_disk_state_workers;
    std::unique_ptr<ReportOlapTableTaskWorkerPool> _report_tablet_workers;
    std::unique_ptr<ReportWorkgroupTaskWorkerPool> _report_workgroup_workers;
    std::unique_ptr<ReportResourceUsageTaskWorkerPool> _report_resource_usage_workers;
    std::unique_ptr<ReportDataCacheMetricsTaskWorkerPool> _report_datacache_metrics_workers;

    // On a compute node, init() starts only _report_resource_usage_workers,
    // _report_datacache_metrics_workers and _report_task_workers.
    const bool _is_compute_node;
};
|
|
|
|
// Builds every thread pool and starts every worker pool owned by this object.
//
// When this is not a compute node: removes leftover DPP download directories under each
// store path (best effort), builds one dynamic thread pool per task type and starts the
// publish/push/delete/report worker pools. On every node it then starts the resource-usage,
// datacache-metrics and task report worker pools.
//
// Returns the first error reported while building a thread pool, Status::OK() otherwise.
Status AgentServer::Impl::init() {
    if (!_is_compute_node) {
        for (const auto& path : _exec_env->store_paths()) {
            // Cleanup is best effort: a failure is logged (with its reason) and start-up continues.
            try {
                std::string dpp_download_path_str = path.path + DPP_PREFIX;
                std::filesystem::path dpp_download_path(dpp_download_path_str);
                if (std::filesystem::exists(dpp_download_path)) {
                    std::filesystem::remove_all(dpp_download_path);
                }
            } catch (const std::exception& e) {
                LOG(WARNING) << "std exception when remove dpp download path. path=" << path.path
                             << ", reason=" << e.what();
            } catch (...) {
                LOG(WARNING) << "unknown exception when remove dpp download path. path=" << path.path;
            }
        }

#define BUILD_DYNAMIC_TASK_THREAD_POOL(name, min_threads, max_threads, queue_size, pool) \
    BUILD_DYNAMIC_TASK_THREAD_POOL_WITH_IDLE(name, min_threads, max_threads, queue_size, \
                                             ThreadPoolDefaultIdleTimeoutMS, pool)

#define BUILD_DYNAMIC_TASK_THREAD_POOL_WITH_IDLE(name, min_threads, max_threads, queue_size, idle_timeout, pool) \
    do {                                                                                                         \
        RETURN_IF_ERROR(ThreadPoolBuilder(#name)                                                                 \
                                .set_min_threads(min_threads)                                                    \
                                .set_max_threads(max_threads)                                                    \
                                .set_max_queue_size(queue_size)                                                  \
                                .set_idle_timeout(MonoDelta::FromMilliseconds(idle_timeout))                     \
                                .build(&pool));                                                                  \
        REGISTER_THREAD_POOL_METRICS(name, pool);                                                                \
    } while (false)

        // The ideal queue size of threadpool should be larger than the maximum number of tablet of a partition.
        // But it seems that there's no limit for the number of tablets of a partition.
        // Since a large queue size brings a little overhead, a big one is chosen here.
#ifdef BE_TEST
        BUILD_DYNAMIC_TASK_THREAD_POOL(publish_version, 1, 3, DEFAULT_DYNAMIC_THREAD_POOL_QUEUE_SIZE,
                                       _thread_pool_publish_version);
#else
        int max_publish_version_worker_count = calc_real_num_threads(config::transaction_publish_version_worker_count);
        max_publish_version_worker_count =
                std::max(max_publish_version_worker_count, MIN_TRANSACTION_PUBLISH_WORKER_COUNT);
        int min_publish_version_worker_count =
                std::max(config::transaction_publish_version_thread_pool_num_min, MIN_TRANSACTION_PUBLISH_WORKER_COUNT);
        BUILD_DYNAMIC_TASK_THREAD_POOL_WITH_IDLE(publish_version, min_publish_version_worker_count,
                                                 max_publish_version_worker_count, std::numeric_limits<int>::max(),
                                                 config::transaction_publish_version_thread_pool_idle_time_ms,
                                                 _thread_pool_publish_version);
#endif
        // A non-positive drop_tablet_worker_count falls back to half of the cores (at least one thread).
        int real_drop_tablet_worker_count = (config::drop_tablet_worker_count > 0)
                                                    ? config::drop_tablet_worker_count
                                                    : std::max(static_cast<int>(CpuInfo::num_cores() / 2), 1);
        BUILD_DYNAMIC_TASK_THREAD_POOL(drop, 1, real_drop_tablet_worker_count, std::numeric_limits<int>::max(),
                                       _thread_pool_drop);

        BUILD_DYNAMIC_TASK_THREAD_POOL(create_tablet, 1, config::create_tablet_worker_count,
                                       std::numeric_limits<int>::max(), _thread_pool_create_tablet);

        BUILD_DYNAMIC_TASK_THREAD_POOL(alter_tablet, 0, config::alter_tablet_worker_count,
                                       std::numeric_limits<int>::max(), _thread_pool_alter_tablet);

        BUILD_DYNAMIC_TASK_THREAD_POOL(clear_transaction, 0, config::clear_transaction_task_worker_count,
                                       std::numeric_limits<int>::max(), _thread_pool_clear_transaction);

        BUILD_DYNAMIC_TASK_THREAD_POOL(storage_medium_migrate, 0, config::storage_medium_migrate_count,
                                       std::numeric_limits<int>::max(), _thread_pool_storage_medium_migrate);

        BUILD_DYNAMIC_TASK_THREAD_POOL(check_consistency, 0, config::check_consistency_worker_count,
                                       std::numeric_limits<int>::max(), _thread_pool_check_consistency);

        BUILD_DYNAMIC_TASK_THREAD_POOL(manual_compaction, 0, 1, std::numeric_limits<int>::max(),
                                       _thread_pool_compaction);

        BUILD_DYNAMIC_TASK_THREAD_POOL(compaction_control, 0, 1, std::numeric_limits<int>::max(),
                                       _thread_pool_compaction_control);

        BUILD_DYNAMIC_TASK_THREAD_POOL(update_schema, 0, config::update_schema_worker_count,
                                       std::numeric_limits<int>::max(), _thread_pool_update_schema);

        BUILD_DYNAMIC_TASK_THREAD_POOL(upload, 0, calc_real_num_threads(config::upload_worker_count),
                                       std::numeric_limits<int>::max(), _thread_pool_upload);

        BUILD_DYNAMIC_TASK_THREAD_POOL(download, 0, calc_real_num_threads(config::download_worker_count),
                                       std::numeric_limits<int>::max(), _thread_pool_download);

        BUILD_DYNAMIC_TASK_THREAD_POOL(make_snapshot, 0, config::make_snapshot_worker_count,
                                       std::numeric_limits<int>::max(), _thread_pool_make_snapshot);

        BUILD_DYNAMIC_TASK_THREAD_POOL(release_snapshot, 0, config::release_snapshot_worker_count,
                                       std::numeric_limits<int>::max(), _thread_pool_release_snapshot);

        // The move_dir pool is sized from download_worker_count, matching update_max_thread_by_type().
        BUILD_DYNAMIC_TASK_THREAD_POOL(move_dir, 0, calc_real_num_threads(config::download_worker_count),
                                       std::numeric_limits<int>::max(), _thread_pool_move_dir);

        BUILD_DYNAMIC_TASK_THREAD_POOL(update_tablet_meta_info, 0, 1, std::numeric_limits<int>::max(),
                                       _thread_pool_update_tablet_meta_info);

        BUILD_DYNAMIC_TASK_THREAD_POOL(drop_auto_increment_map_dir, 0, 1, std::numeric_limits<int>::max(),
                                       _thread_pool_drop_auto_increment_map);

        // Currently FE can have at most num_of_storage_path * schedule_slot_num_per_path(default 2) clone tasks
        // scheduled simultaneously, but previously we have only 3 clone worker threads by default,
        // so this is to keep the dop of clone task handling in sync with FE.
        //
        // TODO(shangyiming): using dynamic thread pool to handle task directly instead of using TaskThreadPool
        // Currently, the task submission and processing logic is deeply coupled with TaskThreadPool, change that will
        // need to modify many interfaces. So for now we still use TaskThreadPool to submit clone tasks, but with
        // only a single worker thread, then we use dynamic thread pool to handle the task concurrently in clone task
        // callback, so that we can match the dop of FE clone task scheduling.
        BUILD_DYNAMIC_TASK_THREAD_POOL(clone, 0,
                                       std::max(_exec_env->store_paths().size() * config::parallel_clone_task_per_path,
                                                MIN_CLONE_TASK_THREADS_IN_POOL),
                                       DEFAULT_DYNAMIC_THREAD_POOL_QUEUE_SIZE, _thread_pool_clone);

        BUILD_DYNAMIC_TASK_THREAD_POOL(
                remote_snapshot, 0,
                calc_real_num_threads(config::replication_threads, REPLICATION_CPU_CORES_MULTIPLIER),
                std::numeric_limits<int>::max(), _thread_pool_remote_snapshot);

        BUILD_DYNAMIC_TASK_THREAD_POOL(
                replicate_snapshot, 0,
                calc_real_num_threads(config::replication_threads, REPLICATION_CPU_CORES_MULTIPLIER),
                std::numeric_limits<int>::max(), _thread_pool_replicate_snapshot);

        // The pool-building helpers are local to this function; do not leak them into the rest of the file.
#undef BUILD_DYNAMIC_TASK_THREAD_POOL
#undef BUILD_DYNAMIC_TASK_THREAD_POOL_WITH_IDLE

        // It is the same code to create workers of each type, so we use a macro
        // to make code to be more readable.
#ifndef BE_TEST
#define CREATE_AND_START_POOL(pool_name, CLASS_NAME, worker_num) \
    pool_name.reset(new CLASS_NAME(_exec_env, worker_num));      \
    pool_name->start();
#else
#define CREATE_AND_START_POOL(pool_name, CLASS_NAME, worker_num)
#endif // BE_TEST

        CREATE_AND_START_POOL(_publish_version_workers, PublishVersionTaskWorkerPool, CpuInfo::num_cores())
        // Both PUSH and REALTIME_PUSH type use _push_workers
        CREATE_AND_START_POOL(_push_workers, PushTaskWorkerPool,
                              config::push_worker_count_high_priority + config::push_worker_count_normal_priority)
        CREATE_AND_START_POOL(_delete_workers, DeleteTaskWorkerPool,
                              config::delete_worker_count_normal_priority + config::delete_worker_count_high_priority)
        CREATE_AND_START_POOL(_report_disk_state_workers, ReportDiskStateTaskWorkerPool, REPORT_DISK_STATE_WORKER_COUNT)
        CREATE_AND_START_POOL(_report_tablet_workers, ReportOlapTableTaskWorkerPool, REPORT_OLAP_TABLE_WORKER_COUNT)
        CREATE_AND_START_POOL(_report_workgroup_workers, ReportWorkgroupTaskWorkerPool, REPORT_WORKGROUP_WORKER_COUNT)
    }
    // The remaining report workers are started on every node, compute nodes included.
    CREATE_AND_START_POOL(_report_resource_usage_workers, ReportResourceUsageTaskWorkerPool,
                          REPORT_RESOURCE_USAGE_WORKER_COUNT)
    CREATE_AND_START_POOL(_report_datacache_metrics_workers, ReportDataCacheMetricsTaskWorkerPool,
                          REPORT_DATACACHE_METRICS_WORKER_COUNT)
    CREATE_AND_START_POOL(_report_task_workers, ReportTaskWorkerPool, REPORT_TASK_WORKER_COUNT)
#undef CREATE_AND_START_POOL

    return Status::OK();
}
|
|
|
|
void AgentServer::Impl::stop() {
|
|
if (!_is_compute_node) {
|
|
_thread_pool_publish_version->shutdown();
|
|
_thread_pool_drop->shutdown();
|
|
_thread_pool_create_tablet->shutdown();
|
|
_thread_pool_alter_tablet->shutdown();
|
|
_thread_pool_clear_transaction->shutdown();
|
|
_thread_pool_storage_medium_migrate->shutdown();
|
|
_thread_pool_check_consistency->shutdown();
|
|
_thread_pool_compaction->shutdown();
|
|
_thread_pool_compaction_control->shutdown();
|
|
_thread_pool_update_schema->shutdown();
|
|
_thread_pool_upload->shutdown();
|
|
_thread_pool_download->shutdown();
|
|
_thread_pool_make_snapshot->shutdown();
|
|
_thread_pool_release_snapshot->shutdown();
|
|
_thread_pool_move_dir->shutdown();
|
|
_thread_pool_update_tablet_meta_info->shutdown();
|
|
_thread_pool_drop_auto_increment_map->shutdown();
|
|
|
|
#ifndef BE_TEST
|
|
_thread_pool_clone->shutdown();
|
|
_thread_pool_remote_snapshot->shutdown();
|
|
_thread_pool_replicate_snapshot->shutdown();
|
|
#define STOP_POOL(type, pool_name) pool_name->stop();
|
|
#else
|
|
#define STOP_POOL(type, pool_name)
|
|
#endif // BE_TEST
|
|
STOP_POOL(PUBLISH_VERSION, _publish_version_workers);
|
|
// Both PUSH and REALTIME_PUSH type use _push_workers
|
|
STOP_POOL(PUSH, _push_workers);
|
|
STOP_POOL(DELETE, _delete_workers);
|
|
STOP_POOL(REPORT_DISK_STATE, _report_disk_state_workers);
|
|
STOP_POOL(REPORT_OLAP_TABLE, _report_tablet_workers);
|
|
STOP_POOL(REPORT_WORKGROUP, _report_workgroup_workers);
|
|
}
|
|
STOP_POOL(REPORT_WORKGROUP, _report_resource_usage_workers);
|
|
STOP_POOL(REPORT_DATACACHE_METRICS, _report_datacache_metrics_workers);
|
|
STOP_POOL(REPORT_TASK, _report_task_workers);
|
|
#undef STOP_POOL
|
|
}
|
|
|
|
// Defaulted destructor: every pool is held by a std::unique_ptr member and is released with it.
AgentServer::Impl::~Impl() = default;
|
|
|
|
// TODO(lingbin): each task in the batch may have it own status or FE must check and
|
|
// resend request when something is wrong(BE may need some logic to guarantee idempotence.
|
|
void AgentServer::Impl::submit_tasks(TAgentResult& agent_result, const std::vector<TAgentTaskRequest>& tasks) {
|
|
Status ret_st;
|
|
auto master_address = get_master_address();
|
|
if (master_address.hostname.empty() || master_address.port == 0) {
|
|
ret_st = Status::Cancelled("Have not get FE Master heartbeat yet");
|
|
ret_st.to_thrift(&agent_result.status);
|
|
return;
|
|
}
|
|
|
|
phmap::flat_hash_map<TTaskType::type, std::vector<const TAgentTaskRequest*>, TTaskTypeHash> task_divider;
|
|
phmap::flat_hash_map<TPushType::type, std::vector<const TAgentTaskRequest*>, TTaskTypeHash> push_divider;
|
|
|
|
for (const auto& task : tasks) {
|
|
VLOG_RPC << "submit one task: " << apache::thrift::ThriftDebugString(task).c_str();
|
|
TTaskType::type task_type = task.task_type;
|
|
int64_t signature = task.signature;
|
|
|
|
#define HANDLE_TYPE(t_task_type, req_member) \
|
|
case t_task_type: \
|
|
if (task.__isset.req_member) { \
|
|
task_divider[t_task_type].push_back(&task); \
|
|
} else { \
|
|
ret_st = Status::InvalidArgument( \
|
|
strings::Substitute("task(signature=$0) has wrong request member", signature)); \
|
|
} \
|
|
break;
|
|
|
|
// TODO(lingbin): It still too long, divided these task types into several categories
|
|
switch (task_type) {
|
|
HANDLE_TYPE(TTaskType::CREATE, create_tablet_req);
|
|
HANDLE_TYPE(TTaskType::DROP, drop_tablet_req);
|
|
HANDLE_TYPE(TTaskType::PUBLISH_VERSION, publish_version_req);
|
|
HANDLE_TYPE(TTaskType::CLEAR_TRANSACTION_TASK, clear_transaction_task_req);
|
|
HANDLE_TYPE(TTaskType::CLONE, clone_req);
|
|
HANDLE_TYPE(TTaskType::STORAGE_MEDIUM_MIGRATE, storage_medium_migrate_req);
|
|
HANDLE_TYPE(TTaskType::CHECK_CONSISTENCY, check_consistency_req);
|
|
HANDLE_TYPE(TTaskType::COMPACTION, compaction_req);
|
|
HANDLE_TYPE(TTaskType::COMPACTION_CONTROL, compaction_control_req);
|
|
HANDLE_TYPE(TTaskType::UPLOAD, upload_req);
|
|
HANDLE_TYPE(TTaskType::UPDATE_SCHEMA, update_schema_req);
|
|
HANDLE_TYPE(TTaskType::DOWNLOAD, download_req);
|
|
HANDLE_TYPE(TTaskType::MAKE_SNAPSHOT, snapshot_req);
|
|
HANDLE_TYPE(TTaskType::RELEASE_SNAPSHOT, release_snapshot_req);
|
|
HANDLE_TYPE(TTaskType::MOVE, move_dir_req);
|
|
HANDLE_TYPE(TTaskType::UPDATE_TABLET_META_INFO, update_tablet_meta_info_req);
|
|
HANDLE_TYPE(TTaskType::DROP_AUTO_INCREMENT_MAP, drop_auto_increment_map_req);
|
|
HANDLE_TYPE(TTaskType::REMOTE_SNAPSHOT, remote_snapshot_req);
|
|
HANDLE_TYPE(TTaskType::REPLICATE_SNAPSHOT, replicate_snapshot_req);
|
|
|
|
case TTaskType::REALTIME_PUSH:
|
|
if (!task.__isset.push_req) {
|
|
ret_st = Status::InvalidArgument(
|
|
strings::Substitute("task(signature=$0) has wrong request member", signature));
|
|
break;
|
|
}
|
|
if (task.push_req.push_type == TPushType::LOAD_V2 || task.push_req.push_type == TPushType::DELETE ||
|
|
task.push_req.push_type == TPushType::CANCEL_DELETE) {
|
|
push_divider[task.push_req.push_type].push_back(&task);
|
|
} else {
|
|
ret_st = Status::InvalidArgument(
|
|
strings::Substitute("task(signature=$0, type=$1, push_type=$2) has wrong push_type", signature,
|
|
task_type, task.push_req.push_type));
|
|
}
|
|
break;
|
|
case TTaskType::ALTER:
|
|
if (task.__isset.alter_tablet_req || task.__isset.alter_tablet_req_v2) {
|
|
task_divider[TTaskType::ALTER].push_back(&task);
|
|
} else {
|
|
ret_st = Status::InvalidArgument(
|
|
strings::Substitute("task(signature=$0) has wrong request member", signature));
|
|
}
|
|
break;
|
|
default:
|
|
ret_st = Status::InvalidArgument(
|
|
strings::Substitute("task(signature=$0, type=$1) has wrong task type", signature, task_type));
|
|
break;
|
|
}
|
|
#undef HANDLE_TYPE
|
|
|
|
if (!ret_st.ok()) {
|
|
LOG(WARNING) << "fail to submit task. reason: " << ret_st.message() << ", task: " << task;
|
|
// For now, all tasks in the batch share one status, so if any task
|
|
// was failed to submit, we can only return error to FE(even when some
|
|
// tasks have already been successfully submitted).
|
|
// However, Fe does not check the return status of submit_tasks() currently,
|
|
// and it is not sure that FE will retry when something is wrong, so here we
|
|
// only print an warning log and go on(i.e. do not break current loop),
|
|
// to ensure every task can be submitted once. It is OK for now, because the
|
|
// ret_st can be error only when it encounters an wrong task_type and
|
|
// req-member in TAgentTaskRequest, which is basically impossible.
|
|
// TODO(lingbin): check the logic in FE again later.
|
|
}
|
|
}
|
|
|
|
#define HANDLE_TASK(t_task_type, all_tasks, do_func, AGENT_REQ, request, env) \
|
|
{ \
|
|
std::string submit_log = "Submit task success. type=" + to_string(t_task_type) + ", signatures="; \
|
|
size_t log_count = 0; \
|
|
size_t queue_len = 0; \
|
|
for (auto* task : all_tasks) { \
|
|
auto pool = get_thread_pool(t_task_type); \
|
|
auto signature = task->signature; \
|
|
std::pair<bool, size_t> register_pair = register_task_info(task_type, signature); \
|
|
if (register_pair.first) { \
|
|
if (log_count++ < 100) { \
|
|
submit_log += std::to_string(signature) + ","; \
|
|
} \
|
|
queue_len = register_pair.second; \
|
|
ret_st = pool->submit_func( \
|
|
std::bind(do_func, std::make_shared<AGENT_REQ>(*task, task->request, time(nullptr)), env)); \
|
|
if (!ret_st.ok()) { \
|
|
LOG(WARNING) << "fail to submit task. reason: " << ret_st.message() << ", task: " << task; \
|
|
} \
|
|
} else { \
|
|
LOG(INFO) << "Submit task failed, already exists type=" << t_task_type << ", signature=" << signature; \
|
|
} \
|
|
} \
|
|
if (queue_len > 0) { \
|
|
if (log_count >= 100) { \
|
|
submit_log += "...,"; \
|
|
} \
|
|
LOG(INFO) << submit_log << " task_count_in_queue=" << queue_len; \
|
|
} \
|
|
}
|
|
|
|
// batch submit tasks
|
|
for (const auto& task_item : task_divider) {
|
|
const auto& task_type = task_item.first;
|
|
auto all_tasks = task_item.second;
|
|
switch (task_type) {
|
|
case TTaskType::CREATE:
|
|
HANDLE_TASK(TTaskType::CREATE, all_tasks, run_create_tablet_task, CreateTabletAgentTaskRequest,
|
|
create_tablet_req, _exec_env);
|
|
break;
|
|
case TTaskType::DROP:
|
|
HANDLE_TASK(TTaskType::DROP, all_tasks, run_drop_tablet_task, DropTabletAgentTaskRequest, drop_tablet_req,
|
|
_exec_env);
|
|
break;
|
|
case TTaskType::PUBLISH_VERSION: {
|
|
for (const auto& task : all_tasks) {
|
|
_publish_version_workers->submit_task(*task);
|
|
}
|
|
break;
|
|
}
|
|
case TTaskType::CLEAR_TRANSACTION_TASK:
|
|
HANDLE_TASK(TTaskType::CLEAR_TRANSACTION_TASK, all_tasks, run_clear_transaction_task,
|
|
ClearTransactionAgentTaskRequest, clear_transaction_task_req, _exec_env);
|
|
break;
|
|
case TTaskType::CLONE:
|
|
HANDLE_TASK(TTaskType::CLONE, all_tasks, run_clone_task, CloneAgentTaskRequest, clone_req, _exec_env);
|
|
break;
|
|
case TTaskType::STORAGE_MEDIUM_MIGRATE:
|
|
HANDLE_TASK(TTaskType::STORAGE_MEDIUM_MIGRATE, all_tasks, run_storage_medium_migrate_task,
|
|
StorageMediumMigrateTaskRequest, storage_medium_migrate_req, _exec_env);
|
|
break;
|
|
case TTaskType::CHECK_CONSISTENCY:
|
|
HANDLE_TASK(TTaskType::CHECK_CONSISTENCY, all_tasks, run_check_consistency_task,
|
|
CheckConsistencyTaskRequest, check_consistency_req, _exec_env);
|
|
break;
|
|
case TTaskType::COMPACTION:
|
|
HANDLE_TASK(TTaskType::COMPACTION, all_tasks, run_compaction_task, CompactionTaskRequest, compaction_req,
|
|
_exec_env);
|
|
break;
|
|
case TTaskType::COMPACTION_CONTROL:
|
|
HANDLE_TASK(TTaskType::COMPACTION_CONTROL, all_tasks, run_compaction_control_task,
|
|
CompactionControlTaskRequest, compaction_control_req, _exec_env);
|
|
break;
|
|
case TTaskType::UPDATE_SCHEMA:
|
|
HANDLE_TASK(TTaskType::UPDATE_SCHEMA, all_tasks, run_update_schema_task, UpdateSchemaTaskRequest,
|
|
update_schema_req, _exec_env);
|
|
break;
|
|
case TTaskType::UPLOAD:
|
|
HANDLE_TASK(TTaskType::UPLOAD, all_tasks, run_upload_task, UploadAgentTaskRequest, upload_req, _exec_env);
|
|
break;
|
|
case TTaskType::DOWNLOAD:
|
|
HANDLE_TASK(TTaskType::DOWNLOAD, all_tasks, run_download_task, DownloadAgentTaskRequest, download_req,
|
|
_exec_env);
|
|
break;
|
|
case TTaskType::MAKE_SNAPSHOT:
|
|
HANDLE_TASK(TTaskType::MAKE_SNAPSHOT, all_tasks, run_make_snapshot_task, SnapshotAgentTaskRequest,
|
|
snapshot_req, _exec_env);
|
|
break;
|
|
case TTaskType::RELEASE_SNAPSHOT:
|
|
HANDLE_TASK(TTaskType::RELEASE_SNAPSHOT, all_tasks, run_release_snapshot_task,
|
|
ReleaseSnapshotAgentTaskRequest, release_snapshot_req, _exec_env);
|
|
break;
|
|
case TTaskType::MOVE:
|
|
HANDLE_TASK(TTaskType::MOVE, all_tasks, run_move_dir_task, MoveDirAgentTaskRequest, move_dir_req,
|
|
_exec_env);
|
|
break;
|
|
case TTaskType::UPDATE_TABLET_META_INFO:
|
|
HANDLE_TASK(TTaskType::UPDATE_TABLET_META_INFO, all_tasks, run_update_meta_info_task,
|
|
UpdateTabletMetaInfoAgentTaskRequest, update_tablet_meta_info_req, _exec_env);
|
|
break;
|
|
case TTaskType::DROP_AUTO_INCREMENT_MAP:
|
|
HANDLE_TASK(TTaskType::DROP_AUTO_INCREMENT_MAP, all_tasks, run_drop_auto_increment_map_task,
|
|
DropAutoIncrementMapAgentTaskRequest, drop_auto_increment_map_req, _exec_env);
|
|
break;
|
|
case TTaskType::REMOTE_SNAPSHOT:
|
|
HANDLE_TASK(TTaskType::REMOTE_SNAPSHOT, all_tasks, run_remote_snapshot_task, RemoteSnapshotAgentTaskRequest,
|
|
remote_snapshot_req, _exec_env);
|
|
break;
|
|
case TTaskType::REPLICATE_SNAPSHOT:
|
|
HANDLE_TASK(TTaskType::REPLICATE_SNAPSHOT, all_tasks, run_replicate_snapshot_task,
|
|
ReplicateSnapshotAgentTaskRequest, replicate_snapshot_req, _exec_env);
|
|
break;
|
|
case TTaskType::REALTIME_PUSH:
|
|
case TTaskType::PUSH: {
|
|
// should not run here
|
|
break;
|
|
}
|
|
case TTaskType::ALTER:
|
|
HANDLE_TASK(TTaskType::ALTER, all_tasks, run_alter_tablet_task, AlterTabletAgentTaskRequest,
|
|
alter_tablet_req_v2, _exec_env);
|
|
break;
|
|
default:
|
|
ret_st = Status::InvalidArgument(strings::Substitute("tasks(type=$0) has wrong task type", task_type));
|
|
LOG(WARNING) << "fail to batch submit task. reason: " << ret_st.message();
|
|
}
|
|
}
|
|
|
|
// batch submit push tasks
|
|
if (!push_divider.empty()) {
|
|
LOG(INFO) << "begin batch submit task: " << tasks[0].task_type;
|
|
for (const auto& push_item : push_divider) {
|
|
const auto& push_type = push_item.first;
|
|
auto all_push_tasks = push_item.second;
|
|
switch (push_type) {
|
|
case TPushType::LOAD_V2:
|
|
_push_workers->submit_tasks(all_push_tasks);
|
|
break;
|
|
case TPushType::DELETE:
|
|
case TPushType::CANCEL_DELETE:
|
|
_delete_workers->submit_tasks(all_push_tasks);
|
|
break;
|
|
default:
|
|
ret_st = Status::InvalidArgument(strings::Substitute("tasks(type=$0, push_type=$1) has wrong task type",
|
|
TTaskType::PUSH, push_type));
|
|
LOG(WARNING) << "fail to batch submit push task. reason: " << ret_st.message();
|
|
}
|
|
}
|
|
}
|
|
|
|
ret_st.to_thrift(&agent_result.status);
|
|
}
|
|
|
|
// Asks the SnapshotManager to create a snapshot for `snapshot_request` and fills the result.
//
// The snapshot path is set on the result only on success. The status, the snapshot format
// (echoed from the request's preferred format) and allow_incremental_clone=true are always set.
void AgentServer::Impl::make_snapshot(TAgentResult& t_agent_result, const TSnapshotRequest& snapshot_request) {
    std::string snapshot_path;
    auto st = SnapshotManager::instance()->make_snapshot(snapshot_request, &snapshot_path);
    if (st.ok()) {
        LOG(INFO) << "success to make_snapshot. tablet_id:" << snapshot_request.tablet_id << " path:" << snapshot_path;
        t_agent_result.__set_snapshot_path(snapshot_path);
    } else {
        LOG(WARNING) << "fail to make_snapshot. tablet_id:" << snapshot_request.tablet_id << " msg:" << st.to_string();
    }

    st.to_thrift(&t_agent_result.status);
    t_agent_result.__set_snapshot_format(snapshot_request.preferred_snapshot_format);
    t_agent_result.__set_allow_incremental_clone(true);
}
|
|
|
|
// Asks the SnapshotManager to release the snapshot at `snapshot_path` and writes the
// resulting status into t_agent_result.status.
void AgentServer::Impl::release_snapshot(TAgentResult& t_agent_result, const std::string& snapshot_path) {
    Status ret_st = SnapshotManager::instance()->release_snapshot(snapshot_path);
    if (!ret_st.ok()) {
        // Include the failure reason, as make_snapshot() does, so the log is actionable on its own.
        LOG(WARNING) << "Fail to release_snapshot. snapshot_path:" << snapshot_path << " msg:" << ret_st.to_string();
    } else {
        LOG(INFO) << "success to release_snapshot. snapshot_path:" << snapshot_path;
    }
    ret_st.to_thrift(&t_agent_result.status);
}
|
|
|
|
// Deprecated entry point: ignores `request` and always reports NotSupported.
void AgentServer::Impl::publish_cluster_state(TAgentResult& t_agent_result, const TAgentPublishRequest& request) {
    Status not_supported = Status::NotSupported("deprecated method(publish_cluster_state) was invoked");
    not_supported.to_thrift(&t_agent_result.status);
}
|
|
|
|
void AgentServer::Impl::update_max_thread_by_type(int type, int new_val) {
|
|
Status st;
|
|
switch (type) {
|
|
case TTaskType::UPLOAD:
|
|
st = _thread_pool_upload->update_max_threads(calc_real_num_threads(new_val));
|
|
break;
|
|
case TTaskType::DOWNLOAD:
|
|
st = _thread_pool_download->update_max_threads(calc_real_num_threads(new_val));
|
|
break;
|
|
case TTaskType::MOVE:
|
|
st = _thread_pool_move_dir->update_max_threads(calc_real_num_threads(new_val));
|
|
break;
|
|
case TTaskType::REMOTE_SNAPSHOT:
|
|
st = _thread_pool_remote_snapshot->update_max_threads(
|
|
calc_real_num_threads(new_val, REPLICATION_CPU_CORES_MULTIPLIER));
|
|
break;
|
|
case TTaskType::REPLICATE_SNAPSHOT:
|
|
st = _thread_pool_replicate_snapshot->update_max_threads(
|
|
calc_real_num_threads(new_val, REPLICATION_CPU_CORES_MULTIPLIER));
|
|
break;
|
|
default: {
|
|
ThreadPool* thread_pool = get_thread_pool(type);
|
|
if (thread_pool) {
|
|
st = thread_pool->update_max_threads(new_val);
|
|
} else {
|
|
LOG(WARNING) << "Failed to update max thread, cannot get thread pool by task type: "
|
|
<< to_string((TTaskType::type)type);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
LOG_IF(ERROR, !st.ok()) << st;
|
|
}
|
|
|
|
#define STOP_IF_NOT_NULL(worker_pool) \
|
|
if (worker_pool != nullptr) { \
|
|
worker_pool->stop(); \
|
|
}
|
|
|
|
void AgentServer::Impl::stop_task_worker_pool(TaskWorkerType type) const {
|
|
switch (type) {
|
|
case TaskWorkerType::PUSH:
|
|
STOP_IF_NOT_NULL(_push_workers);
|
|
break;
|
|
case TaskWorkerType::PUBLISH_VERSION:
|
|
STOP_IF_NOT_NULL(_publish_version_workers);
|
|
break;
|
|
case TaskWorkerType::DELETE:
|
|
STOP_IF_NOT_NULL(_delete_workers);
|
|
break;
|
|
case TaskWorkerType::REPORT_TASK:
|
|
STOP_IF_NOT_NULL(_report_task_workers);
|
|
break;
|
|
case TaskWorkerType::REPORT_DISK_STATE:
|
|
STOP_IF_NOT_NULL(_report_disk_state_workers);
|
|
break;
|
|
case TaskWorkerType::REPORT_OLAP_TABLE:
|
|
STOP_IF_NOT_NULL(_report_tablet_workers);
|
|
break;
|
|
case TaskWorkerType::REPORT_WORKGROUP:
|
|
STOP_IF_NOT_NULL(_report_workgroup_workers);
|
|
STOP_IF_NOT_NULL(_report_resource_usage_workers);
|
|
break;
|
|
case TaskWorkerType::REPORT_DATACACHE_METRICS:
|
|
STOP_IF_NOT_NULL(_report_datacache_metrics_workers);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Maps a TTaskType value to the thread pool that executes tasks of that type.
//
// Returns nullptr for task types that have no dedicated thread pool, for values not listed
// below, and for pools that have not been built. The sync point at the end receives the
// address of the result before it is returned.
ThreadPool* AgentServer::Impl::get_thread_pool(int type) const {
    // TODO: more thread pools.
    ThreadPool* pool = nullptr;
    switch (type) {
    case TTaskType::PUBLISH_VERSION:
        pool = _thread_pool_publish_version.get();
        break;
    case TTaskType::CLONE:
        pool = _thread_pool_clone.get();
        break;
    case TTaskType::DROP:
        pool = _thread_pool_drop.get();
        break;
    case TTaskType::CREATE:
        pool = _thread_pool_create_tablet.get();
        break;
    case TTaskType::STORAGE_MEDIUM_MIGRATE:
        pool = _thread_pool_storage_medium_migrate.get();
        break;
    case TTaskType::MAKE_SNAPSHOT:
        pool = _thread_pool_make_snapshot.get();
        break;
    case TTaskType::RELEASE_SNAPSHOT:
        pool = _thread_pool_release_snapshot.get();
        break;
    case TTaskType::CHECK_CONSISTENCY:
        pool = _thread_pool_check_consistency.get();
        break;
    case TTaskType::COMPACTION:
        pool = _thread_pool_compaction.get();
        break;
    case TTaskType::COMPACTION_CONTROL:
        pool = _thread_pool_compaction_control.get();
        break;
    case TTaskType::UPDATE_SCHEMA:
        pool = _thread_pool_update_schema.get();
        break;
    case TTaskType::UPLOAD:
        pool = _thread_pool_upload.get();
        break;
    case TTaskType::DOWNLOAD:
        pool = _thread_pool_download.get();
        break;
    case TTaskType::MOVE:
        pool = _thread_pool_move_dir.get();
        break;
    case TTaskType::UPDATE_TABLET_META_INFO:
        pool = _thread_pool_update_tablet_meta_info.get();
        break;
    case TTaskType::ALTER:
        pool = _thread_pool_alter_tablet.get();
        break;
    case TTaskType::CLEAR_TRANSACTION_TASK:
        pool = _thread_pool_clear_transaction.get();
        break;
    case TTaskType::DROP_AUTO_INCREMENT_MAP:
        pool = _thread_pool_drop_auto_increment_map.get();
        break;
    case TTaskType::REMOTE_SNAPSHOT:
        pool = _thread_pool_remote_snapshot.get();
        break;
    case TTaskType::REPLICATE_SNAPSHOT:
        pool = _thread_pool_replicate_snapshot.get();
        break;
    // Task types without a dedicated thread pool: the result stays nullptr.
    case TTaskType::PUSH:
    case TTaskType::REALTIME_PUSH:
    case TTaskType::ROLLUP:
    case TTaskType::SCHEMA_CHANGE:
    case TTaskType::CANCEL_DELETE:
    case TTaskType::CLEAR_REMOTE_FILE:
    case TTaskType::CLEAR_ALTER_TASK:
    case TTaskType::RECOVER_TABLET:
    case TTaskType::STREAM_LOAD:
    case TTaskType::INSTALL_PLUGIN:
    case TTaskType::UNINSTALL_PLUGIN:
    case TTaskType::NUM_TASK_TYPE:
        break;
    }
    TEST_SYNC_POINT_CALLBACK("AgentServer::Impl::get_thread_pool:1", &pool);
    return pool;
}
|
|
|
|
// Creates the private implementation object; no pools are built until init() is called.
AgentServer::AgentServer(ExecEnv* exec_env, bool is_compute_node)
        : _impl(std::make_unique<Impl>(exec_env, is_compute_node)) {}
|
|
|
|
// Defaulted here, in the file where AgentServer::Impl is fully defined.
AgentServer::~AgentServer() = default;
|
|
|
|
// Forwards to AgentServer::Impl::submit_tasks().
void AgentServer::submit_tasks(TAgentResult& agent_result, const std::vector<TAgentTaskRequest>& tasks) {
    _impl->submit_tasks(agent_result, tasks);
}
|
|
|
|
// Forwards to AgentServer::Impl::make_snapshot().
void AgentServer::make_snapshot(TAgentResult& agent_result, const TSnapshotRequest& snapshot_request) {
    _impl->make_snapshot(agent_result, snapshot_request);
}
|
|
|
|
// Forwards to AgentServer::Impl::release_snapshot().
void AgentServer::release_snapshot(TAgentResult& agent_result, const std::string& snapshot_path) {
    _impl->release_snapshot(agent_result, snapshot_path);
}
|
|
|
|
// Forwards to AgentServer::Impl::publish_cluster_state(), which always reports NotSupported.
void AgentServer::publish_cluster_state(TAgentResult& agent_result, const TAgentPublishRequest& request) {
    _impl->publish_cluster_state(agent_result, request);
}
|
|
|
|
// Forwards to AgentServer::Impl::update_max_thread_by_type().
void AgentServer::update_max_thread_by_type(int type, int new_val) {
    _impl->update_max_thread_by_type(type, new_val);
}
|
|
|
|
// Forwards to AgentServer::Impl::get_thread_pool(); the result may be nullptr.
ThreadPool* AgentServer::get_thread_pool(int type) const {
    ThreadPool* pool = _impl->get_thread_pool(type);
    return pool;
}
|
|
|
|
// Forwards to AgentServer::Impl::stop_task_worker_pool().
void AgentServer::stop_task_worker_pool(TaskWorkerType type) const {
    _impl->stop_task_worker_pool(type);
}
|
|
|
|
// Forwards to AgentServer::Impl::init() and returns its status.
Status AgentServer::init() {
    Status init_status = _impl->init();
    return init_status;
}
|
|
|
|
void AgentServer::stop() {
|
|
return _impl->stop();
|
|
}
|
|
|
|
} // namespace starrocks
|