@@ -39,6 +39,10 @@ namespace doris {
3939
4040bvar::Adder<uint64_t > g_file_cache_warm_up_cache_async_submitted_segment_num (
4141 " file_cache_warm_up_cache_async_submitted_segment_num" );
42+ bvar::Adder<uint64_t > g_file_cache_warm_up_cache_async_submitted_task_num (
43+ " file_cache_warm_up_cache_async_submitted_task_num" );
44+ bvar::Adder<uint64_t > g_file_cache_warm_up_cache_async_submitted_tablet_num (
45+ " file_cache_warm_up_cache_async_submitted_tablet_num" );
4246
4347CloudBackendService::CloudBackendService (CloudStorageEngine& engine, ExecEnv* exec_env)
4448 : BaseBackendService(exec_env), _engine(engine) {}
@@ -170,83 +174,89 @@ void CloudBackendService::warm_up_tablets(TWarmUpTabletsResponse& response,
170174
171175void CloudBackendService::warm_up_cache_async (TWarmUpCacheAsyncResponse& response,
172176 const TWarmUpCacheAsyncRequest& request) {
173- std::ostringstream oss;
174- oss << " [" ;
175- for (size_t i = 0 ; i < request.tablet_ids .size () && i < 10 ; ++i) {
176- if (i > 0 ) oss << " ," ;
177- oss << request.tablet_ids [i];
178- }
179- oss << " ]" ;
180- LOG (INFO) << " warm_up_cache_async: enter, request=" << request.host << " :" << request.brpc_port
181- << " , tablets num=" << request.tablet_ids .size () << " , tablet_ids=" << oss.str ();
177+ // just submit the task to the thread pool, no need to wait for the result
178+ auto do_warm_up = [this , request]() {
179+ std::ostringstream oss;
180+ oss << " [" ;
181+ for (size_t i = 0 ; i < request.tablet_ids .size () && i < 10 ; ++i) {
182+ if (i > 0 ) oss << " ," ;
183+ oss << request.tablet_ids [i];
184+ }
185+ oss << " ]" ;
186+ g_file_cache_warm_up_cache_async_submitted_tablet_num << request.tablet_ids .size ();
187+ LOG (INFO) << " warm_up_cache_async: enter, request=" << request.host << " :"
188+ << request.brpc_port << " , tablets num=" << request.tablet_ids .size ()
189+ << " , tablet_ids=" << oss.str ();
182190
183- auto & manager = ExecEnv::GetInstance ()->storage_engine ().to_cloud ().cloud_warm_up_manager ();
184- // Record each tablet in manager
185- for (int64_t tablet_id : request.tablet_ids ) {
186- manager.record_balanced_tablet (tablet_id, request.host , request.brpc_port );
187- }
191+ auto & manager = ExecEnv::GetInstance ()->storage_engine ().to_cloud ().cloud_warm_up_manager ();
192+ // Record each tablet in manager
193+ for (int64_t tablet_id : request.tablet_ids ) {
194+ manager.record_balanced_tablet (tablet_id, request.host , request.brpc_port );
195+ }
188196
189- std::string host = request.host ;
190- auto dns_cache = ExecEnv::GetInstance ()->dns_cache ();
191- if (dns_cache == nullptr ) {
192- LOG (WARNING) << " DNS cache is not initialized, skipping hostname resolve" ;
193- } else if (!is_valid_ip (request.host )) {
194- Status status = dns_cache->get (request.host , &host);
195- if (!status.ok ()) {
196- LOG (WARNING) << " failed to get ip from host " << request.host << " : "
197- << status.to_string ();
198- // Remove failed tablets from tracking
199- manager.remove_balanced_tablets (request.tablet_ids );
197+ std::string host = request.host ;
198+ auto dns_cache = ExecEnv::GetInstance ()->dns_cache ();
199+ if (dns_cache == nullptr ) {
200+ LOG (WARNING) << " DNS cache is not initialized, skipping hostname resolve" ;
201+ } else if (!is_valid_ip (request.host )) {
202+ Status status = dns_cache->get (request.host , &host);
203+ if (!status.ok ()) {
204+ LOG (WARNING) << " failed to get ip from host " << request.host << " : "
205+ << status.to_string ();
206+ return ;
207+ }
208+ }
209+ std::string brpc_addr = get_host_port (host, request.brpc_port );
210+ std::shared_ptr<PBackendService_Stub> brpc_stub =
211+ _exec_env->brpc_internal_client_cache ()->get_new_client_no_cache (brpc_addr);
212+ if (!brpc_stub) {
213+ LOG (WARNING) << " warm_up_cache_async: failed to get brpc_stub for addr " << brpc_addr;
200214 return ;
201215 }
202- }
203- std::string brpc_addr = get_host_port (host, request.brpc_port );
204- Status st = Status::OK ();
205- TStatus t_status;
206- std::shared_ptr<PBackendService_Stub> brpc_stub =
207- _exec_env->brpc_internal_client_cache ()->get_new_client_no_cache (brpc_addr);
208- if (!brpc_stub) {
209- st = Status::RpcError (" Address {} is wrong" , brpc_addr);
210- LOG (WARNING) << " warm_up_cache_async: failed to get brpc_stub for addr " << brpc_addr;
211- // Remove failed tablets from tracking
212- manager.remove_balanced_tablets (request.tablet_ids );
213- return ;
214- }
215- brpc::Controller cntl;
216- PGetFileCacheMetaRequest brpc_request;
217- std::stringstream ss;
218- std::for_each (request.tablet_ids .cbegin (), request.tablet_ids .cend (), [&](int64_t tablet_id) {
219- brpc_request.add_tablet_ids (tablet_id);
220- ss << tablet_id << " ," ;
221- });
222- VLOG_DEBUG << " tablets set: " << ss.str () << " stack: " << get_stack_trace ();
223- PGetFileCacheMetaResponse brpc_response;
216+ PGetFileCacheMetaRequest brpc_request;
217+ std::for_each (request.tablet_ids .cbegin (), request.tablet_ids .cend (),
218+ [&](int64_t tablet_id) { brpc_request.add_tablet_ids (tablet_id); });
224219
225- brpc_stub->get_file_cache_meta_by_tablet_id (&cntl, &brpc_request, &brpc_response, nullptr );
226- VLOG_DEBUG << " warm_up_cache_async: request=" << brpc_request.DebugString ()
227- << " , response=" << brpc_response.DebugString ();
228- if (!cntl.Failed ()) {
229- g_file_cache_warm_up_cache_async_submitted_segment_num
230- << brpc_response.file_cache_block_metas ().size ();
231- auto & file_cache_block_metas = *brpc_response.mutable_file_cache_block_metas ();
232- if (!file_cache_block_metas.empty ()) {
220+ auto run_rpc = [this , brpc_stub,
221+ brpc_addr](PGetFileCacheMetaRequest request_copy) -> Status {
222+ brpc::Controller cntl;
223+ cntl.set_timeout_ms (20 * 1000 ); // 20s
224+ PGetFileCacheMetaResponse brpc_response;
225+ brpc_stub->get_file_cache_meta_by_tablet_id (&cntl, &request_copy, &brpc_response,
226+ nullptr );
227+ if (cntl.Failed ()) {
228+ LOG (WARNING) << " warm_up_cache_async: brpc call failed, addr=" << brpc_addr
229+ << " , error=" << cntl.ErrorText ()
230+ << " , error code=" << cntl.ErrorCode ();
231+ return Status::RpcError (" {} isn't connected, error code={}" , brpc_addr,
232+ cntl.ErrorCode ());
233+ }
234+ VLOG_DEBUG << " warm_up_cache_async: request=" << request_copy.DebugString ()
235+ << " , response=" << brpc_response.DebugString ();
236+ g_file_cache_warm_up_cache_async_submitted_segment_num
237+ << brpc_response.file_cache_block_metas ().size ();
233238 _engine.file_cache_block_downloader ().submit_download_task (
234- std::move (file_cache_block_metas));
235- LOG (INFO) << " warm_up_cache_async: successfully submitted download task for tablets="
236- << oss.str ();
237- } else {
238- LOG (INFO) << " warm_up_cache_async: no file cache block meta found, addr=" << brpc_addr;
239- manager.remove_balanced_tablets (request.tablet_ids );
239+ std::move (*brpc_response.mutable_file_cache_block_metas ()));
240+ return Status::OK ();
241+ };
242+
243+ Status rpc_status = run_rpc (std::move (brpc_request));
244+ if (!rpc_status.ok ()) {
245+ LOG (WARNING) << " warm_up_cache_async: rpc failed for addr=" << brpc_addr
246+ << " , status=" << rpc_status;
240247 }
241- } else {
242- st = Status::RpcError (" {} isn't connected" , brpc_addr);
243- // Remove failed tablets from tracking
244- manager.remove_balanced_tablets (request.tablet_ids );
245- LOG (WARNING) << " warm_up_cache_async: brpc call failed, addr=" << brpc_addr
246- << " , error=" << cntl.ErrorText ();
248+ };
249+ g_file_cache_warm_up_cache_async_submitted_task_num << 1 ;
250+ Status submit_st = _engine.warmup_cache_async_thread_pool ().submit_func (std::move (do_warm_up));
251+ if (!submit_st.ok ()) {
252+ LOG (WARNING) << " warm_up_cache_async: fail to submit heavy task to "
253+ " warmup_cache_async_thread_pool, status="
254+ << submit_st.to_string () << " , execute synchronously" ;
255+ do_warm_up ();
247256 }
248- st.to_thrift (&t_status);
249- response.status = t_status;
257+ TStatus t_status;
258+ submit_st.to_thrift (&t_status);
259+ response.status = std::move (t_status);
250260}
251261
252262void CloudBackendService::check_warm_up_cache_async (TCheckWarmUpCacheAsyncResponse& response,
0 commit comments