From 1effb3699bd9d0f6987c4674a00bca52f1b6a5fe Mon Sep 17 00:00:00 2001 From: koarz <66543806+koarz@users.noreply.github.com> Date: Tue, 30 Dec 2025 11:33:16 +0800 Subject: [PATCH] [fix](core)be core when BeConfDataDirReader::get_data_dir_by_file_path (#59204) During the execution of `init_file_cache_factory`, the following call path is triggered: ```txt init_file_cache_factory -> FileCacheFactory::create_file_cache -> cache->initialize() -> initialize_unlocked -> _storage->init(this) -> FSFileCacheStorage::init() ``` (At this point, a thread named `_cache_background_load_thread` is created, and the remaining operations run within this thread) `-> upgrade_cache_dir_if_necessary -> read_file_cache_version -> FileSystem::open_file -> open_file_impl -> LocalFileReader::LocalFileReader -> BeConfDataDirReader::get_data_dir_by_file_path` After `FSFileCacheStorage::init` completes (spawning the `_cache_background_load_thread`), `ExecEnv::_init` continues to execute `doris::io::BeConfDataDirReader::init_be_conf_data_dir`. This function performs push operations on `be_config_data_dir_list`. Simultaneously, `BeConfDataDirReader::get_data_dir_by_file_path` (running in the background thread) iterates over this same `be_config_data_dir_list`. This leads to a race condition: if `doris::io::BeConfDataDirReader::init_be_conf_data_dir` is inserting data while the vector is being read, two issues arise: 1. Modifying `be_config_data_dir_list` while iterating over it via a range-based for loop results in **Undefined Behavior (UB)**. 2. If `be_config_data_dir_list` triggers a reallocation (expansion) during the insertion, concurrent read operations on its elements will access dangling references, triggering a **heap-use-after-free** error. Since `init_be_conf_data_dir` depends on `cache_paths` derived from `init_file_cache_factory`, we must carefully manage the synchronization sequence to prevent these errors. --- be/src/io/fs/local_file_reader.cpp | 15 +++++++++++++++ be/src/io/fs/local_file_reader.h | 1 + 2 files changed, 16 insertions(+) diff --git a/be/src/io/fs/local_file_reader.cpp b/be/src/io/fs/local_file_reader.cpp index 771e356482f2d8..0291de3fd2dc27 100644 --- a/be/src/io/fs/local_file_reader.cpp +++ b/be/src/io/fs/local_file_reader.cpp @@ -41,15 +41,25 @@ #include "runtime/workload_management/io_throttle.h" #include "util/async_io.h" #include "util/debug_points.h" +#include "util/defer_op.h" #include "util/doris_metrics.h" namespace doris { namespace io { +// 1: initing 2: inited 0: before init +std::atomic_int BeConfDataDirReader::be_config_data_dir_list_state = 0; std::vector BeConfDataDirReader::be_config_data_dir_list; void BeConfDataDirReader::get_data_dir_by_file_path(io::Path* file_path, std::string* data_dir_arg) { + int state = be_config_data_dir_list_state.load(std::memory_order_acquire); + if (state == 0) [[unlikely]] { + return; + } else if (state == 1) [[unlikely]] { + be_config_data_dir_list_state.wait(1); + } + for (const auto& data_dir_info : be_config_data_dir_list) { if (data_dir_info.path.size() >= file_path->string().size()) { continue; @@ -65,6 +75,11 @@ void BeConfDataDirReader::init_be_conf_data_dir( const std::vector& store_paths, const std::vector& spill_store_paths, const std::vector& cache_paths) { + be_config_data_dir_list_state.store(1, std::memory_order_release); + Defer defer {[]() { + be_config_data_dir_list_state.store(2, std::memory_order_release); + be_config_data_dir_list_state.notify_all(); + }}; for (int i = 0; i < store_paths.size(); i++) { DataDirInfo data_dir_info; data_dir_info.path = store_paths[i].path; diff --git a/be/src/io/fs/local_file_reader.h b/be/src/io/fs/local_file_reader.h index 0ffd6ccde9e029..38b4cfa55af3bf 100644 --- a/be/src/io/fs/local_file_reader.h +++ b/be/src/io/fs/local_file_reader.h @@ -35,6 +35,7 @@ struct CachePath; namespace doris::io { struct BeConfDataDirReader { + static std::atomic_int be_config_data_dir_list_state; static std::vector be_config_data_dir_list; static void get_data_dir_by_file_path(Path* file_path, std::string* data_dir_arg);