@@ -141,6 +141,8 @@ void WorkerServer::create_spawn_server(int local_rank,
141141 const char * block_size_ptr = block_size_str.c_str ();
142142 auto enable_shm_str = std::to_string (options.enable_shm ());
143143 const char * enable_shm_ptr = enable_shm_str.c_str ();
144+ auto is_local_str = std::to_string (options.is_local ());
145+ const char * is_local_ptr = is_local_str.c_str ();
144146 std::string spawn_worker_bin_path =
145147 options.spawn_worker_path () + " /spawn_worker" ;
146148 LOG (INFO) << " Spawn worker path: " << spawn_worker_bin_path;
@@ -153,6 +155,7 @@ void WorkerServer::create_spawn_server(int local_rank,
153155 num_decoding_tokens_ptr,
154156 block_size_ptr,
155157 enable_shm_ptr,
158+ is_local_ptr,
156159 nullptr };
157160 pid_t pid;
158161 posix_spawn_file_actions_init (&file_actions_);
@@ -181,14 +184,16 @@ void WorkerServer::prepare_shm(
181184 int dp_local_tp_size = parallel_args.world_size () / parallel_args.dp_size ();
182185 int dp_group = parallel_args.rank () / dp_local_tp_size;
183186
187+ std::string name_prefix =
188+ " xllm_" + net::extract_port (options.master_node_addr ().value ());
184189 string name = ForwardSharedMemoryManager::create_unique_name (
185- dp_group, FORWARD_RAW_INPUT_TYPE, parallel_args.rank ());
190+ name_prefix, dp_group, FORWARD_RAW_INPUT_TYPE, parallel_args.rank ());
186191 input_shm_manager = std::make_unique<ForwardSharedMemoryManager>(
187192 name, PB_INPUT_SHM_SIZE, is_creator, FORWARD_RAW_INPUT_TYPE);
188193 LOG (INFO) << " Create input shared memory manager with name: " << name;
189194
190195 name = ForwardSharedMemoryManager::create_unique_name (
191- dp_group, FORWARD_RAW_OUTPUT_TYPE, parallel_args.rank ());
196+ name_prefix, dp_group, FORWARD_RAW_OUTPUT_TYPE, parallel_args.rank ());
192197 output_shm_manager = std::make_unique<ForwardSharedMemoryManager>(
193198 name, PB_OUTPUT_SHM_SIZE, is_creator, FORWARD_RAW_OUTPUT_TYPE);
194199 LOG (INFO) << " Create output shared memory manager with name: " << name;
@@ -204,31 +209,34 @@ WorkerServer::WorkerServer(int local_worker_idx,
204209 WorkerType worker_type,
205210 bool use_spawn_worker) {
206211 if (worker_type == WorkerType::LLM || worker_type == WorkerType::ELM) {
212+ // TODO: Refactor this code later.
207213 if (use_spawn_worker) {
208214 // start worker in a spawn process(for offline inference worker.)
209215 create_spawn_server (
210216 local_worker_idx, master_node_addr, done, parallel_args, d, options);
211217 return ;
212- }
218+ } else {
219+ std::unique_ptr<ForwardSharedMemoryManager> input_shm_manager = nullptr ;
220+ std::unique_ptr<ForwardSharedMemoryManager> output_shm_manager = nullptr ;
221+ prepare_shm (
222+ parallel_args, options, input_shm_manager, output_shm_manager);
213223
214- std::unique_ptr<ForwardSharedMemoryManager> input_shm_manager = nullptr ;
215- std::unique_ptr<ForwardSharedMemoryManager> output_shm_manager = nullptr ;
216- prepare_shm (parallel_args, options, input_shm_manager, output_shm_manager);
217- // start worker in a thread.
218- worker_thread_ =
219- std::make_unique<std::thread>(&WorkerServer::create_server,
220- this ,
221- std::cref (options),
222- std::ref (done),
223- std::cref (master_node_addr),
224- std::cref (d),
225- parallel_args.world_size (),
226- parallel_args.rank (),
227- parallel_args.dp_size (),
228- local_worker_idx,
229- parallel_args.ep_size (),
230- std::move (input_shm_manager),
231- std::move (output_shm_manager));
224+ // start worker in a thread.
225+ worker_thread_ =
226+ std::make_unique<std::thread>(&WorkerServer::create_server,
227+ this ,
228+ std::cref (options),
229+ std::ref (done),
230+ std::cref (master_node_addr),
231+ std::cref (d),
232+ parallel_args.world_size (),
233+ parallel_args.rank (),
234+ parallel_args.dp_size (),
235+ local_worker_idx,
236+ parallel_args.ep_size (),
237+ std::move (input_shm_manager),
238+ std::move (output_shm_manager));
239+ }
232240 } else {
233241 // TODO: support other model type later.
234242 LOG (ERROR) << " Unsupported model type: " << worker_type;
0 commit comments