@@ -19,20 +19,6 @@ std::string GetEnvAsStr(const std::string &name, const std::string &default_valu
1919 return value ? std::string (value) : default_value;
2020}
2121
22- #ifdef USE_NCCL
23- ncclUniqueId StringToNcclId (const std::string &str) {
24- ncclUniqueId id;
25- for (int i = 0 ; i < NCCL_UNIQUE_ID_BYTES; ++i) {
26- unsigned int byte;
27- std::stringstream ss;
28- ss << std::hex << str.substr (i * 2 , 2 );
29- ss >> byte;
30- id.internal [i] = static_cast <char >(byte);
31- }
32- return id;
33- }
34- #endif
35-
3622} // namespace
3723
3824namespace infini_train ::nn::parallel::global {
@@ -125,10 +111,6 @@ void GlobalEnv::Init(int nthread_per_process, int tensor_parallel_size, bool seq
125111 // FIXME(zbl): set PP size
126112 layout_.sizes [PP] = 1 ;
127113 layout_.InitStrides ();
128- // FIXME(dcj): what if no nccl id?
129- #ifdef USE_NCCL
130- nccl_id_ = StringToNcclId (GetEnvAsStr (" NCCL_UNIQUE_ID" , " " ));
131- #endif
132114
133115 initialized_ = true ;
134116}
@@ -225,34 +207,6 @@ inline void AppendAxisGroups(std::ostringstream &oss, const Layout &L, Axis targ
225207 }
226208}
227209
228- /* *
229- * @brief Generate a human-readable overview of all parallel communication groups.
230- *
231- * The output is intended for debugging, logging, and runtime verification of
232- * distributed parallelism configuration.
233- *
234- * @param L The Layout describing DP / TP / PP sizes and axis ordering.
235- * @param skip_trivial_axes
236- * If true, axes whose size <= 1(i.e. parallel strategy that is not enabled)
237- * will be marked as "unenabled" and their detailed group listing will be skipped.
238- *
239- * @return A formatted string containing the full overview of process groups.
240- *
241- * Example:
242- * === Parallel Communication Groups ===
243- * world_size = 8, config: {DP=2, TP=4, PP=1}, order: {DP -> TP -> PP}
244- * [DP] size=2, num_groups=4
245- * - DP 0 (dp=-, tp=0, pp=0): [0, 4]
246- * - DP 1 (dp=-, tp=1, pp=0): [1, 5]
247- * - DP 2 (dp=-, tp=2, pp=0): [2, 6]
248- * - DP 3 (dp=-, tp=3, pp=0): [3, 7]
249- *
250- * [TP] size=4, num_groups=2
251- * - TP 0 (dp=0, tp=-, pp=0): [0, 1, 2, 3]
252- * - TP 1 (dp=1, tp=-, pp=0): [4, 5, 6, 7]
253- *
254- * [PP] size=1, unenabled
255- */
256210std::string ProcessGroupOverview (const Layout &L, bool skip_trivial_axes) {
257211 std::ostringstream oss;
258212 oss << std::format (" \n === Parallel Communication Groups ===\n "
@@ -276,11 +230,5 @@ std::string ProcessGroupOverview(const Layout &L, bool skip_trivial_axes) {
276230 oss << " \n " ;
277231 return oss.str ();
278232}
279- #ifdef USE_NCCL
// Accessor for the stored NCCL unique id (nccl_id_).
// CHECKs that initialized_ is set before returning the value.
280- ncclUniqueId GlobalEnv::nccl_id () const {
281- CHECK (initialized_) << " GlobalEnv is not initialized!" ;
282- return nccl_id_;
283- }
284- #endif
285233
286234} // namespace infini_train::nn::parallel::global
0 commit comments