3 changes: 3 additions & 0 deletions nemo_rl/models/policy/megatron_policy_worker.py
@@ -814,6 +814,9 @@ def __init__(
         ## used for streaming update inference engine weights
         self._held_gather_buffer = None

+        os.environ["NCCL_DEBUG"] = "INFO"
+        os.environ["NCCL_DEBUG_SUBSYS"] = "TUNING"
+
     def init_collective(self, ip: str, port: int, world_size: int) -> None:
         """Initialize the collective communication."""
         from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator
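The hunk above hardcodes verbose NCCL logging for every Megatron policy worker. NCCL_DEBUG and NCCL_DEBUG_SUBSYS are standard NCCL environment variables; setting the subsystem to TUNING limits the INFO-level output to NCCL's algorithm/protocol selection. If this is meant as a temporary diagnostic, a minimal opt-in sketch (the NRL_NCCL_DEBUG flag name is hypothetical, not part of this PR) could look like:

import os

# Hypothetical opt-in switch: enable verbose NCCL logs only when the
# launcher sets NRL_NCCL_DEBUG; otherwise leave NCCL's defaults alone.
if os.environ.get("NRL_NCCL_DEBUG"):
    # INFO-level NCCL logging, restricted to the TUNING subsystem.
    os.environ.setdefault("NCCL_DEBUG", "INFO")
    os.environ.setdefault("NCCL_DEBUG_SUBSYS", "TUNING")

Either way, these variables only take effect if set before the first NCCL communicator is created in the process.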
24 changes: 23 additions & 1 deletion nemo_rl/utils/flops_tracker.py
@@ -22,9 +22,10 @@
 from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
 from transformers.models.qwen3.configuration_qwen3 import Qwen3Config
 from transformers.models.qwen3_moe.configuration_qwen3_moe import Qwen3MoeConfig
+from transformers.models.deepseek_v3.configuration_deepseek_v3 import DeepseekV3Config

 from nemo_rl.models.policy.utils import sliding_window_overwrite
-from nemo_rl.utils.flops_formulas import FLOPSConfig, llama, qwen2, qwen3
+from nemo_rl.utils.flops_formulas import FLOPSConfig, llama, qwen2, qwen3, deepseekv3


 def get_default_hf_config(model_name: str) -> PretrainedConfig:
@@ -76,6 +77,27 @@ def convert_config_to_flops_config(
             attention_heads=config.num_attention_heads,
             vocab_size=config.vocab_size,
         ), llama
+    elif isinstance(config, DeepseekV3Config):
+        return FLOPSConfig(
+            gbs=0,
+            hs=config.hidden_size,
+            layers=config.num_hidden_layers,
+            ffn_hs=config.intermediate_size,
+            attention_heads=config.num_attention_heads,
+            moe_router_topk=config.num_experts_per_tok,
+            query_groups=config.num_key_value_heads,
+            vocab_size=config.vocab_size,
+            q_lora_rank=config.q_lora_rank,
+            kv_lora_rank=config.kv_lora_rank,
+            qk_head_dim=config.qk_nope_head_dim,
+            qk_pos_emb_head_dim=config.qk_rope_head_dim,
+            v_head_dim=config.v_head_dim,
+            moe_layer_freq=1,
+            moe_shared_expert_intermediate_size=config.moe_intermediate_size,
+            moe_ffn_hidden_size=config.moe_intermediate_size,
+            mtp_num_layers=0,
+            causal_self_attn=True,
+        ), deepseekv3
     else:
         raise ValueError(f"Unsupported config type: {type(config)}")

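For reference, a minimal sketch of exercising the new branch; the hyperparameters below are illustrative toy values, not DeepSeek-V3's published configuration:

from transformers.models.deepseek_v3.configuration_deepseek_v3 import DeepseekV3Config

from nemo_rl.utils.flops_tracker import convert_config_to_flops_config

# Build a small DeepSeek-V3-style config in memory; all sizes are toy values.
config = DeepseekV3Config(
    hidden_size=1024,
    num_hidden_layers=4,
    num_attention_heads=16,
    num_key_value_heads=16,
    intermediate_size=4096,
    moe_intermediate_size=512,
    num_experts_per_tok=4,
)

flops_cfg, formula = convert_config_to_flops_config(config)
# formula is the deepseekv3 FLOPs function; the tracker fills in the
# batch-dependent fields (e.g. gbs) before invoking it.
print(formula.__name__, flops_cfg.hs, flops_cfg.moe_router_topk)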