Skip to content

Commit 2598513

Browse files
committed
save debug comments
1 parent a2db449 commit 2598513

File tree

9 files changed

+1250
-16
lines changed

9 files changed

+1250
-16
lines changed
Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
[
2-
"Place of birth\nThe place of birth (POB) or birthplace is the place where a person was born. This place is often used in legal documents, together with name and date of birth, to uniquely identify a person. Practice regarding whether this place should be a country, a territory or a city/town/locality differs in different countries, but often city or territory is used for native-born citizen passports and countries for foreign-born ones.\nAs a general rule with respect to passports, if the place of birth is to be a country, it's determined to be the country that currently has sovereignty over the actual place of birth, regardless of when the birth actually occurred. The place of birth is not necessarily the place where the parents of the new baby live. If the baby is born in a hospital in another place, that place is the place of birth. In many countries, this also means that the government requires that the birth of the new baby is registered in the place of birth.\nSome countries place less or no importance on the place of birth, instead using alternative geographical characteristics for the purpose of identity documents. For example, Sweden has used the concept of födelsehemort (\"domicile of birth\") since 1947. This means that the domicile of the baby's mother is the registered place of birth.\nSimilarly, Switzerland uses the concept of place of origin. A child born to Swiss parents is automatically assigned the place of origin of the parent with the same last name, so the child either gets their mother's or father's place of origin. A child born to one Swiss parent and one foreign parent acquires the place of origin of their Swiss parent. In a Swiss passport and identity card, the holder's place of origin is stated, not their place of birth. 
In Japan, the registered domicile is a similar concept.\nIn some countries (primarily in the Americas), the place of birth automatically determines the nationality of the baby, a practice often referred to by the Latin phrase jus soli."
3-
]
1+
["Global warming is the long-term rise in Earth's temperature caused by greenhouse gases from human activity, burning fossil fuels, and deforestation. It leads to melting ice, rising seas, and extreme weather that threaten ecosystems, wildlife, and people. Urgent global action is "]

tensorrt_llm/_torch/distributed/communicator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,14 +394,17 @@ def recv_object(self, src, tag=0):
394394
return mpi_recv_object(src, tag)
395395

396396
def create_tp_comm(self):
397+
print(f"[MPIDist::create_tp_comm] rank: {self.mapping.rank}, tp_rank: {self.mapping.tp_rank}, tp_group: {self.mapping.tp_group}")
397398
new_group = mpi_comm().group.Incl(self.mapping.tp_group)
398399
self.tp_comm = mpi_comm().Create_group(new_group)
399400

400401
def create_pp_comm(self):
402+
print(f"[MPIDist::create_pp_comm] rank: {self.mapping.rank}, pp_rank: {self.mapping.pp_rank}, pp_group: {self.mapping.pp_group}")
401403
new_group = mpi_comm().group.Incl(self.mapping.pp_group)
402404
self.pp_comm = mpi_comm().Create_group(new_group)
403405

404406
def create_cp_comm(self):
407+
print(f"[MPIDist::create_cp_comm] rank: {self.mapping.rank}, cp_rank: {self.mapping.cp_rank}, cp_group: {self.mapping.cp_group}")
405408
new_group = mpi_comm().group.Incl(self.mapping.cp_group)
406409
self.cp_comm = mpi_comm().Create_group(new_group)
407410

tensorrt_llm/_torch/models/modeling_deepseekv3.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,6 +1600,15 @@ def forward(
16001600
return_context_logits: bool = False,
16011601
**kwargs,
16021602
) -> torch.Tensor:
1603+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] input_ids: {input_ids}")
1604+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] position_ids: {position_ids}")
1605+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] helix_is_inactive_rank: {attn_metadata.helix_is_inactive_rank}")
1606+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] kv_cache_params.num_cached_tokens_per_seq: {attn_metadata.kv_cache_params.num_cached_tokens_per_seq}")
1607+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] kv_lens_cuda: {attn_metadata.kv_lens_cuda}")
1608+
assert attn_metadata.kv_cache_manager.tokens_per_block == 32
1609+
block_ids_per_seq = attn_metadata.kv_cache_manager.get_batch_cache_indices(attn_metadata.request_ids)
1610+
for request_id, block_ids in zip(attn_metadata.request_ids, block_ids_per_seq):
1611+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] request_id: {request_id}, block_ids: {block_ids}")
16031612
return super().forward(attn_metadata=attn_metadata,
16041613
input_ids=input_ids,
16051614
position_ids=position_ids,

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,9 @@ def _merge_helix_requests(self, new_requests: list[RequestQueueItem],
684684
input_ids_this_rank = input_ids_this_rank[:-padding_len]
685685
position_ids_this_rank = position_ids_this_rank[:-padding_len]
686686

687+
print(f"[ExecutorRequestQueue::_merge_helix_requests][rank {self.dist.rank}][cp_rank {curr_cp_rank}]: input_ids_this_rank: {input_ids_this_rank}")
688+
print(f"[ExecutorRequestQueue::_merge_helix_requests][rank {self.dist.rank}][cp_rank {curr_cp_rank}]: position_ids_this_rank: {position_ids_this_rank}")
689+
687690
req = executor_request_to_llm_request(
688691
req_id=req_item.id,
689692
executor_request=req_item.request,

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,6 +2030,7 @@ def _prepare_disagg_gen_transmission_complete(self, scheduled_batch):
20302030

20312031
for req in scheduled_batch.generation_requests:
20322032
if req.is_disagg_generation_transmission_complete:
2033+
print(f"[PyExecutor::_prepare_disagg_gen_transmission_complete][rank {self.dist.rank}][cp_rank {self.dist.cp_rank}]: TRANSMISSION COMPLETE for request ID: {req.py_request_id}")
20332034
req.state = LlmRequestState.GENERATION_IN_PROGRESS
20342035
req.context_current_position = req.prompt_len
20352036
req.decoding_iter = 1

tensorrt_llm/_torch/pyexecutor/resource_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,9 @@ def prepare_resources(self, scheduled_batch: ScheduledRequests):
474474
req.py_helix_is_inactive_rank = True
475475
# Skip allocating KV cache at decode for inactive helix ranks.
476476
if req.py_helix_is_inactive_rank:
477+
print(f"[ResourceManager::prepare_resources][rank {self.mapping.rank}][cp_rank {self.mapping.cp_rank}] Skipping KV allocation for request {req.py_request_id}.")
477478
continue
479+
print(f"[ResourceManager::prepare_resources][rank {self.mapping.rank}][cp_rank {self.mapping.cp_rank}] Adding KV allocation for request {req.py_request_id}.")
478480
self.impl.add_token(req.py_request_id)
479481
for _ in range(get_draft_token_length(req)):
480482
self.impl.add_token(req.py_request_id)

tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp2_gentp1cp2_deepseek_v3_lite_bf16_tllm_gen.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ context_servers:
2020
- "localhost:8001"
2121
generation_servers:
2222
num_instances: 1
23-
tensor_parallel_size: 1
23+
tensor_parallel_size: 2
2424
pipeline_parallel_size: 1
2525
context_parallel_size: 2
2626
enable_chunked_prefill: False

tests/integration/defs/disaggregated/test_disaggregated.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515

1616
import contextlib
17+
import glob
1718
import os
1819
import re
1920
import subprocess
@@ -272,7 +273,7 @@ def get_test_config(test_desc, example_dir, test_root):
272273
"llama4_kv_cache_overflow":
273274
(8, f"{test_configs_root}/disagg_config_llama4_kv_cache_overflow.yaml"),
274275
"deepseek_v3_lite_bf16_tllm_gen_helix":
275-
(4,
276+
(6,
276277
f"{test_configs_root}/disagg_config_ctxtp2_gentp1cp2_deepseek_v3_lite_bf16_tllm_gen.yaml"
277278
),
278279
}
@@ -332,16 +333,15 @@ def run_client_tests(example_dir,
332333
use_ray=False):
333334
"""Run client tests against the disaggregated server."""
334335
client_dir = f"{example_dir}/clients"
335-
for _ in range(num_iters):
336+
# Use only 1 iteration for long prompts test
337+
effective_num_iters = 1 if prompt_file == "long_prompts.json" else num_iters
338+
for _ in range(effective_num_iters):
336339
client_cmd = [
337340
'python3', f'{client_dir}/disagg_client.py', '-c', f'{config_file}',
338341
'-p', f'{client_dir}/{prompt_file}', '--ignore-eos',
339342
'--server-start-timeout',
340343
str(server_start_timeout)
341344
]
342-
if prompt_file == "long_prompts.json":
343-
# Use max_tokens 4 for long prompts to reduce test time
344-
client_cmd.extend(['--max-tokens', '4'])
345345

346346
# Prepare poll processes
347347
worker_processes = []
@@ -354,11 +354,13 @@ def run_client_tests(example_dir,
354354
poll_procs = worker_processes + [server_proc]
355355
check_call(client_cmd, env=env, poll_procs=poll_procs)
356356

357-
# Streaming client run
358-
streaming_client_cmd = client_cmd + [
359-
'--streaming', '-o', 'output_streaming.json'
360-
]
361-
check_call(streaming_client_cmd, env=env, poll_procs=poll_procs)
357+
# Skip streaming for long prompts test
358+
if prompt_file != "long_prompts.json":
359+
# Streaming client run
360+
streaming_client_cmd = client_cmd + [
361+
'--streaming', '-o', 'output_streaming.json'
362+
]
363+
check_call(streaming_client_cmd, env=env, poll_procs=poll_procs)
362364

363365
# Run the chat completion endpoint test only for TinyLlama
364366
if test_desc == "overlap" or test_desc == "trtllm_sampler":
@@ -374,8 +376,15 @@ def run_client_tests(example_dir,
374376
env=env,
375377
poll_procs=poll_procs)
376378

377-
# Skip output verification for long prompts test
379+
# Print and skip further verification for long prompts test
378380
if prompt_file == "long_prompts.json":
381+
# Print all output_*.json file contents
382+
for output_file in glob.glob('output_*.json') + ['output.json']:
383+
if os.path.exists(output_file):
384+
with open(output_file, 'r') as f:
385+
content = f.read()
386+
logger.info(f"-------- {output_file} content --------")
387+
logger.info(content)
379388
continue
380389

381390
if extra_endpoints_test is not None:
@@ -1928,7 +1937,7 @@ def test_llama4_long_context_kv_cache_overflow(disaggregated_test_root,
19281937
cwd=llm_venv.get_working_directory())
19291938

19301939

1931-
@pytest.mark.skip_less_device(4)
1940+
@pytest.mark.skip_less_device(8)
19321941
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-bf16'],
19331942
indirect=True)
19341943
def test_disaggregated_deepseek_v3_lite_bf16_tllm_gen_helix(

0 commit comments

Comments
 (0)