Skip to content

Commit 0ece6f3

Browse files
committed
save debug comments
1 parent 115bc25 commit 0ece6f3

File tree

9 files changed

+1250
-16
lines changed

9 files changed

+1250
-16
lines changed
Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
[
2-
"Place of birth\nThe place of birth (POB) or birthplace is the place where a person was born. This place is often used in legal documents, together with name and date of birth, to uniquely identify a person. Practice regarding whether this place should be a country, a territory or a city/town/locality differs in different countries, but often city or territory is used for native-born citizen passports and countries for foreign-born ones.\nAs a general rule with respect to passports, if the place of birth is to be a country, it's determined to be the country that currently has sovereignty over the actual place of birth, regardless of when the birth actually occurred. The place of birth is not necessarily the place where the parents of the new baby live. If the baby is born in a hospital in another place, that place is the place of birth. In many countries, this also means that the government requires that the birth of the new baby is registered in the place of birth.\nSome countries place less or no importance on the place of birth, instead using alternative geographical characteristics for the purpose of identity documents. For example, Sweden has used the concept of födelsehemort (\"domicile of birth\") since 1947. This means that the domicile of the baby's mother is the registered place of birth.\nSimilarly, Switzerland uses the concept of place of origin. A child born to Swiss parents is automatically assigned the place of origin of the parent with the same last name, so the child either gets their mother's or father's place of origin. A child born to one Swiss parent and one foreign parent acquires the place of origin of their Swiss parent. In a Swiss passport and identity card, the holder's place of origin is stated, not their place of birth. 
In Japan, the registered domicile is a similar concept.\nIn some countries (primarily in the Americas), the place of birth automatically determines the nationality of the baby, a practice often referred to by the Latin phrase jus soli."
3-
]
1+
["Global warming is the long term rise in Earth temperature caused by greenhouse gases from human activity, burning fossil fuels, and deforestation. It leads to melting ice, rising seas, and extreme weather that threaten ecosystems, wildlife, and people. Urgent global action is "]

tensorrt_llm/_torch/distributed/communicator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,14 +393,17 @@ def recv_object(self, src, tag=0):
393393
return mpi_recv_object(src, tag)
394394

395395
def create_tp_comm(self):
396+
print(f"[MPIDist::create_tp_comm] rank: {self.mapping.rank}, tp_rank: {self.mapping.tp_rank}, tp_group: {self.mapping.tp_group}")
396397
new_group = mpi_comm().group.Incl(self.mapping.tp_group)
397398
self.tp_comm = mpi_comm().Create_group(new_group)
398399

399400
def create_pp_comm(self):
401+
print(f"[MPIDist::create_pp_comm] rank: {self.mapping.rank}, pp_rank: {self.mapping.pp_rank}, pp_group: {self.mapping.pp_group}")
400402
new_group = mpi_comm().group.Incl(self.mapping.pp_group)
401403
self.pp_comm = mpi_comm().Create_group(new_group)
402404

403405
def create_cp_comm(self):
406+
print(f"[MPIDist::create_cp_comm] rank: {self.mapping.rank}, cp_rank: {self.mapping.cp_rank}, cp_group: {self.mapping.cp_group}")
404407
new_group = mpi_comm().group.Incl(self.mapping.cp_group)
405408
self.cp_comm = mpi_comm().Create_group(new_group)
406409

tensorrt_llm/_torch/models/modeling_deepseekv3.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1593,6 +1593,15 @@ def forward(
15931593
return_context_logits: bool = False,
15941594
**kwargs,
15951595
) -> torch.Tensor:
1596+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] input_ids: {input_ids}")
1597+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] position_ids: {position_ids}")
1598+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] helix_is_inactive_rank: {attn_metadata.helix_is_inactive_rank}")
1599+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] kv_cache_params.num_cached_tokens_per_seq: {attn_metadata.kv_cache_params.num_cached_tokens_per_seq}")
1600+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] kv_lens_cuda: {attn_metadata.kv_lens_cuda}")
1601+
assert attn_metadata.kv_cache_manager.tokens_per_block == 32
1602+
block_ids_per_seq = attn_metadata.kv_cache_manager.get_batch_cache_indices(attn_metadata.request_ids)
1603+
for request_id, block_ids in zip(attn_metadata.request_ids, block_ids_per_seq):
1604+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] request_id: {request_id}, block_ids: {block_ids}")
15961605
return super().forward(attn_metadata=attn_metadata,
15971606
input_ids=input_ids,
15981607
position_ids=position_ids,

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,9 @@ def _merge_helix_requests(self, new_requests: list[RequestQueueItem],
684684
input_ids_this_rank = input_ids_this_rank[:-padding_len]
685685
position_ids_this_rank = position_ids_this_rank[:-padding_len]
686686

687+
print(f"[ExecutorRequestQueue::_merge_helix_requests][rank {self.dist.rank}][cp_rank {curr_cp_rank}]: input_ids_this_rank: {input_ids_this_rank}")
688+
print(f"[ExecutorRequestQueue::_merge_helix_requests][rank {self.dist.rank}][cp_rank {curr_cp_rank}]: position_ids_this_rank: {position_ids_this_rank}")
689+
687690
req = executor_request_to_llm_request(
688691
req_id=req_item.id,
689692
executor_request=req_item.request,

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,6 +1905,7 @@ def _prepare_disagg_gen_transmission_complete(self, scheduled_batch):
19051905

19061906
for req in scheduled_batch.generation_requests:
19071907
if req.is_disagg_generation_transmission_complete:
1908+
print(f"[PyExecutor::_prepare_disagg_gen_transmission_complete][rank {self.dist.rank}][cp_rank {self.dist.cp_rank}]: TRANSMISSION COMPLETE for request ID: {req.py_request_id}")
19081909
req.state = LlmRequestState.GENERATION_IN_PROGRESS
19091910
req.context_current_position = req.prompt_len
19101911
req.decoding_iter = 1

tensorrt_llm/_torch/pyexecutor/resource_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,9 @@ def prepare_resources(self, scheduled_batch: ScheduledRequests):
474474
req.py_helix_is_inactive_rank = True
475475
# Skip allocating KV cache at decode for inactive helix ranks.
476476
if req.py_helix_is_inactive_rank:
477+
print(f"[ResourceManager::prepare_resources][rank {self.mapping.rank}][cp_rank {self.mapping.cp_rank}] Skipping KV allocation for request {req.py_request_id}.")
477478
continue
479+
print(f"[ResourceManager::prepare_resources][rank {self.mapping.rank}][cp_rank {self.mapping.cp_rank}] Adding KV allocation for request {req.py_request_id}.")
478480
self.impl.add_token(req.py_request_id)
479481
for _ in range(get_draft_token_length(req)):
480482
self.impl.add_token(req.py_request_id)

tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp2_gentp1cp2_deepseek_v3_lite_bf16_tllm_gen.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ context_servers:
2020
- "localhost:8001"
2121
generation_servers:
2222
num_instances: 1
23-
tensor_parallel_size: 1
23+
tensor_parallel_size: 2
2424
pipeline_parallel_size: 1
2525
context_parallel_size: 2
2626
enable_chunked_prefill: False

tests/integration/defs/disaggregated/test_disaggregated.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515

1616
import contextlib
17+
import glob
1718
import os
1819
import re
1920
import subprocess
@@ -268,7 +269,7 @@ def get_test_config(test_desc, example_dir, test_root):
268269
"llama4_kv_cache_overflow":
269270
(8, f"{test_configs_root}/disagg_config_llama4_kv_cache_overflow.yaml"),
270271
"deepseek_v3_lite_bf16_tllm_gen_helix":
271-
(4,
272+
(6,
272273
f"{test_configs_root}/disagg_config_ctxtp2_gentp1cp2_deepseek_v3_lite_bf16_tllm_gen.yaml"
273274
),
274275
}
@@ -327,16 +328,15 @@ def run_client_tests(example_dir,
327328
use_ray=False):
328329
"""Run client tests against the disaggregated server."""
329330
client_dir = f"{example_dir}/clients"
330-
for _ in range(num_iters):
331+
# Use only 1 iteration for long prompts test
332+
effective_num_iters = 1 if prompt_file == "long_prompts.json" else num_iters
333+
for _ in range(effective_num_iters):
331334
client_cmd = [
332335
'python3', f'{client_dir}/disagg_client.py', '-c', f'{config_file}',
333336
'-p', f'{client_dir}/{prompt_file}', '--ignore-eos',
334337
'--server-start-timeout',
335338
str(server_start_timeout)
336339
]
337-
if prompt_file == "long_prompts.json":
338-
# Use max_tokens 4 for long prompts to reduce test time
339-
client_cmd.extend(['--max-tokens', '4'])
340340

341341
# Prepare poll processes
342342
worker_processes = []
@@ -349,11 +349,13 @@ def run_client_tests(example_dir,
349349
poll_procs = worker_processes + [server_proc]
350350
check_call(client_cmd, env=env, poll_procs=poll_procs)
351351

352-
# Streaming client run
353-
streaming_client_cmd = client_cmd + [
354-
'--streaming', '-o', 'output_streaming.json'
355-
]
356-
check_call(streaming_client_cmd, env=env, poll_procs=poll_procs)
352+
# Skip streaming for long prompts test
353+
if prompt_file != "long_prompts.json":
354+
# Streaming client run
355+
streaming_client_cmd = client_cmd + [
356+
'--streaming', '-o', 'output_streaming.json'
357+
]
358+
check_call(streaming_client_cmd, env=env, poll_procs=poll_procs)
357359

358360
# Run the chat completion endpoint test only for TinyLlama
359361
if test_desc == "overlap" or test_desc == "trtllm_sampler":
@@ -369,8 +371,15 @@ def run_client_tests(example_dir,
369371
env=env,
370372
poll_procs=poll_procs)
371373

372-
# Skip output verification for long prompts test
374+
# Log the output files and skip further verification for the long prompts test
373375
if prompt_file == "long_prompts.json":
376+
# Log the contents of output.json and every output_*.json file that exists
377+
for output_file in glob.glob('output_*.json') + ['output.json']:
378+
if os.path.exists(output_file):
379+
with open(output_file, 'r') as f:
380+
content = f.read()
381+
logger.info(f"-------- {output_file} content --------")
382+
logger.info(content)
374383
continue
375384

376385
if extra_endpoints_test is not None:
@@ -1920,7 +1929,7 @@ def test_llama4_long_context_kv_cache_overflow(disaggregated_test_root,
19201929
cwd=llm_venv.get_working_directory())
19211930

19221931

1923-
@pytest.mark.skip_less_device(4)
1932+
@pytest.mark.skip_less_device(8)
19241933
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-bf16'],
19251934
indirect=True)
19261935
def test_disaggregated_deepseek_v3_lite_bf16_tllm_gen_helix(

0 commit comments

Comments
 (0)