Skip to content

Commit 2598513

Browse files
committed
save debug comments
1 parent a2db449 commit 2598513

File tree

9 files changed

+1250
-16
lines changed

9 files changed

+1250
-16
lines changed
Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
[
2-
"Place of birth\nThe place of birth (POB) or birthplace is the place where a person was born. This place is often used in legal documents, together with name and date of birth, to uniquely identify a person. Practice regarding whether this place should be a country, a territory or a city/town/locality differs in different countries, but often city or territory is used for native-born citizen passports and countries for foreign-born ones.\nAs a general rule with respect to passports, if the place of birth is to be a country, it's determined to be the country that currently has sovereignty over the actual place of birth, regardless of when the birth actually occurred. The place of birth is not necessarily the place where the parents of the new baby live. If the baby is born in a hospital in another place, that place is the place of birth. In many countries, this also means that the government requires that the birth of the new baby is registered in the place of birth.\nSome countries place less or no importance on the place of birth, instead using alternative geographical characteristics for the purpose of identity documents. For example, Sweden has used the concept of födelsehemort (\"domicile of birth\") since 1947. This means that the domicile of the baby's mother is the registered place of birth.\nSimilarly, Switzerland uses the concept of place of origin. A child born to Swiss parents is automatically assigned the place of origin of the parent with the same last name, so the child either gets their mother's or father's place of origin. A child born to one Swiss parent and one foreign parent acquires the place of origin of their Swiss parent. In a Swiss passport and identity card, the holder's place of origin is stated, not their place of birth. 
In Japan, the registered domicile is a similar concept.\nIn some countries (primarily in the Americas), the place of birth automatically determines the nationality of the baby, a practice often referred to by the Latin phrase jus soli."
3-
]
1+
["Global warming is the long-term rise in Earth's temperature caused by greenhouse gases from human activity, burning fossil fuels, and deforestation. It leads to melting ice, rising seas, and extreme weather that threaten ecosystems, wildlife, and people. Urgent global action is "]

tensorrt_llm/_torch/distributed/communicator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,14 +394,17 @@ def recv_object(self, src, tag=0):
394394
return mpi_recv_object(src, tag)
395395

396396
def create_tp_comm(self):
397+
print(f"[MPIDist::create_tp_comm] rank: {self.mapping.rank}, tp_rank: {self.mapping.tp_rank}, tp_group: {self.mapping.tp_group}")
397398
new_group = mpi_comm().group.Incl(self.mapping.tp_group)
398399
self.tp_comm = mpi_comm().Create_group(new_group)
399400

400401
def create_pp_comm(self):
402+
print(f"[MPIDist::create_pp_comm] rank: {self.mapping.rank}, pp_rank: {self.mapping.pp_rank}, pp_group: {self.mapping.pp_group}")
401403
new_group = mpi_comm().group.Incl(self.mapping.pp_group)
402404
self.pp_comm = mpi_comm().Create_group(new_group)
403405

404406
def create_cp_comm(self):
407+
print(f"[MPIDist::create_cp_comm] rank: {self.mapping.rank}, cp_rank: {self.mapping.cp_rank}, cp_group: {self.mapping.cp_group}")
405408
new_group = mpi_comm().group.Incl(self.mapping.cp_group)
406409
self.cp_comm = mpi_comm().Create_group(new_group)
407410

tensorrt_llm/_torch/models/modeling_deepseekv3.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,6 +1600,15 @@ def forward(
16001600
return_context_logits: bool = False,
16011601
**kwargs,
16021602
) -> torch.Tensor:
1603+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] input_ids: {input_ids}")
1604+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] position_ids: {position_ids}")
1605+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] helix_is_inactive_rank: {attn_metadata.helix_is_inactive_rank}")
1606+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] kv_cache_params.num_cached_tokens_per_seq: {attn_metadata.kv_cache_params.num_cached_tokens_per_seq}")
1607+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] kv_lens_cuda: {attn_metadata.kv_lens_cuda}")
1608+
assert attn_metadata.kv_cache_manager.tokens_per_block == 32
1609+
block_ids_per_seq = attn_metadata.kv_cache_manager.get_batch_cache_indices(attn_metadata.request_ids)
1610+
for request_id, block_ids in zip(attn_metadata.request_ids, block_ids_per_seq):
1611+
print(f"[DeepseekV3ForCausalLM::forward][rank {self.model_config.mapping.rank}][cp_rank {self.model_config.mapping.cp_rank}] request_id: {request_id}, block_ids: {block_ids}")
16031612
return super().forward(attn_metadata=attn_metadata,
16041613
input_ids=input_ids,
16051614
position_ids=position_ids,

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,9 @@ def _merge_helix_requests(self, new_requests: list[RequestQueueItem],
684684
input_ids_this_rank = input_ids_this_rank[:-padding_len]
685685
position_ids_this_rank = position_ids_this_rank[:-padding_len]
686686

687+
print(f"[ExecutorRequestQueue::_merge_helix_requests][rank {self.dist.rank}][cp_rank {curr_cp_rank}]: input_ids_this_rank: {input_ids_this_rank}")
688+
print(f"[ExecutorRequestQueue::_merge_helix_requests][rank {self.dist.rank}][cp_rank {curr_cp_rank}]: position_ids_this_rank: {position_ids_this_rank}")
689+
687690
req = executor_request_to_llm_request(
688691
req_id=req_item.id,
689692
executor_request=req_item.request,

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,6 +2030,7 @@ def _prepare_disagg_gen_transmission_complete(self, scheduled_batch):
20302030

20312031
for req in scheduled_batch.generation_requests:
20322032
if req.is_disagg_generation_transmission_complete:
2033+
print(f"[PyExecutor::_prepare_disagg_gen_transmission_complete][rank {self.dist.rank}][cp_rank {self.dist.cp_rank}]: TRANSMISSION COMPLETE for request ID: {req.py_request_id}")
20332034
req.state = LlmRequestState.GENERATION_IN_PROGRESS
20342035
req.context_current_position = req.prompt_len
20352036
req.decoding_iter = 1

tensorrt_llm/_torch/pyexecutor/resource_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,9 @@ def prepare_resources(self, scheduled_batch: ScheduledRequests):
474474
req.py_helix_is_inactive_rank = True
475475
# Skip allocating KV cache at decode for inactive helix ranks.
476476
if req.py_helix_is_inactive_rank:
477+
print(f"[ResourceManager::prepare_resources][rank {self.mapping.rank}][cp_rank {self.mapping.cp_rank}] Skipping KV allocation for request {req.py_request_id}.")
477478
continue
479+
print(f"[ResourceManager::prepare_resources][rank {self.mapping.rank}][cp_rank {self.mapping.cp_rank}] Adding KV allocation for request {req.py_request_id}.")
478480
self.impl.add_token(req.py_request_id)
479481
for _ in range(get_draft_token_length(req)):
480482
self.impl.add_token(req.py_request_id)

tests/integration/defs/disaggregated/test_configs/disagg_config_ctxtp2_gentp1cp2_deepseek_v3_lite_bf16_tllm_gen.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ context_servers:
2020
- "localhost:8001"
2121
generation_servers:
2222
num_instances: 1
23-
tensor_parallel_size: 1
23+
tensor_parallel_size: 2
2424
pipeline_parallel_size: 1
2525
context_parallel_size: 2
2626
enable_chunked_prefill: False

tests/integration/defs/disaggregated/test_disaggregated.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515

1616
import contextlib
17+
import glob
1718
import os
1819
import re
1920
import subprocess
@@ -272,7 +273,7 @@ def get_test_config(test_desc, example_dir, test_root):
272273
"llama4_kv_cache_overflow":
273274
(8, f"{test_configs_root}/disagg_config_llama4_kv_cache_overflow.yaml"),
274275
"deepseek_v3_lite_bf16_tllm_gen_helix":
275-
(4,
276+
(6,
276277
f"{test_configs_root}/disagg_config_ctxtp2_gentp1cp2_deepseek_v3_lite_bf16_tllm_gen.yaml"
277278
),
278279
}
@@ -332,16 +333,15 @@ def run_client_tests(example_dir,
332333
use_ray=False):
333334
"""Run client tests against the disaggregated server."""
334335
client_dir = f"{example_dir}/clients"
335-
for _ in range(num_iters):
336+
# Use only 1 iteration for long prompts test
337+
effective_num_iters = 1 if prompt_file == "long_prompts.json" else num_iters
338+
for _ in range(effective_num_iters):
336339
client_cmd = [
337340
'python3', f'{client_dir}/disagg_client.py', '-c', f'{config_file}',
338341
'-p', f'{client_dir}/{prompt_file}', '--ignore-eos',
339342
'--server-start-timeout',
340343
str(server_start_timeout)
341344
]
342-
if prompt_file == "long_prompts.json":
343-
# Use max_tokens 4 for long prompts to reduce test time
344-
client_cmd.extend(['--max-tokens', '4'])
345345

346346
# Prepare poll processes
347347
worker_processes = []
@@ -354,11 +354,13 @@ def run_client_tests(example_dir,
354354
poll_procs = worker_processes + [server_proc]
355355
check_call(client_cmd, env=env, poll_procs=poll_procs)
356356

357-
# Streaming client run
358-
streaming_client_cmd = client_cmd + [
359-
'--streaming', '-o', 'output_streaming.json'
360-
]
361-
check_call(streaming_client_cmd, env=env, poll_procs=poll_procs)
357+
# Skip streaming for long prompts test
358+
if prompt_file != "long_prompts.json":
359+
# Streaming client run
360+
streaming_client_cmd = client_cmd + [
361+
'--streaming', '-o', 'output_streaming.json'
362+
]
363+
check_call(streaming_client_cmd, env=env, poll_procs=poll_procs)
362364

363365
# Run the chat completion endpoint test only for TinyLlama
364366
if test_desc == "overlap" or test_desc == "trtllm_sampler":
@@ -374,8 +376,15 @@ def run_client_tests(example_dir,
374376
env=env,
375377
poll_procs=poll_procs)
376378

377-
# Skip output verification for long prompts test
379+
# Print and skip further verification for long prompts test
378380
if prompt_file == "long_prompts.json":
381+
# Print all output_*.json file contents
382+
for output_file in glob.glob('output_*.json') + ['output.json']:
383+
if os.path.exists(output_file):
384+
with open(output_file, 'r') as f:
385+
content = f.read()
386+
logger.info(f"-------- {output_file} content --------")
387+
logger.info(content)
379388
continue
380389

381390
if extra_endpoints_test is not None:
@@ -1928,7 +1937,7 @@ def test_llama4_long_context_kv_cache_overflow(disaggregated_test_root,
19281937
cwd=llm_venv.get_working_directory())
19291938

19301939

1931-
@pytest.mark.skip_less_device(4)
1940+
@pytest.mark.skip_less_device(8)
19321941
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-bf16'],
19331942
indirect=True)
19341943
def test_disaggregated_deepseek_v3_lite_bf16_tllm_gen_helix(

0 commit comments

Comments
 (0)