[None][chore] Fix yapf formatting in router and benchmark files

reasonsolo · reasonsolo · commit a47d932c4317 · 2026-03-22T19:53:06.000-07:00
Signed-off-by: Lizhi Zhou <1432185+reasonsolo@users.noreply.github.com>
diff --git a/examples/disaggregated/slurm/benchmark/submit.py b/examples/disaggregated/slurm/benchmark/submit.py
@@ -113,7 +113,8 @@ def assign_servers(
     return allocations
 
 
-def convert_allocations_to_server_config(allocations, server_port=8333,
+def convert_allocations_to_server_config(allocations,
+                                         server_port=8333,
                                          router_config=None):
     generation_servers = {}
     context_servers = {}
diff --git a/tensorrt_llm/serve/router.py b/tensorrt_llm/serve/router.py
@@ -633,8 +633,7 @@ def __init__(self,
             tokens_per_block = int(env_tokens_per_block)
         self._tokens_per_block = tokens_per_block
         logger.info(
-            f"KvCacheAwareRouter: tokens_per_block={self._tokens_per_block}"
-        )
+            f"KvCacheAwareRouter: tokens_per_block={self._tokens_per_block}")
 
     def _get_tokenizer(self, model: str):
         if model not in self._tokenizers:
diff --git a/tests/unittest/disaggregated/test_router.py b/tests/unittest/disaggregated/test_router.py
@@ -364,8 +364,7 @@ def build_tokens(hash_ids: list[int]) -> list[int]:
     def make_request(token_ids: list[int]):
         """Create a CompletionRequest or ChatCompletionRequest with pre-tokenized IDs."""
         if api_type == "completion":
-            return CompletionRequest(model="TinyLlama",
-                                     prompt=[token_ids])
+            return CompletionRequest(model="TinyLlama", prompt=[token_ids])
         else:
             # Use prompt_token_ids to skip tokenizer (no real model needed)
             return ChatCompletionRequest(
@@ -379,7 +378,7 @@ def make_request(token_ids: list[int]):
 
     # -- dataset-inspired hash_ids per turn (new blocks only) -------------
     # Session A (the conversation under test)
-    sess_a_turn0_hids = list(range(10))        # 10 blocks
+    sess_a_turn0_hids = list(range(10))  # 10 blocks
     sess_a_turn1_hids = list(range(100, 103))  # 3 new blocks
     sess_a_turn2_hids = list(range(200, 202))  # 2 new blocks
 
@@ -391,16 +390,14 @@ def make_request(token_ids: list[int]):
     sess_a_turn0_tokens = build_tokens(sess_a_turn0_hids)
 
     # Turn 1 accumulated: turn 0 tokens + simulated assistant reply + new user tokens
-    sess_a_turn1_tokens = build_tokens(
-        sess_a_turn0_hids + [9990, 9991] + sess_a_turn1_hids
-    )
+    sess_a_turn1_tokens = build_tokens(sess_a_turn0_hids + [9990, 9991] +
+                                       sess_a_turn1_hids)
     # (hash_ids 9990/9991 stand in for the assistant-reply blocks)
 
     # Turn 2 accumulated: extends turn 1 further
-    sess_a_turn2_tokens = build_tokens(
-        sess_a_turn0_hids + [9990, 9991] + sess_a_turn1_hids + [9992, 9993]
-        + sess_a_turn2_hids
-    )
+    sess_a_turn2_tokens = build_tokens(sess_a_turn0_hids + [9990, 9991] +
+                                       sess_a_turn1_hids + [9992, 9993] +
+                                       sess_a_turn2_hids)
 
     sess_b_tokens = build_tokens(sess_b_turn0_hids)
 
@@ -426,7 +423,8 @@ def make_request(token_ids: list[int]):
     # Verify block hashes are disjoint between sessions
     blocks_a = set(info_a0["block_hashes"][0])
     blocks_b = set(info_b0["block_hashes"][0])
-    assert blocks_a.isdisjoint(blocks_b), "Different sessions must not share block hashes"
+    assert blocks_a.isdisjoint(
+        blocks_b), "Different sessions must not share block hashes"
 
     # -- Round 2: turn 1 of session A (prefix extends turn 0) ------------
     req_a1 = make_request(sess_a_turn1_tokens)
@@ -436,16 +434,14 @@ def make_request(token_ids: list[int]):
     assert server_a1 == server_a, (
         f"Turn 1 must route to the same server as turn 0 ({server_a}) "
         f"due to KV cache prefix hit, but got {server_a1}. "
-        f"Matches: {info_a1['matches']}"
-    )
+        f"Matches: {info_a1['matches']}")
 
     # The match count on server_a must equal the prefix overlap
     server_a_idx = list(router._server_state.keys()).index(server_a)
     expected_prefix_match = len(sess_a_turn0_hids) * tokens_per_block
     assert info_a1["matches"][server_a_idx] == expected_prefix_match, (
         f"Expected {expected_prefix_match} matched tokens on server_a, "
-        f"got {info_a1['matches'][server_a_idx]}"
-    )
+        f"got {info_a1['matches'][server_a_idx]}")
 
     # Update server_a cache with new blocks from turn 1
     router._server_state[server_a].add_blocks(info_a1["block_hashes"][0])
@@ -458,17 +454,16 @@ def make_request(token_ids: list[int]):
     assert server_a2 == server_a, (
         f"Turn 2 must route to the same server as turns 0-1 ({server_a}) "
         f"due to KV cache prefix hit, but got {server_a2}. "
-        f"Matches: {info_a2['matches']}"
-    )
+        f"Matches: {info_a2['matches']}")
 
     # Turn 2 should match all of turn 0 + turn 1 prefix blocks
     expected_full_match = (
-        len(sess_a_turn0_hids) + 2 + len(sess_a_turn1_hids)  # turn0 + reply + turn1
+        len(sess_a_turn0_hids) + 2 +
+        len(sess_a_turn1_hids)  # turn0 + reply + turn1
     ) * tokens_per_block
     assert info_a2["matches"][server_a_idx] == expected_full_match, (
         f"Expected {expected_full_match} matched tokens on turn 2, "
-        f"got {info_a2['matches'][server_a_idx]}"
-    )
+        f"got {info_a2['matches'][server_a_idx]}")
 
     # -- Verify session B still routes to its own server ------------------
     req_b1 = make_request(sess_b_tokens)
@@ -477,8 +472,7 @@ def make_request(token_ids: list[int]):
 
     assert server_b1 == server_b, (
         f"Session B should route to its original server ({server_b}), "
-        f"but got {server_b1}"
-    )
+        f"but got {server_b1}")
 
 
 def test_create_router(servers):