Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 32241ff

Browse files
authored
[Distributed] Fix tiktokenizer decoding (#1257)
1 parent dc3d35e commit 32241ff

File tree

2 files changed

+13
-3
lines changed

2 files changed, +13 −3 lines changed

dist_run.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,6 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
442442
# New token generated each iteration
443443
# need a row dimension for each prompt in the batch
444444
new_token = torch.zeros(batch_size, 1, device=device, dtype=torch.int64)
445-
logger.info(f"{color.green}{new_token.shape=}, {new_token=}{color.reset}")
446445
# Store the generated tokens
447446
res = []
448447

@@ -519,7 +518,6 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
519518

520519
# Decode the output
521520
if pp_rank == last_pp_rank:
522-
# logger.info(f"{color.red}Decoding...{output.shape=}{color.reset}")
523521
new_token = _batch_decode_next_tokens(output, prompt_lengths, step)
524522
res.append(new_token)
525523
if not args.disable_in_flight_decode:
@@ -541,7 +539,13 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
541539
# token ids. Thus cat'ing along dim 1.
542540
res = torch.cat(res, dim=1)
543541
res_list = res.tolist()
544-
responses = tokenizer.decode(res_list)
542+
if isinstance(tokenizer, TiktokenTokenizer):
543+
# For TiktokenTokenizer, we need to decode prompt by prompt.
544+
# TODO: is there a better way to do this?
545+
responses = [tokenizer.decode(sequence) for sequence in res_list]
546+
else: # SentencePieceProcessor
547+
# For SentencePieceProcessor, we can decode the entire 2D list at once.
548+
responses = tokenizer.decode(res_list)
545549
# Show prompts and responses
546550
for prompt_text, response_text in zip(prompt, responses):
547551
logger.info(f"Prompt: {color.green}{prompt_text} {color.reset}")

torchchat/distributed/safetensor_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,19 @@ def get_hf_weight_map_and_path(
8888
raise FileNotFoundError(
8989
f"Weight index file for {model_id} does not exist in HF cache."
9090
)
91+
logger.info(
92+
f"Loading weight map from: {index_file}"
93+
)
9194
weight_map = read_weights_from_json(index_file)
9295
if weight_map is None:
9396
raise ValueError(f"Weight map not found in config file {index_file}")
9497
weight_map, new_to_old_keymap = remap_weight_keys(weight_map)
9598
weight_path = os.path.dirname(index_file)
9699
if not os.path.exists(weight_path):
97100
raise FileNotFoundError(f"Weight path {weight_path} does not exist")
101+
logger.info(
102+
f"Loading weights from: {weight_path}"
103+
)
98104
return weight_map, weight_path, new_to_old_keymap
99105

100106

0 commit comments

Comments (0)