@@ -442,7 +442,6 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
     # New token generated each iteration
     # need a row dimension for each prompt in the batch
     new_token = torch.zeros(batch_size, 1, device=device, dtype=torch.int64)
-    logger.info(f"{color.green}{new_token.shape=}, {new_token=}{color.reset}")
     # Store the generated tokens
     res = []

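For orientation, a minimal sketch of the shapes involved (the values `batch_size = 2` and `num_steps = 4` are assumptions for illustration, not from this diff): each decode step yields one token id per prompt, collected as `(batch_size, 1)` tensors that are later cat'ed along dim 1:

```python
import torch

# Assumed toy values; the real code derives these from the prompts/args.
batch_size, num_steps = 2, 4

# One (batch_size, 1) int64 tensor of new token ids per decode step.
res = [torch.zeros(batch_size, 1, dtype=torch.int64) for _ in range(num_steps)]

# Cat along dim 1 -> one row of generated token ids per prompt.
tokens = torch.cat(res, dim=1)
print(tokens.shape)  # torch.Size([2, 4])
```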
@@ -519,7 +518,6 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:

             # Decode the output
             if pp_rank == last_pp_rank:
-                # logger.info(f"{color.red}Decoding...{output.shape=}{color.reset}")
                 new_token = _batch_decode_next_tokens(output, prompt_lengths, step)
                 res.append(new_token)
                 if not args.disable_in_flight_decode:
@@ -541,7 +539,13 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
         # token ids. Thus cat'ing along dim 1.
         res = torch.cat(res, dim=1)
         res_list = res.tolist()
-        responses = tokenizer.decode(res_list)
+        if isinstance(tokenizer, TiktokenTokenizer):
+            # For TiktokenTokenizer, we need to decode prompt by prompt.
+            # TODO: is there a better way to do this?
+            responses = [tokenizer.decode(sequence) for sequence in res_list]
+        else:  # SentencePieceProcessor
+            # For SentencePieceProcessor, we can decode the entire 2D list at once.
+            responses = tokenizer.decode(res_list)
         # Show prompts and responses
         for prompt_text, response_text in zip(prompt, responses):
             logger.info(f"Prompt: {color.green}{prompt_text}{color.reset}")
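The new branch exists because the two tokenizers expose different `decode` signatures: tiktoken's `decode` takes one flat list of token ids, while SentencePiece's also accepts a 2D list and returns one string per row. A hedged sketch of that difference, using stand-in decoders rather than the real `TiktokenTokenizer`/`SentencePieceProcessor` classes:

```python
def tiktoken_style_decode(ids):
    # Stand-in: decodes ONE flat list of token ids into one string.
    return " ".join(f"<{i}>" for i in ids)

def sentencepiece_style_decode(batch):
    # Stand-in: accepts a 2D list and returns one string per row.
    return [" ".join(f"<{i}>" for i in row) for row in batch]

res_list = [[1, 2, 3], [4, 5, 6]]  # one row of token ids per prompt

# Tiktoken path: decode prompt by prompt.
per_prompt = [tiktoken_style_decode(row) for row in res_list]

# SentencePiece path: decode the whole 2D list at once.
assert per_prompt == sentencepiece_style_decode(res_list)
```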