1 file changed: 19 additions and 3 deletions

@@ -21,6 +21,7 @@
 
 from torchchat.cli.download import is_model_downloaded, load_model_configs
 from torchchat.generate import Generator, GeneratorArgs
+from torchchat.model import FlamingoModel
 
 from torchchat.utils.build_utils import device_sync
 
@@ -363,9 +364,24 @@ def chunked_completion(self, completion_request: CompletionRequest):
 
         device_sync(device=self.builder_args.device)
 
-        encoded, batch = self._gen_model_inputs_from_openai_completion_request(
-            completion_request
-        )
+        # If the underlying model is Llama 3.2 11B, use unified processing
+        if isinstance(self.model, FlamingoModel):
+            encoded, batch = self._gen_model_inputs_from_openai_completion_request(
+                completion_request
+            )
+        else:
+            # Else use the legacy formatting logic
+            tokens = self.chat_formatter.encode_dialog_prompt(
+                dialog=[
+                    {"role": message["role"], "content": message["content"]}
+                    for message in completion_request.messages
+                ]
+            )
+            print("tokens:", self.tokenizer.decode(tokens), flush=True)
+            encoded = torch.tensor(
+                tokens, dtype=torch.int, device=self.builder_args.device
+            )
+            batch = None
 
         idx = 0
         start_pos = 0
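
For context, the change above routes requests by model type: a FlamingoModel (the multimodal Llama 3.2 11B) goes through the unified _gen_model_inputs_from_openai_completion_request helper, while every other model falls back to the older chat-formatter path that tokenizes the flattened dialog. Below is a minimal, self-contained sketch of that isinstance-based dispatch; MultimodalModel, TextModel, build_inputs, and the toy tokenization are illustrative stand-ins, not torchchat APIs.

from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple


@dataclass
class CompletionRequest:
    # Minimal stand-in for the OpenAI-style request: just the chat messages.
    messages: List[Dict[str, str]] = field(default_factory=list)


class MultimodalModel:
    """Stand-in for a FlamingoModel-style multimodal model."""


class TextModel:
    """Stand-in for a text-only model."""


def build_inputs(model, request: CompletionRequest) -> Tuple[List[int], Optional[dict]]:
    if isinstance(model, MultimodalModel):
        # Unified path: one helper turns the whole request (text and images)
        # into token ids plus an extra batch payload for the vision encoder.
        tokens = [1, 2, 3]               # placeholder for the unified builder's output
        batch = {"encoder_input": None}  # placeholder multimodal payload
        return tokens, batch
    # Legacy path: flatten the chat messages into a prompt string and tokenize it.
    prompt = "\n".join(f'{m["role"]}: {m["content"]}' for m in request.messages)
    tokens = [ord(ch) for ch in prompt]  # toy "tokenizer" for illustration only
    return tokens, None                  # text-only models carry no extra batch


if __name__ == "__main__":
    req = CompletionRequest(messages=[{"role": "user", "content": "hello"}])
    print(build_inputs(TextModel(), req))        # legacy path: (token ids, None)
    print(build_inputs(MultimodalModel(), req))  # unified path: (token ids, batch dict)

The reason for the branch is that the extra batch payload only has meaning for the multimodal path; as in the diff, the legacy path sets batch to None so downstream generation can handle both cases uniformly.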