This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Merged
57 changes: 36 additions & 21 deletions torchchat/generate.py
@@ -20,10 +20,17 @@
import torch._dynamo.config
import torch._inductor.config

from torchtune.models.llama3_2_vision._model_builders import llama3_2_vision_transform

from PIL import Image

# torchtune model definition dependencies
from torchtune.data import Message, padded_collate_tiled_images_and_mask

from torchtune.generation import sample as tune_sample
from torchtune.models.llama3 import llama3_tokenizer

from torchtune.models.llama3_2_vision._model_builders import llama3_2_vision_transform
from torchtune.training import set_default_dtype

from torchchat.cli.builder import (
_initialize_model,
_initialize_tokenizer,
@@ -34,13 +41,6 @@
from torchchat.utils.build_utils import device_sync, set_precision
from torchchat.utils.device_info import get_device_info

# torchtune model definition dependencies
from torchtune.data import Message, padded_collate_tiled_images_and_mask

from torchtune.generation import sample as tune_sample
from torchtune.models.llama3 import llama3_tokenizer
from torchtune.training import set_default_dtype


class _ChatFormatter(ABC):
def __init__(self, tokenizer):
@@ -357,8 +357,8 @@ def prefill(

# TODO: Verify sequential prefill works with multimodal models
is_multimodal = True
if 'encoder_input' in batch:
encoder_input = batch['encoder_input']
if "encoder_input" in batch:
encoder_input = batch["encoder_input"]
encoder_mask = batch["encoder_mask"]
is_multimodal = True
else:
@@ -369,7 +369,13 @@
seq_len = x.size(1)
mask = batch["causal_mask"][None, :seq_len]
input_pos = input_pos.view(1, -1)
logits = model(tokens=x, mask=mask, encoder_input=encoder_input, input_pos=input_pos, encoder_mask=encoder_mask)[:, -1]
logits = model(
tokens=x,
mask=mask,
encoder_input=encoder_input,
input_pos=input_pos,
encoder_mask=encoder_mask,
)[:, -1]

if is_multimodal:
batch["encoder_mask"] = batch["encoder_mask"][:, -1:]
@@ -404,7 +410,9 @@ def decode_one_token(
assert batch is not None, "Flamingo requires batch"
mask = batch["causal_mask"][None, input_pos.item(), None, :]
encoder_mask = batch["encoder_mask"] if "encoder_mask" in batch else None
logits = model(x, encoder_mask=encoder_mask, mask=mask, input_pos=input_pos)[:, -1:]
logits = model(
x, encoder_mask=encoder_mask, mask=mask, input_pos=input_pos
)[:, -1:]
else:
logits = model(x, input_pos)
# print(f"x: {x},\n input_pos: {input_pos}\n")
@@ -492,7 +500,6 @@ def decode_n_tokens(
next_prob.clone() if next_prob is not None else None
)


def model_forward(self, model, x, input_pos):
return model(x, input_pos)

@@ -605,7 +612,12 @@ def generate(
or self.model.config.model_type == ModelType.Flamingo
):
# 6404 is one-gpu affordable max_seq_length for single image input
model.setup_caches(batch_size=1, dtype=self.dtype, encoder_max_seq_len=6404, decoder_max_seq_len=T_new)
model.setup_caches(
batch_size=1,
dtype=self.dtype,
encoder_max_seq_len=6404,
decoder_max_seq_len=T_new,
)
else:
model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
if is_speculative and draft_model is not model:
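
The two cache-setup branches above, shown side by side as a hedged sketch: the helper name is made up, and T_new is assumed to be the prompt length plus the number of tokens to generate.

```python
# Hedged sketch of the cache setup above; setup_model_caches is a made-up helper.
import torch

def setup_model_caches(model, is_vision_model: bool, dtype: torch.dtype,
                       T_new: int, max_seq_length: int) -> None:
    if is_vision_model:
        # Vision models size the image-encoder and text-decoder caches separately;
        # 6404 is the single-GPU, single-image encoder budget noted in the comment above.
        model.setup_caches(
            batch_size=1,
            dtype=dtype,
            encoder_max_seq_len=6404,
            decoder_max_seq_len=T_new,
        )
    else:
        model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
```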
@@ -731,9 +743,9 @@ def _gen_model_input(
max_new_tokens: Optional[int] = None,
) -> Tuple[torch.Tensor, Optional[Dict[str, Any]]]:
"""
Convert prompt and image prompts into consumable model input args.
Convert prompt and image prompts into consumable model input args.

When prompt is a list, the anticipated format is OpenAI API Inspired:
When prompt is a list, the anticipated format is OpenAI API Inspired:
[ ..., {"role": message["role"], "content": message["content"]}, ...]

Args:
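
For reference, a prompt in the OpenAI-API-inspired list format that this docstring describes might look like the following (contents illustrative only):

```python
# Illustrative message list in the OpenAI-API-inspired format described above.
prompt = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Describe this image in one sentence."},
]
```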
@@ -826,15 +838,18 @@ def _gen_model_input(
logging.debug(encoded)
return encoded, batch


def chat(
self,
generator_args: GeneratorArgs,
):
if generator_args.chat_mode:
print("Starting Interactive Chat")

encoded, batch = self._gen_model_input(generator_args.prompt, generator_args.image_prompts, generator_args.max_new_tokens)

encoded, batch = self._gen_model_input(
generator_args.prompt,
generator_args.image_prompts,
generator_args.max_new_tokens,
)

model_size = sum(
[
@@ -900,7 +915,7 @@ def chat(
if text_transformer_args is not None
else 2048
),
max_seq_length
max_seq_length,
)

max_seq_length = (
12 changes: 7 additions & 5 deletions torchchat/usages/openai_api.py
@@ -19,16 +19,16 @@

from PIL import Image

from torchtune.data import Message, padded_collate_tiled_images_and_mask

from torchtune.models.llama3_2_vision._model_builders import llama3_2_vision_transform

from torchchat.cli.download import is_model_downloaded, load_model_configs
from torchchat.generate import Generator, GeneratorArgs
from torchchat.model import FlamingoModel

from torchchat.utils.build_utils import device_sync

from torchtune.data import Message, padded_collate_tiled_images_and_mask

from torchtune.models.llama3_2_vision._model_builders import llama3_2_vision_transform


"""Dataclasses defined around the objects used the OpenAI API Chat specification.

@@ -291,7 +291,9 @@ def __init__(self, *args, **kwargs):
)
except:
self.max_seq_length = 2048
print(f"can not find max_seq_length in model config, use default value: {self.max_seq_length}")
print(
f"can not find max_seq_length in model config, use default value: {self.max_seq_length}"
)
# The System fingerprint is a unique identifier for the model and its configuration.
self.system_fingerprint = (
f"{self.builder_args.device}_{self.builder_args.precision}"