Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/models/llava/export_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def __init__(self, llava):
super().__init__()
self.text_model = llava.text_model

def forward(self, input_pos, embeddings):
def forward(self, embeddings, input_pos):
return self.text_model(None, {"input_pos": input_pos}, embeddings)

llava_text_model = LlavaTextModel(llava)
Expand All @@ -88,7 +88,7 @@ def forward(self, input_pos, embeddings):
max_seq_len=llava.text_model_args.max_seq_len,
dtype=DType.fp32,
use_kv_cache=True,
example_inputs=(torch.tensor([0], dtype=torch.int64), embeddings),
example_inputs=(embeddings, torch.tensor([0], dtype=torch.int64)),
dynamic_shapes=dynamic_shapes,
)

Expand Down
2 changes: 1 addition & 1 deletion examples/models/llava/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,5 +405,5 @@ def _get_image_dynamic_shapes(self):

def _get_prompt_dynamic_shapes(self):
dim = torch.export.Dim("token_dim", min=2, max=self.max_seq_len)
text_model_dynamic_shapes = ({0: 1}, {1: dim})
text_model_dynamic_shapes = ({1: dim}, {0: 1})
return text_model_dynamic_shapes
2 changes: 1 addition & 1 deletion examples/models/llava/runner/llava_image_prefiller.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class ET_EXPERIMENTAL LlavaImagePrefiller {

// Run text model
auto outputs_res = ET_UNWRAP(module_->execute(
kTextModelMethod, {start_pos_tensor, image_encoder_outputs[0]}));
kTextModelMethod, {image_encoder_outputs[0], start_pos_tensor}));
ET_CHECK_MSG(
outputs_res[0].isTensor(),
"Non Tensor Output returned from executing image prefill");
Expand Down
2 changes: 1 addition & 1 deletion examples/models/llava/runner/llava_text_decoder_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class ET_EXPERIMENTAL LlavaTextDecoderRunner
&start_pos, {1}, executorch::aten::ScalarType::Long);
// run text model
auto outputs_res = ET_UNWRAP(module_->execute(
kTextModelMethod, {start_pos_tensor, token_embedding_outputs[0]}));
kTextModelMethod, {token_embedding_outputs[0], start_pos_tensor}));

ET_CHECK_MSG(
outputs_res.size() == 1,
Expand Down
8 changes: 4 additions & 4 deletions examples/models/llava/test/test_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def test_llava_export(self):
)[0]
llava_module.run_method(
"text_decoder",
(torch.tensor([start_pos], dtype=torch.int64), pte_embeds_before_img),
(pte_embeds_before_img, torch.tensor([start_pos], dtype=torch.int64)),
)

# Update the start_pos. start_pos is used in kv cache. The source of truth
Expand All @@ -109,8 +109,8 @@ def test_llava_export(self):
llava_module.run_method(
"text_decoder",
(
torch.tensor([start_pos], dtype=torch.int64),
pte_embeds_img,
torch.tensor([start_pos], dtype=torch.int64),
),
)

Expand All @@ -123,7 +123,7 @@ def test_llava_export(self):
)[0]
pte_prefill_after_img = llava_module.run_method(
"text_decoder",
(torch.tensor([start_pos], dtype=torch.int64), pte_embeds_after_img),
(pte_embeds_after_img, torch.tensor([start_pos], dtype=torch.int64)),
)[0]

# Update the logits for each prefill (kv cache) step.
Expand All @@ -140,7 +140,7 @@ def test_llava_export(self):
)[0]
logits = llava_module.run_method(
"text_decoder",
(torch.tensor([start_pos + i], dtype=torch.int64), token_embeds),
(token_embeds, torch.tensor([start_pos + i], dtype=torch.int64)),
)[0]
new_tokens.append(torch.argmax(logits).item())

Expand Down
Loading