Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions examples/models/llava/export_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,11 @@ def export_all(llava_model: LlavaModel):
{
"image_encoder": image_encoder_ep,
"token_embedding": token_embedding_ep,
"text_model": text_model_ep,
"text_decoder": text_model_ep,
},
partitioner={
"image_encoder": [XnnpackPartitioner()],
"text_model": [
"text_decoder": [
# First partition the DQLinear nodes, then partition the rest of the nodes,
# to avoid multiple DQLinear nodes in the same partition,
# to avoid holding multiple unpacked and packed weight buffers in memory,
Expand All @@ -254,7 +254,7 @@ def export_all(llava_model: LlavaModel):
memory_planning_pass=MemoryPlanningPass(alloc_graph_input=False),
sym_shape_eval_pass={
"image_encoder": ConstraintBasedSymShapeEvalPass(),
"text_model": ConstraintBasedSymShapeEvalPass(),
"text_decoder": ConstraintBasedSymShapeEvalPass(),
"token_embedding": HintBasedSymShapeEvalPass(),
},
)
Expand Down
2 changes: 1 addition & 1 deletion examples/models/llava/runner/llava_text_decoder_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class ET_EXPERIMENTAL LlavaTextDecoderRunner
}

inline static const std::string kTokenEmbeddingMethod = "token_embedding";
inline static const std::string kTextModelMethod = "text_model";
inline static const std::string kTextModelMethod = "text_decoder";
};

} // namespace example
8 changes: 4 additions & 4 deletions examples/models/llava/test/test_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def test_llava_export(self):
"token_embedding", (prompt_before_image,)
)[0]
llava_module.run_method(
"text_model",
"text_decoder",
(torch.tensor([start_pos], dtype=torch.int64), pte_embeds_before_img),
)

Expand All @@ -107,7 +107,7 @@ def test_llava_export(self):
# pte prefill image
pte_embeds_img = llava_module.run_method("image_encoder", (resized,))[0]
llava_module.run_method(
"text_model",
"text_decoder",
(
torch.tensor([start_pos], dtype=torch.int64),
pte_embeds_img,
Expand All @@ -122,7 +122,7 @@ def test_llava_export(self):
"token_embedding", (prompt_after_image,)
)[0]
pte_prefill_after_img = llava_module.run_method(
"text_model",
"text_decoder",
(torch.tensor([start_pos], dtype=torch.int64), pte_embeds_after_img),
)[0]

Expand All @@ -139,7 +139,7 @@ def test_llava_export(self):
"token_embedding", (torch.tensor([[new_tokens[i]]], dtype=torch.int64),)
)[0]
logits = llava_module.run_method(
"text_model",
"text_decoder",
(torch.tensor([start_pos + i], dtype=torch.int64), token_embeds),
)[0]
new_tokens.append(torch.argmax(logits).item())
Expand Down
8 changes: 4 additions & 4 deletions examples/models/llava/test/test_pte.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def main():
"token_embedding", (prompt_before_image,)
)[0]
pte_prefill_before_img = llava_module.run_method(
"text_model",
"text_decoder",
(torch.tensor([start_pos], dtype=torch.int64), pte_embeds_before_img),
)[0]
print(pte_prefill_before_img)
Expand All @@ -60,7 +60,7 @@ def main():
logging.warning("Image encoder finished")
logging.warning("Image token prefill started")
pte_prefill_img = llava_module.run_method(
"text_model",
"text_decoder",
(
torch.tensor([start_pos], dtype=torch.int64),
pte_embeds_img,
Expand All @@ -77,7 +77,7 @@ def main():
"token_embedding", (prompt_after_image,)
)[0]
pte_prefill_after_img = llava_module.run_method(
"text_model",
"text_decoder",
(torch.tensor([start_pos], dtype=torch.int64), pte_embeds_after_img),
)[0]
logging.warning("Text token prefill finished")
Expand All @@ -91,7 +91,7 @@ def main():
"token_embedding", (torch.tensor([[new_tokens[i]]], dtype=torch.int64),)
)[0]
logits = llava_module.run_method(
"text_model",
"text_decoder",
(torch.tensor([start_pos + i], dtype=torch.int64), token_embeds),
)[0]
new_tokens.append(torch.argmax(logits[..., -1, :]).item())
Expand Down
98 changes: 98 additions & 0 deletions examples/models/voxtral/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# Simple CMake build system for the voxtral runner executable.
# Builds `voxtral_runner` from multimodal.cpp against a pre-installed
# ExecuTorch build (with the LLM runner extension enabled).
#
cmake_minimum_required(VERSION 3.24)
project(voxtral)

# Root of the ExecuTorch repository: this file lives at examples/models/voxtral,
# so the root is three directories up.
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)

# Provides helper commands used below, e.g.
# executorch_target_link_options_shared_lib and target_link_options_gc_sections.
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

# Detect an iOS cross-build from the toolchain file name.
# NOTE(review): CMAKE_TOOLCHAIN_IOS is set here but not read in this file;
# presumably consumed by included/parent CMake logic — verify before removing.
if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
set(CMAKE_TOOLCHAIN_IOS ON)
else()
set(CMAKE_TOOLCHAIN_IOS OFF)
endif()

# Let files say "include <executorch/path/to/header.h>"
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# gflags is built inside the ExecuTorch build tree; point gflags_DIR at its
# generated package config so find_package() can locate it.
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

# Find `executorch` libraries, same as for gflags: search the ExecuTorch build
# tree. FIND_ROOT_PATH_BOTH keeps this working under cross-compile toolchains
# that would otherwise restrict the search to the sysroot.
list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..)
find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
executorch_target_link_options_shared_lib(executorch)

# Accumulate everything the runner links against; extended below as optional
# backends/extensions are discovered.
set(link_libraries executorch gflags)
set(_srcs multimodal.cpp)

# CPU kernel libraries (optimized + quantized + custom ops) and their BLAS deps.
list(
APPEND
link_libraries
optimized_native_cpu_ops_lib
quantized_ops_lib
custom_ops
cpublas
eigen_blas
)
# Force-link the ops libraries so their static kernel registrations are kept
# (helper from Utils.cmake; presumably whole-archive linking — see its
# definition for details).
executorch_target_link_options_shared_lib(optimized_native_cpu_ops_lib)
executorch_target_link_options_shared_lib(quantized_ops_lib)
executorch_target_link_options_shared_lib(custom_ops)

# XNNPACK backend: only linked if the installed ExecuTorch build provides it.
if(TARGET xnnpack_backend)
set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
# KleidiAI kernels are an optional XNNPACK dependency on some Arm builds.
if(TARGET kleidiai)
list(APPEND xnnpack_backend_libs kleidiai)
endif()
list(APPEND link_libraries ${xnnpack_backend_libs})
executorch_target_link_options_shared_lib(xnnpack_backend)
endif()

# The multimodal runner requires the LLM runner extension; fail early with a
# clear message rather than at link time.
if(NOT TARGET extension_llm_runner)
message(
FATAL_ERROR
"ExecuTorch must be installed with EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER enabled."
)
endif()

# Needed for cpuinfo where it uses android specific log lib
if(ANDROID)
list(APPEND link_libraries log)
endif()

# Add the required ExecuTorch extensions for multimodal LLM runner
list(
APPEND
link_libraries
extension_llm_runner
extension_module
extension_data_loader
extension_tensor
extension_flat_tensor
)

# Tokenizer support (imported target exported by the ExecuTorch install).
list(APPEND link_libraries tokenizers::tokenizers)

add_executable(voxtral_runner ${_srcs})
# For non-debug builds, drop unused sections and (outside Apple, whose linker
# lacks -s) strip symbols to shrink the binary.
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
target_link_options_gc_sections(voxtral_runner)
if(NOT APPLE)
target_link_options(voxtral_runner PRIVATE "LINKER:-s")
endif()
endif()

target_include_directories(voxtral_runner PUBLIC ${_common_include_directories})
target_link_libraries(voxtral_runner PUBLIC ${link_libraries})
# NOTE(review): _common_compile_options is never set in this file; it is
# expected to be defined by Utils.cmake or expand to empty — confirm.
target_compile_options(voxtral_runner PUBLIC ${_common_compile_options})
Loading
Loading