
Commit 842be23

Merge branch 'main' into multimodal-eval-2

2 parents: 51b0e83 + 4d8bab5

4 files changed: +53, -17 lines

README.md (5 additions, 3 deletions)

@@ -582,11 +582,13 @@ We provide
 ## Community Contributions

-We really value our community and the contributions made by our wonderful users. We'll use this section to call out some of these contributions! If you'd like to help out as well, please see the [CONTRIBUTING](CONTRIBUTING.md) guide.
+We really value our community and the contributions made by our wonderful users!

-To connect with us and other community members, we invite you to join our Slack community by filling out this [form](https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform). Once you've joined, you can:
+If you'd like to help out, connect with us and other community members by joining our [Discord](https://discord.gg/hm2Keduk3v). Once you've joined, you can:

 * Head to the `#torchchat-general` channel for general questions, discussion, and community support.
-* Join the `#torchchat-contributors` channel if you're interested in contributing directly to project development.
+* Hop in the `#torchchat-contributors` channel if you're interested in contributing directly to project development.
+
+Also give our [CONTRIBUTING](CONTRIBUTING.md) guide a read.

 Looking forward to discussing with you about torchchat future!

runner/run.cpp (23 additions, 1 deletion)

@@ -53,6 +53,9 @@ using executorch::extension::TensorPtr;
 using torch::executor::EValue;
 using torch::executor::Module;
 using torch::executor::Result;
+using executorch::runtime::MemoryManager;
+using executorch::runtime::MemoryAllocator;
+using executorch::runtime::Error;
 #endif

 using tokenizers::SPTokenizer;

@@ -867,7 +870,26 @@ int main(int argc, char *argv[]) {
       : torch::Device(torch::kCUDA);
   ModelType model_type = get_model_type(std::stoi(aoti_metadata["tokenizer_type"]));
 #else // __ET_MODEL__
-  ModelType model_type = get_model_type(llama_ver);
+  Error load_status = transformer.runner->load();
+  ET_CHECK_MSG(
+      load_status == torch::executor::Error::Ok,
+      "program::load() failed with status 0x%" PRIx32,
+      static_cast<uint32_t>(load_status));
+
+  static std::array<uint8_t, 4 * 1024U * 1024U> method_allocator_pool; // 4MB
+  MemoryAllocator method_allocator{MemoryAllocator(
+      sizeof(method_allocator_pool), method_allocator_pool.data())};
+  MemoryManager memory_manager(&method_allocator, nullptr);
+  auto tokenizer_method = transformer.runner->program()->load_method("tokenizer_type", &memory_manager);
+
+  Error execute_status = tokenizer_method->execute();
+  ET_CHECK_MSG(
+      execute_status == torch::executor::Error::Ok,
+      "method::execute() failed with status 0x%" PRIx32,
+      static_cast<uint32_t>(execute_status));
+
+  auto tokenizer_type = tokenizer_method->get_output(0).toInt();
+  ModelType model_type = get_model_type(tokenizer_type);
 #endif

   if (model_type == UNKNOWN_MODEL) {
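As a quick sanity check that the metadata really made it into the exported program, the same "tokenizer_type" method can also be executed from Python. This is a minimal sketch, assuming the `_load_for_executorch` pybinding shipped with the ExecuTorch pip package behaves as in ExecuTorch's own examples, and that `model.pte` (an illustrative path) was produced with the torchchat/export.py change below:

# Sketch only: _load_for_executorch and run_method are ExecuTorch pybindings;
# the method name matches the constant method registered at export time.
from executorch.extension.pybindings.portable_lib import _load_for_executorch

module = _load_for_executorch("model.pte")  # illustrative path
(tokenizer_type,) = module.run_method("tokenizer_type", ())  # method takes no inputs
print(tokenizer_type)  # expect 2 (llama2) or 3 (llama3)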

torchchat/export.py (24 additions, 12 deletions)

@@ -313,7 +313,7 @@ def export_to_edge(
         core_aten_ep, edge_constant_methods, edge_compile_config, verbose=verbose
     )

-def export_for_et(model, device, output_path) -> str:
+def export_for_et(model, device, output_path, edge_constant_methods) -> str:

     input = (
         torch.tensor([[1]], dtype=torch.long, device=device),

@@ -344,12 +344,15 @@ def export_for_et(model, device, output_path) -> str:
     with torch.nn.attention.sdpa_kernel(
         [torch.nn.attention.SDPBackend.MATH]
     ), torch.no_grad():
-        m = export_for_training(model, input, dynamic_shapes=dynamic_shapes).module()
+        m = export_for_training(
+            model, input, dynamic_shapes=dynamic_shapes
+        ).module()

         edge_manager = export_to_edge(
             m,
             input,
             dynamic_shapes=dynamic_shapes,
+            edge_constant_methods=edge_constant_methods,
             edge_compile_config=edge_config,
         )
         edge_manager = edge_manager.to_backend(XnnpackDynamicallyQuantizedPartitioner())

@@ -365,6 +368,7 @@ def export_for_et(model, device, output_path) -> str:
     )

     print("The methods are: ", export_program.methods)
+    print("The config methods are: ", export_program.config_methods)
     with open(output_path, "wb") as f:
         export_program.write_to_file(f)

@@ -407,7 +411,9 @@ def main(args):
            f"Warning! ExecuTorch export target is controlled by export recipe, not device setting. Ignoring device={builder_args.device} setting."
        )
        builder_args.device = "cpu"
-    elif (output_pte_path or output_dso_path or output_aoti_package_path) and "mps" in builder_args.device:
+    elif (
+        output_pte_path or output_dso_path or output_aoti_package_path
+    ) and "mps" in builder_args.device:
        print("Warning! Device MPS not supported for export. Exporting for device CPU.")
        builder_args.device = "cpu"

@@ -473,13 +479,26 @@ def main(args):
             support_tensor_subclass=False,
         )
         _unset_gguf_kwargs(builder_args)

+    if tokenizer_args is None:
+        tokenizer_type = "0"
+    elif tokenizer_args.is_sentencepiece:
+        tokenizer_type = "2"  # Corresponding to llama2
+    else:
+        tokenizer_type = "3"  # Corresponding to llama3
+
     with torch.no_grad():
         if output_pte_path:
             output_pte_path = str(os.path.abspath(output_pte_path))
             if executorch_export_available:
                 print(f"Exporting model using ExecuTorch to {output_pte_path}")
-                export_for_et(model_to_pte, builder_args.device, args.output_pte_path)
+                print(f"Tokenizer type is {tokenizer_type}")
+                export_for_et(
+                    model_to_pte,
+                    builder_args.device,
+                    args.output_pte_path,
+                    {"tokenizer_type": int(tokenizer_type)},
+                )
             else:
                 print(
                     "Export with executorch requested but ExecuTorch could not be loaded"

@@ -503,13 +522,6 @@ def main(args):
         if output_aoti_package_path:
             output_aoti_package_path = str(os.path.abspath(output_aoti_package_path))

-            if tokenizer_args is None:
-                tokenizer_type = "0"
-            elif tokenizer_args.is_sentencepiece:
-                tokenizer_type = "2"  # Corresponding to llama2
-            else:
-                tokenizer_type = "3"  # Corresponding to llama3
-
             metadata = {"tokenizer_type": tokenizer_type}
             print(
                 "Exporting model using AOT Inductor to " f"{output_aoti_package_path}."
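Read on its own, the export-side change boils down to a small mapping plus a constant-methods dict. The sketch below isolates that logic; `TokenizerArgs` is a hypothetical stand-in for torchchat's tokenizer args object (only the `is_sentencepiece` flag consulted by the diff is modeled), while the mapping values are taken verbatim from the change:

from dataclasses import dataclass
from typing import Optional

@dataclass
class TokenizerArgs:
    # Hypothetical stand-in for torchchat's tokenizer args; only the
    # is_sentencepiece flag used by the diff is modeled here.
    is_sentencepiece: bool

def pick_tokenizer_type(tokenizer_args: Optional[TokenizerArgs]) -> str:
    # Same mapping as the diff: "0" unknown, "2" llama2, "3" llama3.
    if tokenizer_args is None:
        return "0"
    if tokenizer_args.is_sentencepiece:
        return "2"  # Corresponding to llama2
    return "3"  # Corresponding to llama3

# The exporter converts the string to an int and hands it to export_for_et(),
# which bakes it into the .pte as a constant method named "tokenizer_type".
edge_constant_methods = {"tokenizer_type": int(pick_tokenizer_type(TokenizerArgs(True)))}
print(edge_constant_methods)  # {'tokenizer_type': 2}

The run.cpp hunk above is the consumer of this dict: the runner executes the baked-in method instead of taking the old `-l <llama version>` flag, which is also why the usage hint in build_native.sh below drops that flag.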

torchchat/utils/scripts/build_native.sh (1 addition, 1 deletion)

@@ -100,4 +100,4 @@ else
 fi
 cmake --build ./cmake-out --target "${TARGET}"_run

-printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model -l <llama version (2 or 3)> -i <prompt>\n"
+printf "Build finished. Please run: \n./cmake-out/${TARGET}_run model.<pte|so> -z tokenizer.model -i <prompt>\n"
