This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 8684fb6

Merge branch 'pytorch:main' into patch-10

2 parents: b3952ab + fff956c

33 files changed: +400 −1656 lines

.ci/scripts/run-docs

Lines changed: 20 additions & 0 deletions

@@ -91,3 +91,23 @@ if [ "$1" == "evaluation" ]; then
   echo "*******************************************"
   bash -x ./run-evaluation.sh
 fi
+
+if [ "$1" == "multimodal" ]; then
+
+  # Expecting that this test might fail as-is, because it's the first
+  # on-PR test depending on GitHub secrets for access with an HF token
+
+  echo "::group::Create script to run multimodal"
+  python3 torchchat/utils/scripts/updown.py --file docs/multimodal.md > ./run-multimodal.sh
+  # For good measure, if something happened to the updown processor
+  # and it did not error out, fail with an exit 1
+  echo "exit 1" >> ./run-multimodal.sh
+  echo "::endgroup::"
+
+  echo "::group::Run multimodal"
+  echo "*******************************************"
+  cat ./run-multimodal.sh
+  echo "*******************************************"
+  bash -x ./run-multimodal.sh
+  echo "::endgroup::"
+fi
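The appended `exit 1` works as a fail-safe because a healthy updown.py run is expected to terminate the script it emits with its own successful exit, so the guard line is only reached if generation was cut short. A minimal sketch of the pattern, with a hypothetical `generate_script` standing in for the updown processor:

```bash
#!/usr/bin/env bash
# Sketch of the fail-safe guard above. Assumption: a complete generator
# run ends its output with "exit 0", so the guard appended afterwards
# only executes when the generated script was truncated.
generate_script() {            # hypothetical stand-in for updown.py
  echo 'echo "running extracted commands"'
  echo 'exit 0'                # emitted at the end of a complete run
}
generate_script > ./run-example.sh
echo "exit 1" >> ./run-example.sh   # guard: reached only on truncated output
bash ./run-example.sh               # exits 0 here; would exit 1 if truncated
```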

.github/workflows/pull.yml

Lines changed: 3 additions & 2 deletions

@@ -731,6 +731,7 @@ jobs:

     git clone https://github.com/ggerganov/llama.cpp.git
     pushd llama.cpp
+    git checkout 64ed2091b24b2f9747148fdf49a34ed5938762c3
     make
     popd

@@ -941,7 +942,7 @@ jobs:
          path: |
            ./et-build
            ./torchchat/utils/scripts
-          key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh') }}
+          key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}-${{ hashFiles('**/install_et.sh', '**/build_native.sh') }}
      - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
        continue-on-error: true
        run: |

@@ -1052,7 +1053,7 @@ jobs:

          # Pull submodules (re2, abseil) for Tiktoken
          git submodule sync
-          git submodule update --init
+          git submodule update --init --recursive
          ./runner/build_android.sh
          echo "Tests complete."
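Two reproducibility fixes worth noting: the llama.cpp clone is now pinned to a fixed commit, and the submodule update gained `--recursive` so that submodules nested inside a submodule (such as the re2 and abseil dependencies named in the comment) are initialized too. A short sketch of the difference:

```bash
# Without --recursive, only first-level submodules are checked out;
# a submodule's own submodules stay as empty directories.
git submodule update --init              # first level only
git submodule update --init --recursive  # also descends into nested submodules

# Equivalent one-shot form when cloning fresh (standard git flag):
git clone --recurse-submodules https://github.com/pytorch/torchchat.git
```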

.github/workflows/run-readme-pr.yml

Lines changed: 44 additions & 1 deletion

@@ -243,4 +243,47 @@ jobs:
        echo "::group::Completion"
        echo "tests complete"
        echo "*******************************************"
-        echo "::endgroup::"
+        echo "::endgroup::"
+
+  test-multimodal-any:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      timeout: 60
+      script: |
+        echo "::group::Print machine info"
+        uname -a
+        echo "::endgroup::"
+
+        echo "::group::Install newer objcopy that supports --set-section-alignment"
+        yum install -y devtoolset-10-binutils
+        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+        echo "::endgroup::"
+
+        .ci/scripts/run-docs multimodal
+
+        echo "::group::Completion"
+        echo "tests complete"
+        echo "*******************************************"
+        echo "::endgroup::"
+
+  test-multimodal-cpu:
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    with:
+      runner: linux.g5.4xlarge.nvidia.gpu
+      gpu-arch-type: cuda
+      gpu-arch-version: "12.1"
+      timeout: 60
+      script: |
+        echo "::group::Print machine info"
+        uname -a
+        echo "::endgroup::"
+
+        echo "::group::Install newer objcopy that supports --set-section-alignment"
+        yum install -y devtoolset-10-binutils
+        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
+        echo "::endgroup::"
+
+        TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs multimodal
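The CPU variant reuses the same job body and simply prefixes the doc-test driver with an environment variable. This relies on the standard shell rule that `VAR=value cmd` scopes the variable to that single command, assuming `run-docs` (or the commands it extracts) consults `TORCHCHAT_DEVICE`. A minimal illustration:

```bash
# VAR=value before a command exports it to that command only.
TORCHCHAT_DEVICE=cpu bash -c 'echo "device=${TORCHCHAT_DEVICE:-default}"'  # prints: device=cpu
bash -c 'echo "device=${TORCHCHAT_DEVICE:-default}"'                      # prints: device=default
```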

.gitignore

Lines changed: 4 additions & 0 deletions

@@ -19,6 +19,10 @@ runner-et/cmake-out/*
 runner-aoti/cmake-out/*
 cmake-out/

+# Example project Android Studio ignore
+torchchat/edge/android/torchchat/.idea/*
+
+
 # pte files
 *.pte

.gitmodules

Lines changed: 3 additions & 9 deletions

@@ -1,9 +1,3 @@
-[submodule "tokenizer/third-party/abseil-cpp"]
-	path = tokenizer/third-party/abseil-cpp
-	url = https://github.com/abseil/abseil-cpp.git
-[submodule "tokenizer/third-party/re2"]
-	path = tokenizer/third-party/re2
-	url = https://github.com/google/re2.git
-[submodule "tokenizer/third-party/sentencepiece"]
-	path = tokenizer/third-party/sentencepiece
-	url = https://github.com/google/sentencepiece.git
+[submodule "runner/third-party/tokenizers"]
+	path = runner/third-party/tokenizers
+	url = https://github.com/pytorch-labs/tokenizers
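For anyone updating an existing checkout across this change, the usual sequence is to re-sync submodule URLs and re-initialize; the cleanup of the now-removed `tokenizer/third-party` trees is a hedged suggestion, not part of the commit:

```bash
git submodule sync                       # pick up URL/path changes from .gitmodules
git submodule update --init --recursive  # fetch runner/third-party/tokenizers and its nested deps
rm -rf tokenizer/third-party             # optional: clear stale checkouts of the removed submodules
```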

CMakeLists.txt

Lines changed: 6 additions & 3 deletions

@@ -7,18 +7,21 @@ ELSE()
 ENDIF()

 project(Torchchat)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")

 # include tokenizer
-add_subdirectory(tokenizer)
+add_subdirectory(runner/third-party/tokenizers)

 # include et_run executable
 include(runner/et.cmake)
 if(TARGET et_run)
-  target_link_libraries(et_run PUBLIC tokenizer microkernels-prod)
+  target_link_libraries(et_run PUBLIC tokenizers microkernels-prod)
+  target_include_directories(et_run PUBLIC runner/third-party/tokenizers/include)
 endif()

 # include aoti_run executable
 include(runner/aoti.cmake)
 if(TARGET aoti_run)
-  target_link_libraries(aoti_run tokenizer)
+  target_link_libraries(aoti_run tokenizers)
+  target_include_directories(aoti_run PUBLIC runner/third-party/tokenizers/include)
 endif()
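A minimal sketch of exercising the renamed target, assuming a plain CMake flow; torchchat's own `build_native.sh` (referenced in the workflow cache key above) wraps these steps with additional setup:

```bash
git submodule update --init --recursive    # runner/third-party/tokenizers must be populated
cmake -S . -B cmake-out
cmake --build cmake-out --target aoti_run  # links against the new "tokenizers" target
```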

docs/ADVANCED-USERS.md

Lines changed: 18 additions & 72 deletions

@@ -18,10 +18,10 @@ Torchchat is currently in a pre-release state and under extensive development.
 [shell default]: TORCHCHAT_ROOT=${PWD} ./torchchat/utils/scripts/install_et.sh


-This is the advanced users guide, if you're looking to get started
+This is the advanced users' guide, if you're looking to get started
 with LLMs, please refer to the README at the root directory of the
 torchchat distro. This is an advanced user guide, so we will have
-many more concepts and options to discuss and taking advantage of them
+many more concepts and options to discuss and take advantage of them
 may take some effort.

 We welcome community contributions of all kinds. If you find

@@ -41,7 +41,7 @@ While we strive to support a broad range of models, we can't test them
 all. We classify supported models as tested ✅, work in progress 🚧 or
 some restrictions ❹.

-We invite community contributions of new model suport and test results!
+We invite community contributions of new model support and test results!

 | Model | Tested | Eager | torch.compile | AOT Inductor | ExecuTorch | Fits on Mobile |
 |-----|--------|-------|-----|-----|-----|-----|

@@ -86,7 +86,7 @@ Server C++ runtime | n/a | run.cpp model.pte | ✅ |
 Mobile C++ runtime | n/a | app model.pte | ✅ |
 Mobile C++ runtime | n/a | app + AOTI | 🚧 |

-**Getting help:** Each command implements the --help option to give addititonal information about available options:
+**Getting help:** Each command implements the --help option to give additional information about available options:

 [skip default]: begin
 ```

@@ -96,8 +96,8 @@ python3 torchchat.py [ export | generate | chat | eval | ... ] --help

 Exported models can be loaded back into torchchat for chat or text
 generation, letting you experiment with the exported model and valid
-model quality. The python interface is the same in all cases and is
-used for testing nad test harnesses too.
+model quality. The Python interface is the same in all cases and is
+used for testing and test harnesses, too.

 Torchchat comes with server C++ runtimes to execute AOT Inductor and
 ExecuTorch models. A mobile C++ runtimes allow you to deploy

@@ -115,7 +115,7 @@ Some common models are recognized by torchchat based on their filename
 through `Model.from_name()` to perform a fuzzy match against a
 table of known model architectures. Alternatively, you can specify the
 index into that table with the option `--params-table ${INDEX}` where
-the index is the lookup key key in the [the list of known
+the index is the lookup key in the [the list of known
 pconfigurations](https://github.com/pytorch/torchchat/tree/main/torchchat/model_params)
 For example, for the stories15M model, this would be expressed as
 `--params-table stories15M`. (We use the model constructor

@@ -237,7 +237,7 @@ which chooses the best 16-bit floating point type.

 The virtual device fast and virtual floating point data types fast and
 fast16 are best used for eager/torch.compiled execution. For export,
-specify the your device choice for the target system with --device for
+specify your device choice for the target system with --device for
 AOTI-exported DSO models, and using ExecuTorch delegate selection for
 ExecuTorch-exported PTE models.

@@ -250,8 +250,7 @@ python3 torchchat.py generate [--compile] --checkpoint-path ${MODEL_PATH} --prom
 To improve performance, you can compile the model with `--compile`
 trading off the time to first token processed with time per token. To
 improve performance further, you may also compile the prefill with
-`--compile_prefill`. This will increase further compilation times though. The
-`--compile-prefill` option is not compatible with `--prefill-prefill`.
+`--compile-prefill`. This will increase further compilation times though.

 Parallel prefill is not yet supported by exported models, and may be
 supported in a future release.

@@ -265,7 +264,7 @@ the introductory README.
 In addition to running eval on models in eager mode and JIT-compiled
 mode with `torch.compile()`, you can also load dso and pte models back
 into the PyTorch to evaluate the accuracy of exported model objects
-(e.g., after applying quantization or other traqnsformations to
+(e.g., after applying quantization or other transformations to
 improve speed or reduce model size).

 Loading exported models back into a Python-based Pytorch allows you to

@@ -297,14 +296,14 @@ for ExecuTorch.)

 We export the stories15M model with the following command for
 execution with the ExecuTorch runtime (and enabling execution on a
-wide range of community and vendor supported backends):
+wide range of community and vendor-supported backends):

 ```
 python3 torchchat.py export --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_NAME}.pte
 ```

 Alternatively, we may generate a native instruction stream binary
-using AOT Inductor for CPU oor GPUs (the latter using Triton for
+using AOT Inductor for CPU or GPUs (the latter using Triton for
 optimizations such as operator fusion):

 ```

@@ -319,10 +318,10 @@ the exported model artifact back into a model container with a
 compatible API surface for the `model.forward()` function. This
 enables users to test, evaluate and exercise the exported model
 artifact with familiar interfaces, and in conjunction with
-pre-exiisting Python model unit tests and common environments such as
+pre-existing Python model unit tests and common environments such as
 Jupyter notebooks and/or Google colab.

-Here is how to load an exported model into the python environment on the example of using an exported model with `generate.oy`.
+Here is how to load an exported model into the Python environment using an exported model with the `generate` command.

 ```
 python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --pte-path ${MODEL_NAME}.pte --device cpu --prompt "Once upon a time"

@@ -452,7 +451,7 @@ strategies:
 You can find instructions for quantizing models in
 [docs/quantization.md](file:///./quantization.md). Advantageously,
 quantization is available in eager mode as well as during export,
-enabling you to do an early exploration of your quantization setttings
+enabling you to do an early exploration of your quantization settings
 in eager mode. However, final accuracy should always be confirmed on
 the actual execution target, since all targets have different build
 processes, compilers, and kernel implementations with potentially

@@ -464,9 +463,8 @@ significant impact on accuracy.

 ## Native (Stand-Alone) Execution of Exported Models

-Refer to the [README](README.md] for an introduction toNative
-execution on servers, desktops and laptops is described under
-[runner-build.md]. Mobile and Edge executipon for Android and iOS are
+Refer to the [README](README.md] for an introduction to native
+execution on servers, desktops, and laptops. Mobile and Edge execution for Android and iOS are
 described under [torchchat/edge/docs/Android.md] and [torchchat/edge/docs/iOS.md], respectively.

@@ -475,7 +473,7 @@ described under [torchchat/edge/docs/Android.md] and [torchchat/edge/docs/iOS.md

 PyTorch and ExecuTorch support a broad range of devices for running
 PyTorch with python (using either eager or eager + `torch.compile`) or
-in a python-free environment with AOT Inductor and ExecuTorch.
+in a Python-free environment with AOT Inductor and ExecuTorch.


 | Hardware | OS | Eager | Eager + Compile | AOT Compile | ET Runtime |

@@ -499,58 +497,6 @@ in a python-free environment with AOT Inductor and ExecuTorch.
 *Key*: n/t -- not tested


-## Runtime performance with Llama 7B, in tokens per second (4b quantization)
-
-| Hardware | OS | eager | eager + compile | AOT compile | ET Runtime |
-|-----|------|-----|-----|-----|-----|
-| x86 | Linux | ? | ? | ? | ? |
-| x86 | macOS | ? | ? | ? | ? |
-| aarch64 | Linux | ? | ? | ? | ? |
-| aarch64 | macOS | ? | ? | ? | ? |
-| AMD GPU | Linux | ? | ? | ? | ? |
-| Nvidia GPU | Linux | ? | ? | ? | ? |
-| MPS | macOS | ? | ? | ? | ? |
-| MPS | iOS | ? | ? | ? | ? |
-| aarch64 | Android | ? | ? | ? | ? |
-| Mobile GPU (Vulkan) | Android | ? | ? | ? | ? |
-| CoreML | iOS | | ? | ? | ? | ? |
-| Hexagon DSP | Android | | ? | ? | ? | ? |
-| Raspberry Pi 4/5 | Raspbian | ? | ? | ? | ? |
-| Raspberry Pi 4/5 | Android | ? | ? | ? | ? |
-| ARM 32b (up to v7) | any | | ? | ? | ? | ? |
-
-
-## Runtime performance with Llama3, in tokens per second (4b quantization)
-
-| Hardware | OS | eager | eager + compile | AOT compile | ET Runtime |
-|-----|------|-----|-----|-----|-----|
-| x86 | Linux | ? | ? | ? | ? |
-| x86 | macOS | ? | ? | ? | ? |
-| aarch64 | Linux | ? | ? | ? | ? |
-| aarch64 | macOS | ? | ? | ? | ? |
-| AMD GPU | Linux | ? | ? | ? | ? |
-| Nvidia GPU | Linux | ? | ? | ? | ? |
-| MPS | macOS | ? | ? | ? | ? |
-| MPS | iOS | ? | ? | ? | ? |
-| aarch64 | Android | ? | ? | ? | ? |
-| Mobile GPU (Vulkan) | Android | ? | ? | ? | ? |
-| CoreML | iOS | | ? | ? | ? | ? |
-| Hexagon DSP | Android | | ? | ? | ? | ? |
-| Raspberry Pi 4/5 | Raspbian | ? | ? | ? | ? |
-| Raspberry Pi 4/5 | Android | ? | ? | ? | ? |
-| ARM 32b (up to v7) | any | | ? | ? | ? | ? |
-
-
-
-# CONTRIBUTING to torchchat
-
-We welcome any feature requests, bug reports, or pull requests from
-the community. See the [CONTRIBUTING](CONTRIBUTING.md) for
-instructions how to contribute to torchchat.
-
-
 # LICENSE

 Torchchat is released under the [BSD 3 license](./LICENSE). However
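The two commands shown in this diff compose into the export-and-reload round trip the document describes; using them together, with `MODEL_PATH` and `MODEL_NAME` set as elsewhere in the guide:

```bash
# Export stories15M for the ExecuTorch runtime, then load the .pte
# artifact back through the Python generate interface to validate it.
python3 torchchat.py export --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_NAME}.pte
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --pte-path ${MODEL_NAME}.pte --device cpu --prompt "Once upon a time"
```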

docs/multimodal.md

Lines changed: 11 additions & 1 deletion

@@ -14,9 +14,11 @@ This page goes over the different commands you can run with LLama 3.2 11B Vision

 While we strongly encourage you to use the Hugging Face checkpoint (which is the default for torchchat when utilizing the commands with the argument `llama3.2-11B`), we also provide support for manually providing the checkpoint. This can be done by replacing the `llama3.2-11B` argument in the commands below with the following:

+[skip default]: begin
 ```
 --checkpoint-path <file.pth> --tokenizer-path <tokenizer.model> --params-path torchchat/model_params/Llama-3.2-11B-Vision.json
 ```
+[skip default]: end

 ## Generation
 This generates text output based on a text prompt and (optional) image prompt.

@@ -39,6 +41,9 @@ python3 torchchat.py server llama3.2-11B
 ```
 [skip default]: end

+[shell default]: python3 torchchat.py server llama3.2-11B & server_pid=$!
+
+
 In another terminal, query the server using `curl`. This query might take a few minutes to respond.

 <details>

@@ -71,10 +76,13 @@ curl http://127.0.0.1:5000/v1/chat/completions \
     "max_tokens": 300
   }'
 ```
-
+[skip default]: begin
 ```
 {"id": "chatcmpl-cb7b39af-a22e-4f71-94a8-17753fa0d00c", "choices": [{"message": {"role": "assistant", "content": "The image depicts a simple black and white cartoon-style drawing of an animal face. It features a profile view, complete with two ears, expressive eyes, and a partial snout. The animal looks to the left, with its eye and mouth implied, suggesting that the drawn face might belong to a rabbit, dog, or pig. The graphic face has a bold black outline and a smaller, solid black nose. A small circle, forming part of the face, has a white background with two black quirkly short and long curved lines forming an outline of what was likely a mouth, complete with two teeth. The presence of the curve lines give the impression that the animal is smiling or speaking. Grey and black shadows behind the right ear and mouth suggest that this face is looking left and upwards. Given the prominent outline of the head and the outline of the nose, it appears that the depicted face is most likely from the side profile of a pig, although the ears make it seem like a dog and the shape of the nose makes it seem like a rabbit. Overall, it seems that this image, possibly part of a character illustration, is conveying a playful or expressive mood through its design and positioning."}, "finish_reason": "stop"}], "created": 1727487574, "model": "llama3.2", "system_fingerprint": "cpu_torch.float16", "object": "chat.completion"}
 ```
+[skip default]: end
+
+[shell default]: kill ${server_pid}

 </details>

@@ -90,6 +98,8 @@ First, follow the steps in the Server section above to start a local server. The
 streamlit run torchchat/usages/browser.py
 ```

+[skip default]: end
+
 ---

 # Future Work
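The `[shell default]` and `[skip default]` annotations added above are directives for the updown processor that `.ci/scripts/run-docs` uses to turn this page into an executable test: `[shell default]` lines inject commands into the generated script (here, starting the server in the background and killing it afterwards), while `[skip default]` blocks are excluded (here, the placeholder flags and the sample JSON response). To regenerate and inspect the script locally:

```bash
# Extract the runnable commands from this document, as the CI job does,
# then review the generated test script before executing it.
python3 torchchat/utils/scripts/updown.py --file docs/multimodal.md > ./run-multimodal.sh
cat ./run-multimodal.sh
```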
