Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
409 commits
Select commit Hold shift + click to select a range
8618607
Fix phi-3-mini build (#5513)
dbort Sep 20, 2024
7de3f81
Fix broken images in docs (#5514)
Riandy Sep 20, 2024
613cfd6
Add CMake instructions to apple-runtime.md (#5533)
dbort Sep 20, 2024
c50f9fe
update copy_offset to new layout specifier gen & axis mapping (#5505)
nathanaelsee Sep 21, 2024
0eee42a
Don't require -march compiler flags to use bfdot (#5444)
swolchok Sep 21, 2024
d5fdbd4
update conv1d to new layout specifier gen, axis mapping, and use non-…
nathanaelsee Sep 21, 2024
3ec4161
Fix optimized kernels build. (#5534)
shoumikhin Sep 21, 2024
45210bb
Fix tensor cloning when data is null. (#5535)
shoumikhin Sep 21, 2024
55d6b0d
Fix Xcode project. (#5539)
shoumikhin Sep 22, 2024
b2517d6
Remove TIP Format and Replaced with Subheader in README (#5517)
cmodi-meta Sep 22, 2024
e12b37e
Arm backend: Track target flash size metrics (#5342)
zingo Sep 23, 2024
0ec003b
Add "px" unit to image sizes in readme (#5540)
cmodi-meta Sep 23, 2024
182f138
Move examples/mediatek out from under the torch namespace (#5478)
dbort Sep 23, 2024
b361f91
Remove `torch::` namespace reference from LLaMMARunner.mm (#5516)
dbort Sep 23, 2024
3b63839
Fix duplicating latest prompt (#5546)
cmodi-meta Sep 23, 2024
abe9c36
Remove stray uses of `torch::executor::` from examples/... (#5512)
dbort Sep 23, 2024
f68a138
Remove `torch::` references from devtools/example_runner (#5495)
dbort Sep 23, 2024
cab6335
Allow using custom SDPA for non-float32 dtypes in llama demo (#5548)
swolchok Sep 23, 2024
b611d59
add CI job for phi-3-mini (#5532)
helunwencser Sep 23, 2024
8be3ce5
Fix image sizes in README.md (#5550)
svekars Sep 23, 2024
2eae7a9
Move QMat2 to buffer storage and scales_and_zeros to Channels Packed …
SS-JIA Sep 23, 2024
0a72cb0
Support bfloat16 in op_index (#5499)
swolchok Sep 23, 2024
badd76e
Support bfloat16 in op_index_put (#5500)
swolchok Sep 23, 2024
28c2ab6
add BFloat16 to aten_bridge (#5519)
swolchok Sep 23, 2024
286799c
Include optimized kernels in pybindings' portable_lib if building the…
swolchok Sep 23, 2024
61cb5b0
Adding support to demo prompt classification with Llama Guard (#5553)
Riandy Sep 23, 2024
ca0e48c
Refactor codegen components to prepare for benchmark generation (#5560)
SS-JIA Sep 24, 2024
5a984cc
Generate benchmarks automatically (#5561)
SS-JIA Sep 24, 2024
f4728f4
Add all relevant testcases for Arm Ethos-U85 (#5346)
zingo Sep 24, 2024
df72b8c
Use TensorMeta to check if inputs and outputs are memory planned (#5565)
JacobSzwejbka Sep 24, 2024
8660faf
Add New Ethos-U85 support to Arm aot_arm_compiler (#5345)
robell Sep 24, 2024
90dcea5
Qualcomm AI Engine Direct - Fix aihub path failing due to memory plan…
winskuo-quic Sep 24, 2024
3f447d7
Add transpose op as view operator (#5589)
SS-JIA Sep 24, 2024
2060434
Implement slice as a view (#5590)
SS-JIA Sep 24, 2024
f1c5fc6
Use `MemoryAccessFlags` instead of `MemoryAccessType` when binding (#…
SS-JIA Sep 24, 2024
b206b97
Introduce convenience constexpr for memory access types (#5592)
SS-JIA Sep 24, 2024
ce74024
update copy_channel_offset to axis mapping (#5587)
nathanaelsee Sep 24, 2024
72245c3
Add tiktoken dep (#5586)
jackzhxng Sep 24, 2024
3e79ea4
Transform embedding from SpinQuant checkpoint (#5552)
Sep 24, 2024
99ee547
Add documentation. (#5562)
shoumikhin Sep 24, 2024
5c56f96
Fix typos in docs. (#5613)
shoumikhin Sep 24, 2024
85e7458
Update NDK version to r26d in docs (#5612)
kirklandsign Sep 25, 2024
6f9cd8c
Add Int8DynActInt8WeightLinear module (#5605)
mcr229 Sep 25, 2024
341545c
add option to quantize output layer perchannel for SpinQuant (#5614)
Sep 25, 2024
d516309
Add possibility to collect all TOSA tests to a specified path (#5028)
per Sep 25, 2024
e425dbb
Arm backend: Track target memory usage (#5341)
zingo Sep 25, 2024
d2ba238
Revert D62874650: Arm backend: Track target memory usage
Sep 25, 2024
82a505b
Update Android XNNPack demo app doc for Llama 3.2 and Llama Guard 3 (…
Sep 25, 2024
9b6d4b4
Update iOS XNNPack demo app docs for Llama 3.2 (#5641)
cmodi-meta Sep 25, 2024
a914446
Improve Llama page (#5639)
mergennachin Sep 25, 2024
6e9efa1
Demo app android xnnpack quick-fix for the bookmark link (#5642)
Sep 25, 2024
cd46721
Linter fix (#5643)
kirklandsign Sep 25, 2024
b9dadee
Add llama3.2 1B and 3B instructions (#5647)
mergennachin Sep 25, 2024
52f9e03
build cadence hifi flow as a stand-alone cmake dependency (#5551)
zonglinpeng Sep 25, 2024
7ab977e
build cadence cpu flow as a stand-alone cmake dependency (#5555)
zonglinpeng Sep 25, 2024
52d5218
update export SpinQuant checkpoint to align with the new format (#5645)
Sep 25, 2024
88c2407
Fix typo
mergennachin Sep 25, 2024
984986e
Add some Llava related stuff (#5659)
larryliu0820 Sep 25, 2024
ba0958a
Add Llama 3.2 and subsections to Example dir README (#5661)
cmodi-meta Sep 25, 2024
f3fa9fa
Add animated gif for Llama3.2 1B bf16 (#5671)
mergennachin Sep 25, 2024
dd8d5be
Cleanup xnnpack_README.md (#5662)
cmodi-meta Sep 25, 2024
dacbba7
Add llama 3.2 model type on Android (#5646)
Riandy Sep 25, 2024
7c647cd
add instruction for quantizing with SpinQuant (#5672)
Sep 26, 2024
6259a29
Improve README page
mergennachin Sep 26, 2024
e57bbbb
Pin Xcode projects package deps on main to a particular commit instea…
shoumikhin Sep 26, 2024
3bedd8b
Add buck targets for QNN AOT export (#5476)
Abhi-hpp Sep 26, 2024
4dcee85
Add MethodMeta object for python visibility (#5571)
dulinriley Sep 26, 2024
1cf3da3
Fix linker flags. (#5689)
shoumikhin Sep 26, 2024
13869ec
Add a workflow dispatch for uploading release artifact (#5606)
kirklandsign Sep 26, 2024
ff6607e
Document update (#5692)
Sep 26, 2024
953ab51
Small improvements for module usage. (#5705)
shoumikhin Sep 26, 2024
985f92d
Some updated to kv cache (#5663)
kimishpatel Sep 26, 2024
9d224a5
Fix dequantize per channel to handle double scale type (#5524)
kimishpatel Sep 26, 2024
7e9eaa8
Readme docs update (#5695)
Riandy Sep 26, 2024
e172c5c
add performance number for 1B/3B (#5704)
Sep 26, 2024
a1ed265
Allow softmax and log_softmax to operate on any dimension (#5694)
mcremon-meta Sep 27, 2024
61c421f
Bump nightly torch (#5660)
jackzhxng Sep 27, 2024
d4afbe7
Show loading model UI during model switch (#5691)
cmodi-meta Sep 27, 2024
7127ea9
New doc for the memory planning inspection util function (#5430)
Olivia-liu Sep 27, 2024
fc6b8ea
Add BUCK file to coreml export script (#5702)
cccclai Sep 27, 2024
bdaeede
Remove unused includes from operators (#5538)
mcremon-meta Sep 27, 2024
8f3a83b
Fix rope source transformation error message (#5630)
swolchok Sep 27, 2024
bd03d6b
Improve llama README with SPinQuant
mergennachin Sep 27, 2024
a5c9dee
Fix non-reentrant threadpool (#5714)
swolchok Sep 27, 2024
c3460e5
Use aliasing constructor instead of a custom deleter in TensorImplPtr…
shoumikhin Sep 27, 2024
1fedffc
Make export file compatible with buck (#5703)
cccclai Sep 27, 2024
43b03f6
migrate cadence cpu executor to use the existing ET sample (#5644)
zonglinpeng Sep 27, 2024
c5ced71
Remove extract_constant_segment (#5680)
neuropilot-captain Sep 27, 2024
fcdfe06
Add model files support for android tokenizer (#5727)
Riandy Sep 27, 2024
e6237f7
Add Buck config to disable XNNPACK Workspace Sharing (#5696)
digantdesai Sep 27, 2024
53936dc
Store the Tensor inline in TensorPtr (#5684)
swolchok Sep 27, 2024
4ee0437
Remove TensorPtr::get() (#5687)
swolchok Sep 27, 2024
87dc49d
Qualcomm AI Engine Direct - add tutorial for op builder & quantizer (…
haowhsu-quic Sep 27, 2024
3f04c3c
Update README for MediaTek backend (#5386)
neuropilot-captain Sep 27, 2024
c1c5080
Fix a typo in the memory planning doc (#5723)
Olivia-liu Sep 27, 2024
bdaad8e
Add dtype arg to the script for exporting HuggingFace models (#5716)
Sep 27, 2024
55cc430
Always use two XNNPACK Partitioners (#5573)
digantdesai Sep 27, 2024
2fff171
Enable AHB extension for Android builds (#5729)
Sep 27, 2024
dacd0a2
c10::optional -> std::optional
r-barnes Sep 27, 2024
b4a6148
Migrate from capture_pre_autograd_graph to torch.export.export_for_tr…
mcremon-meta Sep 27, 2024
8b5cf96
Fix `VK_NULL_HANDLE` comparison style (#5733)
Sep 28, 2024
e31e0b6
Merge TensorImplPtr into TensorPtr.
shoumikhin Sep 28, 2024
8093d51
Update packages commit. (#5742)
shoumikhin Sep 28, 2024
fe0e676
Add quantize option to the coreml script (#5710)
cccclai Sep 28, 2024
905b88c
Rename executorch_no_prim_ops to executorch_core (#5740)
GregoryComer Sep 29, 2024
77e7ad1
Add access to edge_program in ArmPassManager (#5542)
Erik-Lundell Sep 30, 2024
b71926f
Arm backend: Add options when using run.sh (#5627)
zingo Sep 30, 2024
09f13c0
Arm backend: Updated depthwise/conv2d test lists for Ethos Ux5 (#5625)
zingo Sep 30, 2024
c222a44
Arm backend: Add rsqrt lowering (#5577)
oscarandersson8218 Sep 30, 2024
06ce226
Update aot_arm_compiler to use export_for_training (#5581)
SaoirseARM Sep 30, 2024
a9ad3c6
Arm backend: Improve memory config and documentation in the runtime (…
zingo Sep 30, 2024
4bf7e2f
Input name bugfix in runner_utils (#5071)
Erik-Lundell Sep 30, 2024
0d96f75
Improve ArmTester logging (#5629)
Erik-Lundell Sep 30, 2024
68548e5
Fix input setting and invocation. (#5752)
shoumikhin Sep 30, 2024
e19677c
Arm backend: Add squeeze-op (#5681)
oscarandersson8218 Sep 30, 2024
1c6dbb6
Qualcomm AI Engine Direct - support Conv2dTranspose (#5461)
haowhsu-quic Sep 30, 2024
2dd88fb
Support dim order in Arm backend (#5576)
oscarandersson8218 Sep 30, 2024
b2f20cb
removing autodeps suppression tags (#5593)
azad-meta Sep 30, 2024
48d586c
indention fix (#5738)
cccclai Sep 30, 2024
e2f1aca
Fix missing cstdint in vec_base.h (#5747)
swolchok Sep 30, 2024
9720715
throw instead of segfault with invalid args in pybindings (#5726)
JacobSzwejbka Sep 30, 2024
944bd67
Add animated gif for 3B SpinQuant (#5763)
mergennachin Sep 30, 2024
51e79a0
add command_alias in runtime_wrapper (#5737)
cccclai Sep 30, 2024
5605954
Fix unqualified uses of executorch functions (#5709)
dbort Sep 30, 2024
6ff52cc
Move etensor types to their new namespace (#5569)
dbort Sep 30, 2024
a5a76f7
Introduce `virtual_clone` API to support view of view use cases + fix…
SS-JIA Sep 30, 2024
9c38cf7
Add API to read value of `SymInt` and`ParamsBuffer` (#5754)
SS-JIA Sep 30, 2024
418c4c3
Polish CoreML Llama Doc (#5745)
Sep 30, 2024
e186bc9
Corstone-320 support (#5628)
per Sep 30, 2024
f0662bb
Fix out of OOB error for ConversationHistory (#5770)
cmodi-meta Sep 30, 2024
b60fa71
buckify eval_llama (#5437)
Sep 30, 2024
038e701
Fix merge conflict
kirklandsign Sep 30, 2024
c350675
merge conflict
kirklandsign Sep 30, 2024
a91eb8a
Generalize softmax for packed dim vs non packed dim (#5755)
SS-JIA Oct 1, 2024
26dc9fd
Ensure iOS benchmark app build is tested in CI (#5609)
Oct 1, 2024
660ef77
Add warmup for Llama (#5756)
digantdesai Oct 1, 2024
8079eb7
Kleidi Integration (#5162)
mcr229 Oct 1, 2024
04669a1
Fix load time. (#5781)
shoumikhin Oct 1, 2024
829ba3e
generalize tests for unary_ufunc_realhb_to_floath ops (1/2) (#5674)
swolchok Oct 1, 2024
b63c68e
migrate all unary_ufunc_realhb_to_floath op tests to general infra (2…
swolchok Oct 1, 2024
07bcd7f
UnaryUfuncRealHBToFloatHTest: test Half more widely (#5676)
swolchok Oct 1, 2024
085b817
Make optimized op_exp support bf16 (#5677)
swolchok Oct 1, 2024
6a27589
s/unary_ufunc_realhb_to_floath/unary_ufunc_realhbbf16_to_floathbf16/ …
swolchok Oct 1, 2024
ee32848
Support bf16 for isinf/isnan (#5690)
swolchok Oct 1, 2024
7183f19
Add more instructions to Xcode setup (#5757)
larryliu0820 Oct 1, 2024
2d0237c
set env in buck file (#5736)
cccclai Oct 1, 2024
d62c7ad
Package headers into pip wheel (#5734)
larryliu0820 Oct 1, 2024
fbcd332
Fix android build (#5796)
kirklandsign Oct 1, 2024
aced6d7
add microkernels-prod to backend_xnnpack in build_apple_frameworks (#…
mcr229 Oct 1, 2024
6923ae5
Use TensorPtr in aten_bridge (#5789)
JacobSzwejbka Oct 1, 2024
8ddb846
Wrap server generated yaml files inside et_operator_library (#5778)
Olivia-liu Oct 1, 2024
055bed5
Add quantized kv cache to llama (#5664)
kimishpatel Oct 2, 2024
d459011
Refactor custom SDPA op to separate kv cache update from the custom s…
kimishpatel Oct 2, 2024
5f324ce
Add update_quantized_cache op (#5527)
kimishpatel Oct 2, 2024
bca3ad6
Update SDPA op to use quantized kv cache (#5666)
kimishpatel Oct 2, 2024
29364c4
Refactoring sdpa (#5667)
kimishpatel Oct 2, 2024
fdacfaa
Update EXECUTORCH_LIBRARY macro (#5668)
kimishpatel Oct 2, 2024
43d7662
Add custom_sdpa and use that instead of sdpa_with_kv_cache (#5669)
kimishpatel Oct 2, 2024
393553c
Remove preprocess xplat build (#5801)
cccclai Oct 2, 2024
5877c2a
Corstone-320 download and tests (#5787)
per Oct 2, 2024
d708b94
Allow Inspector to accept ETDump bytes directly (#5657)
sxu Oct 2, 2024
c48d867
update spinquant quantization options to be general purposed pre-quan…
Oct 2, 2024
0d6a098
Support bf16 for binary logical ops (#5706)
swolchok Oct 2, 2024
152e22d
c10::optional -> std::optional
r-barnes Oct 2, 2024
c10c96a
Add fake mode in verifier (#5805)
angelayi Oct 2, 2024
9dcd71f
Disable animation in hot path of iOS example again (#5821)
swolchok Oct 2, 2024
3aa6b14
op_clamp: add downcasting tests & fix (#5798)
manuelcandales Oct 2, 2024
68c33c7
Fix llama demo app internal build (#5820)
kirklandsign Oct 2, 2024
450aece
New URL for the ETRecord documentation page (#5764)
Olivia-liu Oct 2, 2024
d80ebd5
New URL for the Bundled IO documentation page (#5767)
Olivia-liu Oct 2, 2024
dc24983
New URL for the Delegates Debugging documentation page (#5769)
Olivia-liu Oct 2, 2024
5acd5c9
Less Noisy Pybindings (#5828)
JacobSzwejbka Oct 2, 2024
1abecce
Refactor the test suite.
shoumikhin Oct 2, 2024
bc4fd8a
Update the job name to reflect the new generic benchmark app (#5829)
Oct 3, 2024
436afce
Migrate extension/threadpool to new namespace (#5825)
dbort Oct 3, 2024
9263d88
Use new threadpool namespace for all of //executorch/... (#5826)
dbort Oct 3, 2024
793f79b
Update install_requirements.py: Bumping PT pin to dev20240925 (#5824)
Jack-Khuu Oct 3, 2024
9ff3351
add more options for loading checkpoints (#5823)
Oct 3, 2024
011d42b
New URL for the Model Debugging page (#5817)
Olivia-liu Oct 3, 2024
6f17947
New URL for the ETDump page (#5809)
Olivia-liu Oct 3, 2024
835bd34
New URL for the Inspector page (#5810)
Olivia-liu Oct 3, 2024
79b7896
New URL for the Profiling page (#5819)
Olivia-liu Oct 3, 2024
e2e2129
Implement `repeat_interleave` (#5830)
SS-JIA Oct 3, 2024
7559ddd
Fix tensor views with tensors that use deferred memory allocation (#5…
SS-JIA Oct 3, 2024
8ac2608
Add support for Arm tests on MacOS (#5786)
per Oct 3, 2024
b78ec1b
Arm backend: Track target memory usage (#5788)
zingo Oct 3, 2024
3da2658
Arm backend: Change run.sh to let cmake decide number of parallel job…
zingo Oct 3, 2024
b9aaf96
Split up split test to collect artifacts correctly (#5785)
per Oct 3, 2024
ee5d099
Arm backend: Mark test_block_bottleneck_residual_tosa_BI unit test fl…
zingo Oct 3, 2024
92d1d1e
Enable Ethos-U85 and Corstone-320 in Arm run.sh flow (#5818)
robell Oct 3, 2024
98c5efa
Daily `arc lint --take CLANGFORMAT`
zertosh Oct 3, 2024
70aee72
Properly kill the buck2 daemon
dbort Oct 3, 2024
0e5b92d
Add OptionalIntArrayRef used by torchgen. (#5735)
mcremon-meta Oct 3, 2024
13408b9
Arm backend: Avoid failing sigmoid unit tests after Pytorch 2.6 updat…
zingo Oct 3, 2024
abae470
fix the format in this README (#5853)
Oct 3, 2024
2584e72
Specify NDK version 26 (#5855)
kirklandsign Oct 3, 2024
876c665
Update xnnpack_README.md adding warning for unsupported ndk version (…
larryliu0820 Oct 3, 2024
0ddd913
Add a script to download frameworks. (#5858)
shoumikhin Oct 3, 2024
9c3ebfe
Add readme to Resources dir. (#5857)
shoumikhin Oct 3, 2024
aa8a93c
Dramatically improve op_clamp build time (#5784)
swolchok Oct 3, 2024
b1fd74c
Simplify function pointers for apply_ternary_elementwise_fn (#5833)
swolchok Oct 3, 2024
433ead0
Just pass SupportedTensorDtypes for each tensor to apply_ternary_elem…
swolchok Oct 3, 2024
a4ee59a
Adding executorch_prim::mod.Scalar (#5721)
tarun292 Oct 4, 2024
3a25651
MIgrate some random files away from the torch:: namespace (#5836)
dbort Oct 4, 2024
20a157f
Rename flamingo to llama3_2_mm (#5759)
jackzhxng Oct 4, 2024
c2969f1
add 16a8w matmul custom annotation (#5864)
cccclai Oct 4, 2024
d34cc4e
Qualcomm AI Engine Direct -- add ssg2115p (#5867)
Oct 4, 2024
be4b7f4
Add missing Pyre mode headers] [batch:32/1018] [shard:9/N]
Oct 4, 2024
b9e9479
Migrate backends/vulkan to the new namespace (#5876)
dbort Oct 4, 2024
f102d06
Add llama tests. (#5874)
shoumikhin Oct 4, 2024
00d804c
Arm backend: Add argument to list used fallback ops for run.sh (#5815)
zingo Oct 4, 2024
98a58e0
Add generic annotator for data layout ops (#5814)
benkli01 Oct 4, 2024
a4fcdcd
PT Pin Bump: 20241002 (#5880)
Jack-Khuu Oct 4, 2024
a6d67c7
Move LLaMA tests to a subdir.
shoumikhin Oct 4, 2024
34e7ad8
Migrate backends/xnnpack to the new namespace (#5865)
dbort Oct 4, 2024
784eb51
Correct Core ML perf metrics (#5862)
Oct 4, 2024
d9aeca5
Update compiler-backend-dialect.md (#5890)
larryliu0820 Oct 4, 2024
acfcdd5
Add option to disable operator profiling (#5720)
tarun292 Oct 4, 2024
84498b2
Update kernel-library-selective-build.md (#5895)
larryliu0820 Oct 4, 2024
94289ad
Clean up organization of supported_ops (#5885)
SS-JIA Oct 4, 2024
1052e3b
Unbreak optimized sub in the case where one input is a scalar and dty…
swolchok Oct 4, 2024
2f9f94a
Use .bin extension for tokenizer. (#5907)
shoumikhin Oct 4, 2024
4651d65
Upload Android benchmark results to OSS benchmark database (#5808)
huydhn Oct 5, 2024
5abfe13
Update runtime-build-and-cross-compilation.md (#5452)
jackzhxng Oct 5, 2024
17c2f36
Release docs proofreading (#5909)
jackzhxng Oct 5, 2024
84f5a56
Handle empty Android benchmark results (#5916)
huydhn Oct 6, 2024
8fc3e20
Cleanup export_model API calls (#5882)
mcremon-meta Oct 7, 2024
c06a708
Revert "Add quantize option to the coreml script (#5710)" (#5906)
huydhn Oct 7, 2024
d174637
Qualcomm AI Engine Direct - oss model enablement (fastvit) (#5543)
haowhsu-quic Oct 7, 2024
e194feb
update the tested qnn version (#5903)
cccclai Oct 7, 2024
af6f3ed
Add documentation for the apple benchmarking app. (#5935)
shoumikhin Oct 7, 2024
478a9b6
Improve the qcomm aot part docs (#5868)
cccclai Oct 7, 2024
2726bdb
use --use_sdpa_with_kv_cache for 1B/3B bf16 (#5861)
Oct 7, 2024
0a11e99
Update README.md (#5945)
shoumikhin Oct 7, 2024
a9cbb38
Enable `uint8` dtype in shaders (#5932)
Oct 7, 2024
0186c7f
aten.flip (#5879)
Oct 7, 2024
6e871c3
Implement SDPA + KV-Cache operator (#5799)
SS-JIA Oct 7, 2024
59cc817
Move xnnpack.passes to xnnpack._passes (#5917)
tarun292 Oct 7, 2024
c86d0d0
Use source_fn_stack in xnnpack tutorial (#5948)
tarun292 Oct 7, 2024
37a1397
Fix missing export_for_training import in bundled io tutorial (#5949)
tarun292 Oct 7, 2024
9c4032b
Bump numpy from 1.21.3 to 1.22.0 in /.ci/docker (#4514)
dependabot[bot] Oct 7, 2024
f005dd5
Migrate to training IR in executorch tests (#5941)
yushangdi Oct 7, 2024
0424eef
Update docs on Module new APIs. (#5952)
shoumikhin Oct 7, 2024
aad548c
Fix delegate debug handle generation (#5953)
tarun292 Oct 7, 2024
03e4516
Migrate backends/arm to the new namespace (#5904)
dbort Oct 8, 2024
7337f8e
Find portable_lib.so in pip package during cmake build (#5961)
larryliu0820 Oct 8, 2024
fe2b138
Merge remote-tracking branch 'origin/main' into mtk-5
kirklandsign Oct 8, 2024
57f0201
Update build/build_android_llm_demo.sh
kirklandsign Oct 8, 2024
954c78e
Should still use executorch-llama.aar
kirklandsign Oct 8, 2024
d4784a5
Clean up cmakelist
kirklandsign Oct 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
00e3eea170ce5db8ea9c62ce5e48f13886cd6d20
d1b87e26e5c4343f5b56bb1e6f89b479b389bfac
2 changes: 1 addition & 1 deletion .ci/docker/requirements-ci.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mpmath==1.3.0
numpy==1.21.3; python_version == '3.10'
numpy==1.22.0; python_version == '3.10'
numpy==1.23.2; python_version == '3.11'
numpy; python_version >= '3.12'
PyYAML==6.0.1
Expand Down
2 changes: 1 addition & 1 deletion .ci/scripts/build-qnn-sdk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ set_up_aot() {
-DCMAKE_INSTALL_PREFIX=$PWD \
-DEXECUTORCH_BUILD_QNN=ON \
-DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
-DEXECUTORCH_BUILD_SDK=ON \
-DEXECUTORCH_BUILD_DEVTOOLS=ON \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
Expand Down
10 changes: 6 additions & 4 deletions .ci/scripts/test_llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

MODEL_NAME=$1 # stories110M
BUILD_TOOL=$2 # buck2 or cmake
DTYPE=$3 # fp16 or fp32
DTYPE=$3 # fp16, bf16, or fp32
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
UPLOAD_DIR=${5:-}
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
Expand All @@ -29,7 +29,7 @@ if [[ -z "${BUILD_TOOL:-}" ]]; then
fi

if [[ -z "${DTYPE:-}" ]]; then
echo "Missing dtype, choose fp16 or fp32, exiting..."
echo "Missing dtype, choose fp16, bf16, or fp32, exiting..."
exit 1
fi

Expand Down Expand Up @@ -174,6 +174,8 @@ fi
EXPORTED_MODEL_NAME="llama2"
if [[ "${DTYPE}" == "fp16" ]]; then
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_h"
elif [[ "${DTYPE}" == "bf16" ]]; then
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}_bf"
elif [[ "${DTYPE}" == "fp32" ]]; then
:
else
Expand All @@ -186,7 +188,7 @@ EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
EXPORT_ARGS="-c ${CHECKPOINT_FILE_NAME} -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
if [[ "${XNNPACK}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -X -qmode 8da4w -G 128"
EXPORT_ARGS="${EXPORT_ARGS} -X --xnnpack-extended-ops -qmode 8da4w -G 128"
fi
if [[ "${CUSTOM}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
Expand All @@ -211,7 +213,7 @@ echo "Creating tokenizer.bin"
$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin


RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10"
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10 --warmup=1"
# Check build tool.
echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
Expand Down
17 changes: 8 additions & 9 deletions .ci/scripts/test_model.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,21 @@ prepare_artifacts_upload() {

build_cmake_executor_runner() {
echo "Building executor_runner"
(rm -rf ${CMAKE_OUTPUT_DIR} \
&& mkdir ${CMAKE_OUTPUT_DIR} \
&& cd ${CMAKE_OUTPUT_DIR} \
&& retry cmake -DCMAKE_BUILD_TYPE=Release \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
rm -rf ${CMAKE_OUTPUT_DIR}
cmake -DCMAKE_BUILD_TYPE=Debug \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-B${CMAKE_OUTPUT_DIR} .

cmake --build ${CMAKE_OUTPUT_DIR} -j4
cmake --build ${CMAKE_OUTPUT_DIR} -j4 --config Debug
}

run_portable_executor_runner() {
# Run test model
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
buck2 run //examples/portable/executor_runner:executor_runner -- --model_path "./${MODEL_NAME}.pte"
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
if [[ ! -f ${CMAKE_OUTPUT_DIR}/executor_runner ]]; then
build_cmake_executor_runner
fi
build_cmake_executor_runner
./${CMAKE_OUTPUT_DIR}/executor_runner --model_path "./${MODEL_NAME}.pte"
else
echo "Invalid build tool ${BUILD_TOOL}. Only buck2 and cmake are supported atm"
Expand Down Expand Up @@ -176,6 +174,7 @@ test_model_with_qnn() {
fi

# Use SM8450 for S22, SM8550 for S23, and SM8560 for S24
# TODO(guangyang): Make QNN chipset matches the target device
QNN_CHIPSET=SM8450

"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only
Expand Down
116 changes: 116 additions & 0 deletions .ci/scripts/test_phi_3_mini.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# CI script: builds ExecuTorch and the phi-3-mini runner, exports the model
# and tokenizer, runs a canned prompt, and verifies the generated answer.
# -e: abort on error, -x: trace commands, -u: error on unset variables.
set -exu

# Positional arguments (both optional).
BUILD_TYPE=${1:-Debug}
# NOTE(review): this reads $3, not $2 — positional argument 2 is skipped.
# Presumably intentional to match a caller's argument layout, but confirm;
# changing it to ${2:-cmake-out} would alter the caller interface.
BUILD_DIR=${3:-cmake-out}
MODEL_DIR=examples/models/phi-3-mini

echo "Building with BUILD_TYPE: $BUILD_TYPE, BUILD_DIR: $BUILD_DIR"

# Fall back to python3 when the CI environment does not provide an
# explicit interpreter.
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
PYTHON_EXECUTABLE=python3
fi

# Number of processes for a parallel build
NPROC=8
if hash nproc &> /dev/null; then NPROC=$(nproc); fi

cmake_install_executorch_libraries() {
  # Configure and install the core ExecuTorch libraries (plus the extensions
  # and kernels the phi-3-mini runner links against) into ${BUILD_DIR}.
  #
  # Fix: the original passed a hardcoded `-DPYTHON_EXECUTABLE=python`,
  # bypassing the $PYTHON_EXECUTABLE fallback resolved at the top of this
  # script and inconsistent with cmake_build_phi_3_mini below.
  cmake -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
    -DCMAKE_INSTALL_PREFIX="${BUILD_DIR}" \
    -DEXECUTORCH_ENABLE_LOGGING=1 \
    -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -B"${BUILD_DIR}" .

  # Build and install; --config matters for multi-config generators.
  cmake --build "${BUILD_DIR}" -j"${NPROC}" --target install --config "${BUILD_TYPE}"
}

cmake_build_phi_3_mini() {
  # Configure and build the phi-3-mini runner example against the
  # ExecuTorch libraries previously installed into ${BUILD_DIR}.
  local -a configure_args=(
    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE"
    -DCMAKE_INSTALL_PREFIX="${BUILD_DIR}"
    -DCMAKE_BUILD_TYPE="${BUILD_TYPE}"
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON
    -DEXECUTORCH_BUILD_XNNPACK=ON
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON
  )
  local build_tree="${BUILD_DIR}/${MODEL_DIR}"

  cmake "${configure_args[@]}" -B"${build_tree}" "${MODEL_DIR}"

  cmake --build "${build_tree}" -j"${NPROC}" --config "${BUILD_TYPE}"
}

# Fetch the upstream tokenizer.model from Hugging Face and convert it to
# the tokenizer.bin format consumed by the runner.
prepare_tokenizer() {
  echo "Downloading and converting tokenizer.model"
  local tokenizer_url="https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/resolve/main/tokenizer.model?download=true"
  wget -O tokenizer.model "$tokenizer_url"
  "$PYTHON_EXECUTABLE" -m executorch.extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
}

# Export the phi-3-mini model (4k context variant, sequence length 128)
# to the phi-3-mini.pte program file.
export_phi_3_mini () {
  echo "Exporting phi-3-mini. This will take a few minutes"
  local -a export_args=(-c "4k" -s 128 -o phi-3-mini.pte)
  "$PYTHON_EXECUTABLE" -m executorch.examples.models.phi-3-mini.export_phi-3-mini "${export_args[@]}"
}

# Run the exported model through the phi_3_mini_runner with a fixed prompt
# and verify the expected answer appears in the generated text.
# Exits 0 on success, 1 on any missing artifact or output mismatch.
run_and_verify() {
NOW=$(date +"%H:%M:%S")
echo "Starting to run phi-3-mini runner at ${NOW}"
# Both artifacts must have been produced by the export steps above.
if [[ ! -f "phi-3-mini.pte" ]]; then
echo "Export failed. Abort"
exit 1
fi
if [[ ! -f "tokenizer.bin" ]]; then
echo "tokenizer.bin is missing."
exit 1
fi

# temperature=0 makes generation greedy/deterministic so the output check
# below is stable. The prompt string spans multiple lines on purpose —
# it uses the phi-3 chat template markers (<|system|>, <|user|>, ...).
${BUILD_DIR}/${MODEL_DIR}/phi_3_mini_runner \
--model_path=phi-3-mini.pte \
--tokenizer_path=tokenizer.bin \
--seq_len=128 \
--temperature=0 \
--prompt="<|system|>
You are a helpful assistant.<|end|>
<|user|>
What is the capital of France?<|end|>
<|assistant|>" > result.txt

# verify result.txt
# Substring match: the model may keep generating after the answer, so we
# only require EXPECTED_RESULT to occur somewhere in the output.
RESULT=$(cat result.txt)
EXPECTED_RESULT="The capital of France is Paris."
if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
echo "Expected result prefix: ${EXPECTED_RESULT}"
echo "Actual result: ${RESULT}"
echo "Success"
exit 0
else
echo "Expected result prefix: ${EXPECTED_RESULT}"
echo "Actual result: ${RESULT}"
echo "Failure; results not the same"
exit 1
fi
}

# Driver: build the ExecuTorch libraries and the phi-3-mini runner, export
# the tokenizer and model artifacts, then run the model and check its output.
cmake_install_executorch_libraries
cmake_build_phi_3_mini

prepare_tokenizer
export_phi_3_mini

run_and_verify
Loading
Loading