
Commit cf8c7b0

Merge branch 'main' into cuda-err-msg
2 parents bede3a9 + c9339e2 commit cf8c7b0


76 files changed: +6635 −1353 lines changed


.ci/scripts/test-cuda-build.sh

Lines changed: 0 additions & 3 deletions
@@ -27,9 +27,6 @@ test_executorch_cuda_build() {
   nvcc --version || echo "nvcc not found"
   nvidia-smi || echo "nvidia-smi not found"
 
-  # Set CMAKE_ARGS to enable CUDA build - ExecuTorch will handle PyTorch installation automatically
-  export CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
-
   echo "=== Starting ExecuTorch Installation ==="
   # Install ExecuTorch with CUDA support with timeout and error handling
   timeout 5400 ./install_executorch.sh || {

.ci/scripts/test_llama_lora.sh

Lines changed: 45 additions & 3 deletions
@@ -94,7 +94,7 @@ else
   exit 1
 fi
 
-# Export LoRA PTE, PTD file.
+# Export LoRA PTE, foundation PTD file.
 MODEL_SEPARATE="${MODEL_NAME}_separate"
 $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
   base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
@@ -114,20 +114,62 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
 NOW=$(date +"%H:%M:%S")
 echo "Starting to run llama runner at ${NOW}"
 # shellcheck source=/dev/null
-cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_path=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_paths=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
 NOW=$(date +"%H:%M:%S")
 echo "Finished at ${NOW}"
 
 RESULT2=$(cat result2.txt)
 if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT2}"
+  # Do not clean up files if test passes, as they're re-used in the next test.
   echo "Success"
-  cleanup_files
 else
   echo "Expected result prefix: ${EXPECTED_PREFIX}"
   echo "Actual result: ${RESULT2}"
   echo "Failure; results not the same"
   cleanup_files
   exit 1
 fi
+
+# Export LoRA PTE, LoRA PTD, foundation PTD file.
+MODEL_PROGRAM_ONLY="${MODEL_NAME}_program"
+MODEL_LORA_WEIGHTS="lora_weights"
+MODEL_FOUNDATION_WEIGHTS="foundation_weights"
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
+  base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+  base.params="${DOWNLOADED_PATH}/params.json" \
+  base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
+  base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
+  base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
+  model.use_kv_cache=true \
+  model.use_sdpa_with_kv_cache=true \
+  model.dtype_override="fp32" \
+  backend.xnnpack.enabled=true \
+  backend.xnnpack.extended_ops=true \
+  export.output_name="${MODEL_PROGRAM_ONLY}.pte" \
+  export.foundation_weights_file="${MODEL_FOUNDATION_WEIGHTS}.ptd" \
+  export.lora_weights_file="${MODEL_LORA_WEIGHTS}.ptd"
+
+# Run llama runner.
+NOW=$(date +"%H:%M:%S")
+echo "Starting to run llama runner at ${NOW}"
+# shellcheck source=/dev/null
+cmake-out/examples/models/llama/llama_main --model_path=${MODEL_PROGRAM_ONLY}.pte --data_paths="${MODEL_FOUNDATION_WEIGHTS}.ptd,${MODEL_LORA_WEIGHTS}.ptd" --prompt="${PROMPT}" ${RUNTIME_ARGS} > result3.txt
+NOW=$(date +"%H:%M:%S")
+echo "Finished at ${NOW}"
+
+RESULT3=$(cat result3.txt)
+if [[ "${RESULT3}" == "${EXPECTED_PREFIX}"* ]]; then
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Success"
+else
+  echo "Expected result prefix: ${EXPECTED_PREFIX}"
+  echo "Actual result: ${RESULT3}"
+  echo "Failure; results not the same"
+  cleanup_files
+  exit 1
+fi
+
+cleanup_files

.github/workflows/cuda.yml

Lines changed: 5 additions & 5 deletions
@@ -1,7 +1,7 @@
 # Test ExecuTorch CUDA Build Compatibility
 # This workflow tests whether ExecuTorch can be successfully built with CUDA support
 # across different CUDA versions (12.6, 12.8, 12.9) using the command:
-# CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+# ./install_executorch.sh
 #
 # Note: ExecuTorch automatically detects the system CUDA version using nvcc and
 # installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation needed.
@@ -43,7 +43,7 @@ jobs:
        set -eux
 
        # Test ExecuTorch CUDA build - ExecuTorch will automatically detect CUDA version
-        # and install the appropriate PyTorch wheel when CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON"
+        # and install the appropriate PyTorch wheel
        source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"
 
   # This job will fail if any of the CUDA versions fail
@@ -71,7 +71,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        model: [linear, add, add_mul, resnet18]
+        model: [linear, add, add_mul, resnet18, conv1d]
     with:
       timeout: 90
       runner: linux.g5.4xlarge.nvidia.gpu
@@ -83,7 +83,7 @@ jobs:
      script: |
        set -eux
 
-        PYTHON_EXECUTABLE=python CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+        PYTHON_EXECUTABLE=python ./install_executorch.sh
        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
        PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
 
@@ -110,7 +110,7 @@ jobs:
        set -eux
 
        echo "::group::Setup ExecuTorch"
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_executorch.sh
+        ./install_executorch.sh
        echo "::endgroup::"
 
        echo "::group::Setup Huggingface"
.github/workflows/lint.yml

Lines changed: 22 additions & 13 deletions
@@ -143,19 +143,28 @@ jobs:
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
-        FILES_NEEDS_FORMAT=$(/opt/google-java-format -n \
-          extension/android/executorch_android/src/main/java/org/pytorch/executorch/*.java \
-          extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/*.java \
-          extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/*.java \
-          extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/*.java \
-          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java \
-          extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/*.java)
+        FILES_NEEDS_FORMAT=$(find extension/android/executorch_android/src/main/java/org/pytorch/executorch \
+          extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm \
+          extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations \
+          extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch \
+          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench \
+          extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench \
+          -type f -name "*.java" 2>/dev/null | \
+          xargs -r /opt/google-java-format -n)
+
        if [ -n "$FILES_NEEDS_FORMAT" ]; then
-          echo "Warning: The following files need formatting. Please use google-java-format."
-          echo "Use a binary from https://github.com/google/google-java-format/releases/"
-          echo "For example:"
-          echo "wget https://github.com/google/google-java-format/releases/download/v1.23.0/google-java-format_linux-x86-64"
-          echo "chmod +x google-java-format_linux-x86-64"
-          echo "./google-java-format_linux-x86-64 -i $FILES_NEEDS_FORMAT"
+          echo "Warning: The following files need formatting:"
+          echo "$FILES_NEEDS_FORMAT"
+          echo ""
+          echo "Please use google-java-format from https://github.com/google/google-java-format/releases/"
+          echo ""
+          echo "To fix, run one of these commands:"
+          echo "  # Using xargs (recommended):"
+          echo "  find <paths> -type f -name '*.java' | xargs google-java-format -i"
+          echo ""
+          echo "  # Or format specific files:"
+          echo "$FILES_NEEDS_FORMAT" | while IFS= read -r file; do
+            echo "  google-java-format -i \"$file\""
+          done
          exit 1
        fi

README.md

Lines changed: 7 additions & 3 deletions
@@ -104,14 +104,16 @@ outputs = method.execute([torch.randn(1, 3, 224, 224)])
 
 Module module("model.pte");
 auto tensor = make_tensor_ptr({2, 2}, {1.0f, 2.0f, 3.0f, 4.0f});
-auto outputs = module.forward({tensor});
+auto outputs = module.forward(tensor);
 ```
 
 **[Swift (iOS)](https://docs.pytorch.org/executorch/main/ios-section.html)**
 ```swift
+import ExecuTorch
+
 let module = Module(filePath: "model.pte")
-let input = Tensor<Float>([1.0, 2.0, 3.0, 4.0])
-let outputs: [Value] = try module.forward([input])
+let input = Tensor<Float>([1.0, 2.0, 3.0, 4.0], shape: [2, 2])
+let outputs = try module.forward(input)
 ```
 
 **[Kotlin (Android)](https://docs.pytorch.org/executorch/main/android-section.html)**
@@ -151,6 +153,8 @@ runner->generate("Hello, how are you?", config);
 
 **[Swift (iOS)](https://docs.pytorch.org/executorch/main/llm/run-on-ios.html)**
 ```swift
+import ExecuTorchLLM
+
 let runner = TextRunner(modelPath: "llama.pte", tokenizerPath: "tiktoken.bin")
 try runner.generate("Hello, how are you?", Config {
   $0.sequenceLength = 128

backends/aoti/common_shims.cpp

Lines changed: 12 additions & 0 deletions
@@ -172,6 +172,18 @@ int32_t aoti_torch_dtype_bfloat16() {
   return 15; // PyTorch's bfloat16 dtype code
 }
 
+int32_t aoti_torch_dtype_int8() {
+  return 1; // PyTorch's int8 dtype code
+}
+
+int32_t aoti_torch_dtype_int16() {
+  return 2; // PyTorch's int16 dtype code
+}
+
+int32_t aoti_torch_dtype_int32() {
+  return 3; // PyTorch's int32 dtype code
+}
+
 int32_t aoti_torch_dtype_int64() {
   return 4; // PyTorch's int64 dtype code
 }

backends/aoti/common_shims.h

Lines changed: 3 additions & 0 deletions
@@ -59,6 +59,9 @@ int32_t aoti_torch_device_type_cpu();
 int32_t aoti_torch_layout_strided();
 int32_t aoti_torch_dtype_float32();
 int32_t aoti_torch_dtype_bfloat16();
+int32_t aoti_torch_dtype_int8();
+int32_t aoti_torch_dtype_int16();
+int32_t aoti_torch_dtype_int32();
 int32_t aoti_torch_dtype_int64();
 
 // Dtype utility function needed by Metal backend

backends/aoti/utils.h

Lines changed: 6 additions & 0 deletions
@@ -34,6 +34,12 @@ inline executorch::aten::ScalarType dtype_to_scalar_type(int32_t dtype) {
   // Convert based on known PyTorch dtype codes (without CUDA-specific
   // dependency)
   switch (dtype) {
+    case 1: // PyTorch's int8 dtype code
+      return executorch::aten::ScalarType::Char;
+    case 2: // PyTorch's int16 dtype code
+      return executorch::aten::ScalarType::Short;
+    case 3: // PyTorch's int32 dtype code
+      return executorch::aten::ScalarType::Int;
     case 4: // PyTorch's int64 dtype code
       return executorch::aten::ScalarType::Long;
     case 6: // PyTorch's float32 dtype code
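
The integer codes in these hunks follow PyTorch's ScalarType numbering (Char = 1, Short = 2, Int = 3, Long = 4, Float = 6, BFloat16 = 15). A minimal, self-contained sketch of the mapping this change extends, using a hypothetical stand-in enum rather than the real executorch::aten::ScalarType, could look like:

```cpp
// Illustrative sketch only; ScalarType below is a hypothetical stand-in for
// executorch::aten::ScalarType, and the error handling is simplified.
#include <cstdint>
#include <stdexcept>

enum class ScalarType { Char, Short, Int, Long, Float, BFloat16 };

inline ScalarType dtype_to_scalar_type(int32_t dtype) {
  switch (dtype) {
    case 1:  return ScalarType::Char;     // PyTorch int8
    case 2:  return ScalarType::Short;    // PyTorch int16
    case 3:  return ScalarType::Int;      // PyTorch int32
    case 4:  return ScalarType::Long;     // PyTorch int64
    case 6:  return ScalarType::Float;    // PyTorch float32
    case 15: return ScalarType::BFloat16; // PyTorch bfloat16
    default: throw std::runtime_error("unsupported dtype code");
  }
}
```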

backends/apple/metal/README.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+# Metal Backend
+
+⚠️ **EXPERIMENTAL BACKEND**
+
+This backend is currently in experimental development and may not be fully functional or stable. Use with caution.
