Skip to content

Commit efc641d

Browse files
authored
Merge branch 'pytorch:main' into main
2 parents f776870 + 4bf3869 commit efc641d

File tree

161 files changed

+8861
-2073
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

161 files changed

+8861
-2073
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
7a064ed3eafa43f17412d434b395240c727b3000
1+
7a79b41e29a790ebb4b530eb98a89381e2d7de29

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ install_devicefarm_cli() {
101101
}
102102

103103
reserve_if_needed() {
104+
# Set default value
105+
export DEVICE_RESERVED=0
106+
104107
if ! command -v devicefarm-cli >/dev/null 2>&1; then
105108
echo "[WARN] devicefarm-cli is not installed." >&2
106109
return 1
@@ -144,7 +147,11 @@ reserve_if_needed() {
144147

145148
if (( any_below_threshold )); then
146149
echo "[INFO] Reserving now."
147-
devicefarm-cli -R
150+
if ! devicefarm-cli -R; then
151+
echo "::warning::Failed to reserve a device. No devices are currently available." >&2
152+
echo "[WARN] Device reservation failed - continuing without device." >&2
153+
return 0
154+
fi
148155
else
149156
echo "[INFO] Don't need to be reserved."
150157
fi
@@ -174,7 +181,10 @@ reserve_if_needed() {
174181
if [[ -n "$reservation_id" ]]; then
175182
devicefarm-cli -C "$reservation_id"
176183
devicefarm-cli -E "ls /"
184+
export DEVICE_RESERVED=1
185+
echo "[INFO] Device successfully reserved and connected."
177186
else
187+
echo "::warning::No available devices found." >&2
178188
echo "[WARN] There are no available devices."
179189
fi
180190
}

.github/workflows/add-unanswered-to-project.yml

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@ name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136
22

33
on:
44
workflow_dispatch:
5-
pull_request:
6-
paths:
7-
.github/workflows/add-unanswered-to-project.yml
5+
schedule:
6+
# GitHub Actions cron uses UTC. These run at:
7+
# - 14:00 UTC -> 08:00 CST (UTC-6)
8+
# - 19:00 UTC -> 13:00 CST (UTC-6)
9+
- cron: "0 14 * * *"
10+
- cron: "0 19 * * *"
11+
pull_request:
12+
paths:
13+
- .github/workflows/add-unanswered-to-project.yml
814
jobs:
915
add_to_project:
1016
runs-on: ubuntu-latest
1117
steps:
12-
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
18+
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors and bots)
1319
uses: actions/github-script@v7
1420
with:
1521
github-token: ${{ secrets.ET_EXT_CONTRIB }}
@@ -41,13 +47,26 @@ jobs:
4147
"app/dependabot", "Erik-Lundell", "zingo", "AdrianLundell", "oscarandersson8218", "per", "Sebastian-Larsson", "SaoirseARM",
4248
"robell", "mansnils", "martinlsm", "freddan80", "YufengShi-dudu", "tom-arm", "perheld", "Jerry-Ge", "gggekov", "fumchin",
4349
"wwwind", "benkli01", "Tessil", "maddun01", "Michiel-Olieslagers", "armwaheed", "agrima1304", "emmakujala", "annietllnd",
44-
"MatthiasHertel80", "AlexTawseArm", "jmahbs", "morgolock", "Christoffer-JL", "ArmRyan", "xingguo01", "haowhsu-quic",
45-
"shewu-quic", "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "thchenqti", "jethroqti", "chenweng-quic",
46-
"cymbalrush", "DenisVieriu97", "billmguo", "StrycekSimon", "jirioc", "robert-kalmar", "skywall", "MartinPavella",
47-
"roman-janik-nxp", "novak-vaclav ", "neuropilot-captain", "dijopaul", "cad-rlc", "cad-audio", "ynimmaga", "daniil-lyakhov",
48-
"emmanuel-ferdman", "cavusmustafa", "anzr299", "Jiseong-oh", "alexdean08"
50+
"MatthiasHertel80", "AlexTawseArm", "jmahbs", "morgolock", "Christoffer-JL", "ArmRyan", "xingguo01", "tgonzalezorlandoarm",
51+
"haowhsu-quic", "shewu-quic", "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "thchenqti", "jethroqti",
52+
"chenweng-quic", "cymbalrush", "DenisVieriu97", "billmguo", "StrycekSimon", "jirioc", "robert-kalmar", "skywall",
53+
"MartinPavella", "roman-janik-nxp", "novak-vaclav", "neuropilot-captain", "dijopaul", "cad-rlc", "cad-audio", "ynimmaga",
54+
"daniil-lyakhov", "emmanuel-ferdman", "cavusmustafa", "anzr299", "Jiseong-oh", "alexdean08",
55+
// explicitly include the dependabot bot login seen in PRs
56+
"dependabot[bot]"
4957
]);
5058
59+
function isBotOrExcluded(user) {
60+
if (!user) return false;
61+
// GitHub sometimes marks bots with user.type === "Bot"
62+
if (user.type && user.type.toLowerCase() === "bot") return true;
63+
// Some bots use logins that end with [bot], e.g. dependabot[bot]
64+
if (user.login && user.login.endsWith("[bot]")) return true;
65+
// Explicit excluded list
66+
if (excludedAuthors.has(user.login)) return true;
67+
return false;
68+
}
69+
5170
async function addItem(contentId, type, number) {
5271
try {
5372
await github.graphql(`
@@ -69,7 +88,7 @@ jobs:
6988
}
7089
7190
try {
72-
// Add open issues (not PRs) and exclude by author
91+
// Add open issues (not PRs) and exclude by author/bots
7392
const issues = await github.paginate(
7493
github.rest.issues.listForRepo,
7594
{
@@ -80,12 +99,14 @@ jobs:
8099
}
81100
);
82101
for (const issue of issues) {
83-
if (!issue.pull_request && !excludedAuthors.has(issue.user.login)) {
102+
if (!issue.pull_request && !isBotOrExcluded(issue.user)) {
84103
await addItem(issue.node_id, 'issue', issue.number);
104+
} else {
105+
console.log(`Skipping issue #${issue.number} by ${issue.user && issue.user.login}`);
85106
}
86107
}
87108
88-
// Add open, non-draft PRs (regardless of review state), exclude by author
109+
// Add open, non-draft PRs (regardless of review state), exclude by author/bots
89110
const prs = await github.paginate(
90111
github.rest.pulls.list,
91112
{
@@ -95,8 +116,10 @@ jobs:
95116
}
96117
);
97118
for (const pr of prs) {
98-
if (!pr.draft && !excludedAuthors.has(pr.user.login)) {
119+
if (!pr.draft && !isBotOrExcluded(pr.user)) {
99120
await addItem(pr.node_id, 'pr', pr.number);
121+
} else {
122+
console.log(`Skipping PR #${pr.number} by ${pr.user && pr.user.login}`);
100123
}
101124
}
102125
} catch (error) {

.github/workflows/cuda.yml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ jobs:
8787
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90-
test-cuda-shims:
91-
name: test-cuda-shims
90+
unittest-cuda:
91+
name: unittest-cuda
9292
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9393
permissions:
9494
id-token: write
@@ -103,17 +103,20 @@ jobs:
103103
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
104104
script: |
105105
set -eux
106-
# Install requirements
107-
bash ./install_requirements.sh
106+
# Install executorch in editable mode so custom op libs land in-tree
107+
bash ./install_executorch.sh
108108
109109
# Build ExecuTorch with CUDA support
110110
cmake --workflow --preset llm-release-cuda
111111
112-
# Build and run CUDA shim tests
112+
# Build and run CUDA shim tests (C++)
113113
pushd backends/cuda/runtime/shims/tests
114114
cmake --workflow --preset default
115115
popd
116116
117+
# Run CUDA backend Python tests, overrides addopts so that we don't run all tests in pytest.ini
118+
python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
119+
117120
export-model-cuda-artifact:
118121
name: export-model-cuda-artifact
119122
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)

.github/workflows/pull.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,12 @@ jobs:
945945
export SAMSUNG_AI_LITECORE_KEY=$SECRET_SAMSUNG_AI_LITECORE_KEY
946946
source .ci/scripts/setup-samsung-linux-deps.sh
947947
948+
# Check if device was reserved
949+
if [[ "${DEVICE_RESERVED:-0}" != "1" ]]; then
950+
echo "::warning::Skipping tests - no Samsung device available"
951+
exit 0
952+
fi
953+
948954
# Test quant models
949955
model_scripts="deeplab_v3 edsr inception_v3 inception_v4 mobilenet_v2 mobilenet_v3 resnet18 resnet50 vit wav2letter"
950956
for m_script in $model_scripts; do
@@ -981,6 +987,12 @@ jobs:
981987
export SAMSUNG_AI_LITECORE_KEY=$SECRET_SAMSUNG_AI_LITECORE_KEY
982988
source .ci/scripts/setup-samsung-linux-deps.sh
983989
990+
# Check if device was reserved
991+
if [[ "${DEVICE_RESERVED:-0}" != "1" ]]; then
992+
echo "::warning::Skipping tests - no Samsung device available"
993+
exit 0
994+
fi
995+
984996
# Test models
985997
python -m unittest discover -s backends/samsung/test/models -p "test_*.py"
986998

backends/aoti/aoti_backend.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,10 @@ def preprocess(
156156
# Apply custom backend-specific passes
157157
custom_passes = cls.get_custom_passes(compile_specs)
158158
for custom_pass in custom_passes:
159-
custom_pass(device_edge_program.graph_module)
159+
if getattr(custom_pass, "requires_exported_program", False):
160+
custom_pass(device_edge_program)
161+
else:
162+
custom_pass(device_edge_program.graph_module)
160163

161164
# Run decompositions if any
162165
if decomposition_table:

backends/arm/MODELS.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# The following file contains all models that have been confirmed to be functional and tested for the Arm backend:
2+
- Conformer
3+
- Deit Tiny
4+
- DeepLab v3 (DL3)
5+
- Inception v3 (IC3)
6+
- Llama
7+
- Long Short-Term Memory (LSTM)
8+
- MobileNet v2 (MV2)
9+
- MobileNet v3 (MV3)
10+
- Some popular torch.nn.functional models (NN functional)
11+
- Some popular torch.nn.modules models (NN modules)
12+
- Some popular torch ops (Torch Functions)
13+
- Neural Super Sampler (NSS)
14+
- ResNet 18
15+
- Wav2Letter (W2L)
16+
- Stable Diffusion:
17+
* CLIP Text Encoder (CLIP Text with Projection)
18+
* Stable Diffusion 3 Transformer (SD3 Transformer)
19+
* T5 Encoder
20+
* VAE Encoder/Decoder (VAE)

backends/arm/_passes/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@
113113
from .replace_scalar_with_tensor_pass import ( # noqa
114114
ReplaceScalarWithTensorByProfilePass,
115115
)
116+
from .rewrite_bool_bitwise_not_to_logical_not_pass import ( # noqa
117+
RewriteBoolBitwiseNotToLogicalNotPass,
118+
)
116119
from .rewrite_bool_to_fp32_cast_via_int8_pass import ( # noqa
117120
RewriteBoolToFp32CastViaInt8Pass,
118121
)

backends/arm/_passes/arm_pass.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from abc import abstractmethod
99
from typing import Any, List, Optional, Set, Type
1010

11+
from executorch.backends.arm.constants import DISALLOW_TFA_META_KEY
1112
from executorch.exir.pass_base import ExportPass, NodeMetadata
1213
from torch.fx import GraphModule
1314
from torch.fx.passes.infra.pass_base import PassResult
@@ -16,9 +17,23 @@
1617
class ArmPass(ExportPass):
1718
"""Base class for Arm passes"""
1819

19-
def __init__(self) -> None:
20-
super().__init__()
20+
def __init__(self, tfa_pass: bool = False, *args, **kwargs) -> None:
21+
super().__init__(*args, **kwargs)
2122
self.submodule_depth = 0
23+
self.is_tfa_pass = tfa_pass
24+
25+
def allowed_to_transform(self, meta: NodeMetadata | dict[str, Any]) -> bool:
26+
if not self.is_tfa_pass:
27+
return True
28+
29+
if isinstance(meta, NodeMetadata):
30+
meta_dict = meta.data
31+
else:
32+
meta_dict = meta
33+
34+
disallow_tfa = meta_dict.get(DISALLOW_TFA_META_KEY, False)
35+
36+
return not disallow_tfa
2237

2338
@property
2439
@abstractmethod

backends/arm/_passes/arm_pass_manager.py

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
RemoveNoopPass,
104104
ReplaceInfAndLimitValuesPass,
105105
ReplaceScalarWithTensorByProfilePass,
106+
RewriteBoolBitwiseNotToLogicalNotPass,
106107
RewriteBoolToFp32CastViaInt8Pass,
107108
RewriteConvPass,
108109
RewriteMatmulPass,
@@ -222,6 +223,7 @@ def _tosa_pipeline(
222223
self.add_passes(
223224
[
224225
FuseQuantizedActivationPass(),
226+
RewriteBoolBitwiseNotToLogicalNotPass(),
225227
RewriteBoolToFp32CastViaInt8Pass(),
226228
ConvertToClampPass(),
227229
DecomposeTOSAUnsupportedClampPass(),
@@ -376,65 +378,65 @@ def transform_to_backend_pipeline(
376378

377379
def transform_for_annotation_pipeline(self, graph_module: GraphModule):
378380
# Preprocessing passes
379-
self.add_pass(RemoveGraphAssertsPass())
381+
self.add_pass(RemoveGraphAssertsPass(tfa_pass=True))
380382

381383
# Transformation passes (pre scalar -> tensor)
382384
self.add_passes(
383385
[
384-
DecomposeSelectScatterPass(),
385-
ConvertInt64ConstOpsToInt32Pass(),
386-
ConvertInt64OutputOpsToInt32Pass(),
387-
InsertInt32CastsAfterInt64PlaceholdersPass(),
388-
DecomposeEmbeddingPass(),
389-
DecomposeScaledDotProductAttentionPass(),
390-
DecomposeRoundPass(),
391-
DecomposeLogitPass(),
392-
PromoteBoolOperandsPass(),
393-
DecomposeSignPass(),
394-
DecomposeAddmmPass(),
395-
DecomposeRemainderPass(),
396-
DecomposeFloorDividePass(),
397-
DecomposeDivTensorModePass(),
386+
DecomposeSelectScatterPass(tfa_pass=True),
387+
ConvertInt64ConstOpsToInt32Pass(tfa_pass=True),
388+
ConvertInt64OutputOpsToInt32Pass(tfa_pass=True),
389+
InsertInt32CastsAfterInt64PlaceholdersPass(tfa_pass=True),
390+
DecomposeEmbeddingPass(tfa_pass=True),
391+
DecomposeScaledDotProductAttentionPass(tfa_pass=True),
392+
DecomposeRoundPass(tfa_pass=True),
393+
DecomposeLogitPass(tfa_pass=True),
394+
PromoteBoolOperandsPass(tfa_pass=True),
395+
DecomposeSignPass(tfa_pass=True),
396+
DecomposeAddmmPass(tfa_pass=True),
397+
DecomposeRemainderPass(tfa_pass=True),
398+
DecomposeFloorDividePass(tfa_pass=True),
399+
DecomposeDivTensorModePass(tfa_pass=True),
398400
]
399401
)
400402

401403
# Scalars -> tensors
402404
self.add_passes(
403405
[
404-
ReplaceScalarWithTensorByProfilePass(),
405-
ScalarsToAttributePass(),
406+
ReplaceScalarWithTensorByProfilePass(tfa_pass=True),
407+
ScalarsToAttributePass(tfa_pass=True),
406408
]
407409
)
408410

409411
# Transformation passes (post scalar removal)
410412
self.add_passes(
411413
[
412-
NormalizeWhileInitialArgsPass(use_exir_clone=False),
413-
DecomposeAddSubAlphaPass(),
414-
DecomposeGroupNormPass(),
415-
DecomposeLayerNormPass(),
416-
DecomposeVarPass(),
417-
DecomposeMeanDimPass(graph_module, self.tosa_spec),
418-
DecomposeNotEqualPass(),
419-
DecomposeCosineSimilarityPass(),
420-
DecomposeGluPass(),
421-
DecomposeDivPass(),
422-
DecomposeLeakyReLUPass(),
423-
DecomposeLinalgVectorNormPass(),
424-
DecomposeSqrtPass(),
425-
DecomposeSiluPass(),
426-
DecomposeAvgPool2dPass(),
427-
DecomposeSoftmaxUnstablePass(),
428-
DecomposeSoftmaxPass(),
429-
ConvertMinMaxPass(),
414+
NormalizeWhileInitialArgsPass(use_exir_clone=False, tfa_pass=True),
415+
DecomposeAddSubAlphaPass(tfa_pass=True),
416+
DecomposeGroupNormPass(tfa_pass=True),
417+
DecomposeLayerNormPass(tfa_pass=True),
418+
DecomposeVarPass(tfa_pass=True),
419+
DecomposeMeanDimPass(graph_module, self.tosa_spec, tfa_pass=True),
420+
DecomposeNotEqualPass(tfa_pass=True),
421+
DecomposeCosineSimilarityPass(tfa_pass=True),
422+
DecomposeGluPass(tfa_pass=True),
423+
DecomposeDivPass(tfa_pass=True),
424+
DecomposeLeakyReLUPass(tfa_pass=True),
425+
DecomposeLinalgVectorNormPass(tfa_pass=True),
426+
DecomposeSqrtPass(tfa_pass=True),
427+
DecomposeSiluPass(tfa_pass=True),
428+
DecomposeAvgPool2dPass(tfa_pass=True),
429+
DecomposeSoftmaxUnstablePass(tfa_pass=True),
430+
DecomposeSoftmaxPass(tfa_pass=True),
431+
ConvertMinMaxPass(tfa_pass=True),
430432
]
431433
)
432434

433435
# Postprocessing passes
434436
self.add_passes(
435437
[
436-
ReplaceInfAndLimitValuesPass(),
437-
DecomposeMaskedFillPass(),
438+
ReplaceInfAndLimitValuesPass(tfa_pass=True),
439+
DecomposeMaskedFillPass(tfa_pass=True),
438440
]
439441
)
440442

0 commit comments

Comments
 (0)