Skip to content

Commit 5ffb50a

Browse files
committed
Update base for Update on "[slimtensor] Add SlimTensor class with basic properties and CPU copy operation"
**Key components:** 1. **`c10/core/Contiguity.h`** - Contiguity checking utility: - `_compute_contiguous<T>()` - computes whether a tensor with given sizes/strides is contiguous in memory (row-major order) 2. **`core/SlimTensor.h`** - Main SlimTensor class with: - **Constructors**: Default (undefined tensor) and full constructor with storage, sizes, strides, dtype, and storage_offset - **Property accessors**: - `sizes()`, `size(dim)` - get tensor dimensions with negative indexing support - `strides()`, `stride(dim)` - get tensor strides with negative indexing support - `dtype()`, `device()`, `device_type()`, `device_index()` - `numel()`, `dim()`, `nbytes()`, `itemsize()` - `data_ptr()` - returns pointer to tensor data (adjusted for storage_offset) - `storage_offset()`, `storage()` - **State queries**: `defined()`, `is_cpu()`, `is_contiguous()`, `is_empty()` - **Copy operation**: `copy_(other)` - copies data from another tensor - Fast path: uses memcpy for both-contiguous tensors - Slow path: element-wise copy respecting strides for non-contiguous tensors - **Setters**: `reset()`, `set_storage()`, `set_sizes_and_strides()` **Current constraints:** - Only CPU device supported - Only Float32 dtype tested - copy_() only supports CPU-to-CPU copy These constraints will be relaxed in the following diffs. Differential Revision: [D89750150](https://our.internmc.facebook.com/intern/diff/D89750150/) [ghstack-poisoned]
2 parents 1a61a03 + fd6fa87 commit 5ffb50a

File tree

71 files changed

+2752
-601
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+2752
-601
lines changed

.github/workflows/add-unanswered-to-project.yml

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@ name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136
22

33
on:
44
workflow_dispatch:
5-
pull_request:
6-
paths:
7-
.github/workflows/add-unanswered-to-project.yml
5+
schedule:
6+
# GitHub Actions cron uses UTC. These run at:
7+
# - 14:00 UTC -> 08:00 CST (UTC-6)
8+
# - 19:00 UTC -> 13:00 CST (UTC-6)
9+
- cron: "0 14 * * *"
10+
- cron: "0 19 * * *"
11+
pull_request:
12+
paths:
13+
- .github/workflows/add-unanswered-to-project.yml
814
jobs:
915
add_to_project:
1016
runs-on: ubuntu-latest
1117
steps:
12-
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
18+
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors and bots)
1319
uses: actions/github-script@v7
1420
with:
1521
github-token: ${{ secrets.ET_EXT_CONTRIB }}
@@ -41,13 +47,26 @@ jobs:
4147
"app/dependabot", "Erik-Lundell", "zingo", "AdrianLundell", "oscarandersson8218", "per", "Sebastian-Larsson", "SaoirseARM",
4248
"robell", "mansnils", "martinlsm", "freddan80", "YufengShi-dudu", "tom-arm", "perheld", "Jerry-Ge", "gggekov", "fumchin",
4349
"wwwind", "benkli01", "Tessil", "maddun01", "Michiel-Olieslagers", "armwaheed", "agrima1304", "emmakujala", "annietllnd",
44-
"MatthiasHertel80", "AlexTawseArm", "jmahbs", "morgolock", "Christoffer-JL", "ArmRyan", "xingguo01", "haowhsu-quic",
45-
"shewu-quic", "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "thchenqti", "jethroqti", "chenweng-quic",
46-
"cymbalrush", "DenisVieriu97", "billmguo", "StrycekSimon", "jirioc", "robert-kalmar", "skywall", "MartinPavella",
47-
"roman-janik-nxp", "novak-vaclav ", "neuropilot-captain", "dijopaul", "cad-rlc", "cad-audio", "ynimmaga", "daniil-lyakhov",
48-
"emmanuel-ferdman", "cavusmustafa", "anzr299", "Jiseong-oh", "alexdean08"
50+
"MatthiasHertel80", "AlexTawseArm", "jmahbs", "morgolock", "Christoffer-JL", "ArmRyan", "xingguo01", "tgonzalezorlandoarm",
51+
"haowhsu-quic", "shewu-quic", "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "thchenqti", "jethroqti",
52+
"chenweng-quic", "cymbalrush", "DenisVieriu97", "billmguo", "StrycekSimon", "jirioc", "robert-kalmar", "skywall",
53+
"MartinPavella", "roman-janik-nxp", "novak-vaclav", "neuropilot-captain", "dijopaul", "cad-rlc", "cad-audio", "ynimmaga",
54+
"daniil-lyakhov", "emmanuel-ferdman", "cavusmustafa", "anzr299", "Jiseong-oh", "alexdean08",
55+
// explicitly include the dependabot bot login seen in PRs
56+
"dependabot[bot]"
4957
]);
5058
59+
function isBotOrExcluded(user) {
60+
if (!user) return false;
61+
// GitHub sometimes marks bots with user.type === "Bot"
62+
if (user.type && user.type.toLowerCase() === "bot") return true;
63+
// Some bots use logins that end with [bot], e.g. dependabot[bot]
64+
if (user.login && user.login.endsWith("[bot]")) return true;
65+
// Explicit excluded list
66+
if (excludedAuthors.has(user.login)) return true;
67+
return false;
68+
}
69+
5170
async function addItem(contentId, type, number) {
5271
try {
5372
await github.graphql(`
@@ -69,7 +88,7 @@ jobs:
6988
}
7089
7190
try {
72-
// Add open issues (not PRs) and exclude by author
91+
// Add open issues (not PRs) and exclude by author/bots
7392
const issues = await github.paginate(
7493
github.rest.issues.listForRepo,
7594
{
@@ -80,12 +99,14 @@ jobs:
8099
}
81100
);
82101
for (const issue of issues) {
83-
if (!issue.pull_request && !excludedAuthors.has(issue.user.login)) {
102+
if (!issue.pull_request && !isBotOrExcluded(issue.user)) {
84103
await addItem(issue.node_id, 'issue', issue.number);
104+
} else {
105+
console.log(`Skipping issue #${issue.number} by ${issue.user && issue.user.login}`);
85106
}
86107
}
87108
88-
// Add open, non-draft PRs (regardless of review state), exclude by author
109+
// Add open, non-draft PRs (regardless of review state), exclude by author/bots
89110
const prs = await github.paginate(
90111
github.rest.pulls.list,
91112
{
@@ -95,8 +116,10 @@ jobs:
95116
}
96117
);
97118
for (const pr of prs) {
98-
if (!pr.draft && !excludedAuthors.has(pr.user.login)) {
119+
if (!pr.draft && !isBotOrExcluded(pr.user)) {
99120
await addItem(pr.node_id, 'pr', pr.number);
121+
} else {
122+
console.log(`Skipping PR #${pr.number} by ${pr.user && pr.user.login}`);
100123
}
101124
}
102125
} catch (error) {

.github/workflows/cuda.yml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ jobs:
8787
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90-
test-cuda-shims:
91-
name: test-cuda-shims
90+
unittest-cuda:
91+
name: unittest-cuda
9292
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9393
permissions:
9494
id-token: write
@@ -103,17 +103,20 @@ jobs:
103103
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
104104
script: |
105105
set -eux
106-
# Install requirements
107-
bash ./install_requirements.sh
106+
# Install executorch in editable mode so custom op libs land in-tree
107+
bash ./install_executorch.sh
108108
109109
# Build ExecuTorch with CUDA support
110110
cmake --workflow --preset llm-release-cuda
111111
112-
# Build and run CUDA shim tests
112+
# Build and run CUDA shim tests (C++)
113113
pushd backends/cuda/runtime/shims/tests
114114
cmake --workflow --preset default
115115
popd
116116
117+
# Run CUDA backend Python tests, overrides addopts so that we don't run all tests in pytest.ini
118+
python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
119+
117120
export-model-cuda-artifact:
118121
name: export-model-cuda-artifact
119122
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)

backends/aoti/aoti_backend.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,10 @@ def preprocess(
156156
# Apply custom backend-specific passes
157157
custom_passes = cls.get_custom_passes(compile_specs)
158158
for custom_pass in custom_passes:
159-
custom_pass(device_edge_program.graph_module)
159+
if getattr(custom_pass, "requires_exported_program", False):
160+
custom_pass(device_edge_program)
161+
else:
162+
custom_pass(device_edge_program.graph_module)
160163

161164
# Run decompositions if any
162165
if decomposition_table:

backends/aoti/slim/core/test/targets.bzl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ def define_common_targets():
44
"""Define test targets for SlimTensor core module."""
55

66
runtime.cxx_test(
7-
name = "test_storage_cpu",
7+
name = "test_storage",
88
srcs = [
9-
"test_storage_cpu.cpp",
9+
"test_storage.cpp",
1010
],
1111
deps = [
1212
"//executorch/backends/aoti/slim/core:storage",
File renamed without changes.

backends/arm/_passes/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@
113113
from .replace_scalar_with_tensor_pass import ( # noqa
114114
ReplaceScalarWithTensorByProfilePass,
115115
)
116+
from .rewrite_bool_bitwise_not_to_logical_not_pass import ( # noqa
117+
RewriteBoolBitwiseNotToLogicalNotPass,
118+
)
116119
from .rewrite_bool_to_fp32_cast_via_int8_pass import ( # noqa
117120
RewriteBoolToFp32CastViaInt8Pass,
118121
)

backends/arm/_passes/arm_pass_manager.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
RemoveNoopPass,
104104
ReplaceInfAndLimitValuesPass,
105105
ReplaceScalarWithTensorByProfilePass,
106+
RewriteBoolBitwiseNotToLogicalNotPass,
106107
RewriteBoolToFp32CastViaInt8Pass,
107108
RewriteConvPass,
108109
RewriteMatmulPass,
@@ -222,6 +223,7 @@ def _tosa_pipeline(
222223
self.add_passes(
223224
[
224225
FuseQuantizedActivationPass(),
226+
RewriteBoolBitwiseNotToLogicalNotPass(),
225227
RewriteBoolToFp32CastViaInt8Pass(),
226228
ConvertToClampPass(),
227229
DecomposeTOSAUnsupportedClampPass(),
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
7+
from typing import Set, Type
8+
9+
import torch
10+
from executorch.backends.arm._passes import ArmPass
11+
from executorch.exir.dialects._ops import ops as exir_ops
12+
from executorch.exir.pass_base import ExportPass
13+
14+
15+
class RewriteBoolBitwiseNotToLogicalNotPass(ArmPass):
16+
"""
17+
Rewrites ``aten.bitwise_not`` on boolean tensors to ``aten.logical_not``.
18+
19+
TOSA ``bitwise_not`` does not support boolean inputs. On boolean tensors,
20+
``bitwise_not`` is equivalent to ``logical_not``, so this rewrite preserves
21+
semantics while enabling lowering.
22+
"""
23+
24+
_passes_required_after: Set[Type[ExportPass]] = set()
25+
26+
_TARGET_OPS = {
27+
exir_ops.edge.aten.bitwise_not.default,
28+
}
29+
30+
def call_operator(self, op, args, kwargs, meta):
31+
if op not in self._TARGET_OPS:
32+
return super().call_operator(op, args, kwargs, meta)
33+
34+
if meta["val"].dtype == torch.bool:
35+
x = args[0]
36+
return super().call_operator(
37+
exir_ops.edge.aten.logical_not.default,
38+
(x,),
39+
{},
40+
meta,
41+
)
42+
43+
return super().call_operator(op, args, kwargs, meta)

backends/arm/ethosu/compile_spec.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ def __init__(
7777
if "u55" in target_lower:
7878
# Add the Ethos-U55 extension marker
7979
base_tosa_version += "+u55"
80+
if "u85" in self.target:
81+
base_tosa_version += "+cf"
8082
tosa_spec = TosaSpecification.create_from_string(base_tosa_version)
8183

8284
self._set_compile_specs(tosa_spec, compiler_flags)

backends/arm/requirements-arm-ethos-u.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
ethos-u-vela == 4.4.1
6+
ethos-u-vela == 4.5.0
77
pte-adapter-model-explorer == 0.0.2

0 commit comments

Comments
 (0)