
Commit 6b95ce7

Update
[ghstack-poisoned]
2 parents 35c56cc + 0856f59 commit 6b95ce7

File tree: 10 files changed, +160 -36 lines

.ci/scripts/unittest-buck2.sh

Lines changed: 4 additions & 3 deletions
@@ -11,9 +11,10 @@ set -eux
 # TODO: can't query //kernels/prim_ops because of non-buckified stuff in OSS.
 buck2 query "//backends/apple/... + //backends/example/... + \
 //backends/mediatek/... + //backends/transforms/... + \
-//backends/xnnpack/... + //configurations/... + //kernels/aten/... + \
-//kernels/optimized/... + //kernels/portable/... + //kernels/quantized/... + \
-//kernels/test/... + //runtime/... + //schema/... + //test/... + //util/..."
+//backends/xnnpack/... + //configurations/... + //extension/flat_tensor: + \
+//kernels/aten/... + //kernels/optimized/... + //kernels/portable/... + \
+//kernels/quantized/... + //kernels/test/... + //runtime/... + //schema/... \
++ //test/... + //util/..."
 
 # TODO: optimized ops are unbuildable because they now use ATen; put
 # them back after we can use PyTorch in OSS buck.

.github/workflows/add-unanswered-to-project.yml

Lines changed: 3 additions & 3 deletions
@@ -1,10 +1,10 @@
 name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136
 
 on:
-  # schedule:
-  #   - cron: '0 * * * *'
   workflow_dispatch:
-
+  pull_request:
+    paths:
+      .github/workflows/add-unanswered-to-project.yml
 jobs:
   add_to_project:
     runs-on: ubuntu-latest

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -20,6 +20,7 @@ dist/
 ethos-u-scratch/
 executorch.egg-info
 pip-out/
+build-profiling/
 
 # Any exported models and profiling outputs
 *.bin
@@ -60,6 +61,7 @@ xcuserdata/
 /share/
 /version.py
 *.csv
+*_etdump
 
 # Android
 *.aar

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -817,9 +817,9 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs openvino_backend)
   endif()
 
-  if(EXECUTORCH_BUILD_QNN)
-    list(APPEND _dep_libs qnn_executorch_backend)
-  endif()
+  if(EXECUTORCH_BUILD_QNN)
+    list(APPEND _dep_libs qnn_executorch_backend)
+  endif()
 
   if(EXECUTORCH_BUILD_XNNPACK)
     # need to explicitly specify XNNPACK and xnnpack-microkernels-prod here

backends/cadence/hifi/operators/op_softmax.cpp

Lines changed: 32 additions & 4 deletions
@@ -72,7 +72,6 @@ Tensor& _softmax_out(
   if (optimized) {
     int* p_inp = (int*)in.const_data_ptr<float>();
    int* out_data = (int*)out.mutable_data_ptr<float>();
-
     int num_inp_dims = in.dim();
     int num_out_dims = num_inp_dims;
 
@@ -99,6 +98,37 @@ Tensor& _softmax_out(
 
     outer_stride = size;
 
+    WORD32 ret_val = 0;
+
+    // Check if the input is permuted. If not, then we don't need to transpose
+    bool is_permuted = false;
+    for (int i = 0; i < num_inp_dims; i++) {
+      if (p_permute_vec[i] != i) {
+        is_permuted = true;
+        break;
+      }
+    }
+
+    if (!is_permuted) {
+      const float* p_inpf = in.const_data_ptr<float>();
+      float* out_dataf = out.mutable_data_ptr<float>();
+
+      for (size_t outer_idx = 0; outer_idx < outer_size; ++outer_idx) {
+        size_t outer = outer_idx * outer_stride;
+        for (size_t inner_idx = 0; inner_idx < stride; ++inner_idx) {
+          size_t base = outer + inner_idx;
+
+          float* p_in_data = (float*)&p_inpf[base];
+          float* p_out_data = (float*)&out_dataf[base];
+
+          ret_val = xa_nn_vec_softmax_f32_f32(p_out_data, p_in_data, size);
+
+          ET_KERNEL_CHECK(ctx, ret_val == 0, Internal, out);
+        }
+      }
+      return out;
+    }
+
     int* p_out =
         (int*)kernels::allocate_temp_memory(ctx, out.numel() * sizeof(int));
 
@@ -109,7 +139,7 @@ Tensor& _softmax_out(
 
     ET_KERNEL_CHECK(ctx, p_out1 != nullptr, MemoryAllocationFailed, out);
 
-    WORD32 ret_val = xa_nn_transpose_32_32(
+    ret_val = xa_nn_transpose_32_32(
         p_out,
         p_out_shape,
         p_inp,
@@ -142,9 +172,7 @@ Tensor& _softmax_out(
         p_permute_vec,
         num_out_dims,
         num_inp_dims);
-
     ET_KERNEL_CHECK(ctx, ret_val == 0, Internal, out);
-
     return out;
   }
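The fast path added above rests on a simple observation: when p_permute_vec is the identity permutation, the softmax dimension is already the innermost one, so xa_nn_vec_softmax_f32_f32 can run directly on each contiguous slice and the transpose/transpose-back round trip (and its temporary buffers) can be skipped. A minimal NumPy sketch of that reasoning, using illustrative names rather than the kernel's actual API:

    import numpy as np

    def softmax_last_dim(x):
        # Numerically stable softmax over the innermost axis, standing in for
        # xa_nn_vec_softmax_f32_f32 applied to one contiguous slice at a time.
        e = np.exp(x - x.max(axis=-1, keepdims=True))
        return e / e.sum(axis=-1, keepdims=True)

    x = np.random.rand(2, 3, 4).astype(np.float32)
    perm = [0, 1, 2]  # identity permutation: the softmax dim is already innermost

    # General path: permute, softmax over the last axis, permute back.
    general = softmax_last_dim(x.transpose(perm)).transpose(np.argsort(perm))
    # Fast path taken by the kernel when perm is the identity: no transpose at all.
    fast = softmax_last_dim(x)

    assert np.allclose(general, fast)  # same result, so the transpose can be skipped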

devtools/scripts/generate_profiling_csv.py

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024-25 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+
+from executorch.devtools import Inspector
+
+
+def generate_csv(etdump_path, output):
+    """
+    Generate a CSV file from ETDump profiling data.
+
+    Args:
+        etdump_path (str): Path to the ETDump file generated by executor_runner
+        output (str): Path for the output CSV file
+    """
+    inspector = Inspector(etdump_path)
+    df = inspector.to_dataframe()
+    df.to_csv(output)
+
+
+def main():
+    """
+    Main function to parse command line arguments and generate profiling CSV.
+
+    Usage:
+        python generate_profiling_csv.py --etdump_path="my_etdump" --output="profiling.csv"
+
+    Example:
+        python generate_profiling_csv.py --etdump_path="llama3_etdump" --output="op_profiling.csv"
+    """
+    parser = argparse.ArgumentParser(
+        description="Generate profiling CSV from a model's etdump"
+    )
+    parser.add_argument(
+        "--etdump_path",
+        type=str,
+        default="./model.etdump",
+        help="Path to the etdump file",
+        required=False,
+    )
+
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="./model_profiling.csv",
+        help="Path to the output CSV file",
+        required=False,
+    )
+
+    args = parser.parse_args()
+    print(f"Generating CSV from {args.etdump_path}")
+    generate_csv(args.etdump_path, args.output)
+    print(f"Saved CSV to {args.output}")
+
+
+if __name__ == "__main__":
+    main()
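A natural follow-up is to load the generated CSV with pandas and rank operators by latency. A minimal sketch, assuming illustrative column names ("event_name", "avg") rather than the exact schema Inspector.to_dataframe() emits; inspect df.columns on a real dump first:

    import pandas as pd

    df = pd.read_csv("op_profiling.csv")
    print(df.columns.tolist())  # confirm which name/timing columns are actually present

    # Hypothetical column names; adjust to whatever the printed columns show.
    if {"event_name", "avg"}.issubset(df.columns):
        slowest = df.sort_values("avg", ascending=False).head(10)
        print(slowest[["event_name", "avg"]])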

devtools/scripts/profile_model.sh

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024-25 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+#!/bin/bash
+
+# ExecutorTorch Model Profiling Script
+#
+# This script automates the process of building executor_runner with profiling enabled,
+# running model inference with ETDump collection, and generating CSV profiling reports.
+#
+# Usage:
+#   ./devtools/scripts/profile_model.sh [model_path] [etdump_path]
+#
+# Arguments:
+#   model_path  - Path to the .pte model file (default: "my_model")
+#   etdump_path - Path for ETDump output file (default: "path_to_et_dump")
+#
+# Examples:
+#   ./devtools/scripts/profile_model.sh
+#   ./devtools/scripts/profile_model.sh llama3.pte llama3_etdump
+#
+# Note: This script must be run from the top-level executorch directory.
+
+set -e
+
+echo "Building executor_runner with profiling enabled..."
+
+cmake --preset profiling -B build-profiling -DCMAKE_BUILD_TYPE=Release
+cmake --build build-profiling --target executor_runner
+
+echo "Build completed successfully!"
+
+MODEL_PATH=${1:-"my_model"}
+ETDUMP_PATH=${2:-"path_to_et_dump"}
+
+echo "Running and profiling model: $MODEL_PATH"
+echo "ETDump output path: $ETDUMP_PATH"
+
+./build-profiling/executor_runner --model_path="$MODEL_PATH" --etdump_path="$ETDUMP_PATH"
+
+echo "Profiling run completed!"
+
+echo "Generating profiling CSV..."
+python devtools/scripts/generate_profiling_csv.py --etdump_path="$ETDUMP_PATH" --output="op_profiling.csv"
+
+echo "Profiling CSV generated: op_profiling.csv"
+echo "Profiling workflow completed successfully!"

extension/flat_tensor/targets.bzl

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
-load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime")
 
 def define_common_targets():
-    for aten_mode in [True, False]:
+    for aten_mode in get_aten_mode_options():
         aten_suffix = "_aten" if aten_mode else ""
         runtime.cxx_library(
             name = "flat_tensor_data_map" + aten_suffix,

shim_et/xplat/executorch/build/build_variables.bzl

Lines changed: 1 addition & 19 deletions
@@ -290,11 +290,6 @@ QUANTIZED_KERNELS_SRCS = [
     "kernels/quantized/cpu/op_quantize.cpp",
 ]
 
-PROGRAM_SCHEMA_SRCS = [
-    "schema/program.fbs",
-    "schema/scalar_type.fbs",
-]
-
 OPTIMIZED_CPUBLAS_SRCS = [
     "kernels/optimized/blas/BlasKernel.cpp",
     "kernels/optimized/blas/CPUBlas.cpp",
@@ -375,27 +370,14 @@ THREADPOOL_SRCS = [
 EXTENSION_THREADPOOL_SRCS = ["extension/threadpool/" + x for x in THREADPOOL_SRCS]
 
 EXTENSION_TRAINING_SRCS = [
-    "extension/data_loader/file_data_loader.cpp",
-    "extension/data_loader/mmap_data_loader.cpp",
-    "extension/flat_tensor/flat_tensor_data_map.cpp",
-    "extension/flat_tensor/serialize/flat_tensor_header.cpp",
-    "extension/module/module.cpp",
     "extension/training/module/training_module.cpp",
     "extension/training/optimizer/sgd.cpp",
 ]
 
 TRAIN_XOR_SRCS = [
-    "extension/data_loader/file_data_loader.cpp",
-    "extension/data_loader/mmap_data_loader.cpp",
-    "extension/flat_tensor/flat_tensor_data_map.cpp",
-    "extension/flat_tensor/serialize/flat_tensor_header.cpp",
+    # REVIEW: removing this breaks the build; where is it supposed to come from?
     "extension/flat_tensor/serialize/serialize.cpp",
-    "extension/module/module.cpp",
-    "extension/tensor/tensor_ptr.cpp",
-    "extension/tensor/tensor_ptr_maker.cpp",
     "extension/training/examples/XOR/train.cpp",
-    "extension/training/module/training_module.cpp",
-    "extension/training/optimizer/sgd.cpp",
 ]
 
 EXECUTOR_RUNNER_SRCS = [

tools/cmake/Codegen.cmake

Lines changed: 0 additions & 2 deletions
@@ -386,7 +386,6 @@ function(executorch_load_build_variables)
     KERNELS_UTIL_ALL_DEPS_SRCS
     OPTIMIZED_KERNELS_SRCS
     QUANTIZED_KERNELS_SRCS
-    PROGRAM_SCHEMA_SRCS
     OPTIMIZED_CPUBLAS_SRCS
     OPTIMIZED_NATIVE_CPU_OPS_SRCS
     TEST_BACKEND_COMPILER_LIB_SRCS
@@ -419,7 +418,6 @@ function(executorch_load_build_variables)
     _kernels_util_all_deps__srcs
     _optimized_kernels__srcs
     _quantized_kernels__srcs
-    _program_schema__srcs
     _optimized_cpublas__srcs
     _optimized_native_cpu_ops__srcs
     _test_backend_compiler_lib__srcs
