Skip to content

Commit c455f1b

Browse files
authored
Add 1MB temp allocator for executor
Differential Revision: D80578578
Pull Request resolved: #13533
1 parent aefdc8d commit c455f1b

File tree

2 files changed

+16
-5
lines changed

2 files changed

+16
-5
lines changed

backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -221,12 +221,15 @@ std::tuple<Tensor&, Tensor&, Tensor&> native_layer_norm_out(
221221
num_elm *= normalized_shape[i];
222222
}
223223

224+
constexpr size_t kAlignment =
225+
16; // 16-byte alignment for vectorized operations
226+
224227
float* weight_data;
225228
if (weight.has_value()) {
226229
weight_data = weight.value().mutable_data_ptr<float>();
227230
} else {
228231
executorch::runtime::Result<void*> temp_mem_weight =
229-
ctx.allocate_temp(num_elm * sizeof(float));
232+
ctx.allocate_temp(num_elm * sizeof(float), kAlignment);
230233
weight_data = (float*)(temp_mem_weight.get());
231234

232235
for (int i = 0; i < num_elm; i++) {
@@ -238,7 +241,7 @@ std::tuple<Tensor&, Tensor&, Tensor&> native_layer_norm_out(
238241
bias_data = bias.value().mutable_data_ptr<float>();
239242
} else {
240243
executorch::runtime::Result<void*> temp_mem_bias =
241-
ctx.allocate_temp(num_elm * sizeof(float));
244+
ctx.allocate_temp(num_elm * sizeof(float), kAlignment);
242245
bias_data = (float*)(temp_mem_bias.get());
243246

244247
for (int i = 0; i < num_elm; i++) {

backends/cadence/hifi/kernels/kernels.cpp

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,11 +21,19 @@ memcpy(void* dst, const void* src, size_t num_bytes) {
2121
}
2222

2323
void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) {
24-
ET_LOG(Info, "Attempting to allocate %zu bytes of temp memory", size);
25-
Result<void*> temp_mem_res = ctx.allocate_temp(size);
24+
constexpr size_t kAlignment =
25+
16; // 16-byte alignment for vectorized operations
26+
ET_LOG(
27+
Info,
28+
"Attempting to allocate %zu bytes of temp memory (16-byte aligned)",
29+
size);
30+
Result<void*> temp_mem_res = ctx.allocate_temp(size, kAlignment);
2631
if (temp_mem_res.ok()) {
2732
void* ptr = temp_mem_res.get();
28-
ET_LOG(Info, "Successfully allocated temp memory at %p", ptr);
33+
ET_LOG(
34+
Info,
35+
"Successfully allocated temp memory at %p (16-byte aligned)",
36+
ptr);
2937
return ptr;
3038
} else {
3139
ET_LOG(

0 commit comments

Comments
 (0)