Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
6420712
Update
manuelcandales Oct 10, 2025
d036c07
Update
manuelcandales Oct 10, 2025
1a22c5e
Update
manuelcandales Oct 10, 2025
d6f0bc9
Update
manuelcandales Oct 10, 2025
7e11615
Update
manuelcandales Oct 10, 2025
dfa435a
Update
manuelcandales Oct 10, 2025
648ee07
Update
manuelcandales Oct 10, 2025
3bea537
Update
manuelcandales Oct 10, 2025
ca5f1e5
Update
manuelcandales Oct 11, 2025
7e971b0
Update
manuelcandales Oct 11, 2025
f12117b
Update
manuelcandales Oct 11, 2025
5dfcd4f
Update
manuelcandales Oct 11, 2025
de83a9f
Update
manuelcandales Oct 11, 2025
c4c16aa
Update
manuelcandales Oct 13, 2025
ce0f085
Update
manuelcandales Oct 13, 2025
e391e17
Update
manuelcandales Oct 13, 2025
3572de8
Update
manuelcandales Oct 13, 2025
bcd7655
Update
manuelcandales Oct 13, 2025
71a079d
Update
manuelcandales Oct 13, 2025
2f092af
Update
manuelcandales Oct 13, 2025
89d3f14
Update
manuelcandales Oct 13, 2025
7590e37
Update
manuelcandales Oct 13, 2025
bea144f
Update
manuelcandales Oct 13, 2025
ade75f0
Update
manuelcandales Oct 13, 2025
094b8bb
Update
manuelcandales Oct 13, 2025
e9b3372
Update
manuelcandales Oct 13, 2025
81c4588
Update
manuelcandales Oct 13, 2025
8b1d309
Update
manuelcandales Oct 13, 2025
aec8796
Update
manuelcandales Oct 13, 2025
422e4ba
Update
manuelcandales Oct 13, 2025
d075361
Update
manuelcandales Oct 13, 2025
3229b92
Update
manuelcandales Oct 13, 2025
971a762
Update
manuelcandales Oct 15, 2025
3425f17
Update
manuelcandales Oct 15, 2025
c837491
Update
manuelcandales Oct 15, 2025
aea11e8
Update
manuelcandales Oct 15, 2025
7f178d3
Update
manuelcandales Oct 15, 2025
f46adc5
Update
manuelcandales Oct 15, 2025
16d863c
Update
manuelcandales Oct 15, 2025
c80142d
Update
manuelcandales Oct 15, 2025
c3e9d0a
Update
manuelcandales Oct 15, 2025
780d883
Update
manuelcandales Oct 15, 2025
b782bb5
Update
manuelcandales Oct 15, 2025
cf93ffd
Update
manuelcandales Oct 15, 2025
4eaa345
Update
manuelcandales Oct 15, 2025
61ead64
Update
manuelcandales Oct 15, 2025
750badf
Update
manuelcandales Oct 15, 2025
71f87b6
Update
manuelcandales Oct 15, 2025
930f6b9
Update
manuelcandales Oct 15, 2025
2667a0c
Update
manuelcandales Oct 15, 2025
6a6ba04
Update
manuelcandales Oct 15, 2025
95a7024
Update
manuelcandales Oct 15, 2025
f214162
Update
manuelcandales Oct 15, 2025
e8b9828
Update
manuelcandales Oct 15, 2025
7c1b9b2
Update
manuelcandales Oct 15, 2025
d37e7ef
Update
manuelcandales Oct 15, 2025
1506e5f
Update
manuelcandales Oct 15, 2025
6f6fd58
Update
manuelcandales Oct 15, 2025
4367977
Update
manuelcandales Oct 15, 2025
9d69769
Update
manuelcandales Oct 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions backends/apple/metal/runtime/shims/et_metal_ops.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/apple/metal/runtime/shims/types.h>

namespace executorch {
namespace backends {
namespace metal {

#ifdef __cplusplus
extern "C" {
#endif

/**
* ExecuTorch implementation of aoti_torch_mps_mm_out.
* Performs simple matrix multiplication: out = self @ mat2
*/
AOTITorchError aoti_torch_mps_mm_out(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do custom ops use a caching mechanism like the ETMetalShaderLibrary?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, not yet. These fallback ops are implemented using MPSGraph, so here we would be caching the graph. This is something I want to look into later when optimizing performance, but it deserves dedicated time, in particular because I never understood why MPSGraph operations have a non-trivial CPU overhead in PyTorch, in spite of PyTorch having a caching mechanism for MPSGraphs.

AOTITensorHandle out,
AOTITensorHandle self,
AOTITensorHandle mat2);

/**
* ExecuTorch implementation of aoti_torch_mps_convolution.
* Performs 2D convolution operation - matches PyTorch AOTI signature
*
* Declared with C linkage, so the symbol name is not affected by the
* enclosing C++ namespaces.
*
* NOTE(review): the parameter descriptions below are inferred from the
* parameter names and types only; the implementation is not visible in this
* header. Confirm against the definition before relying on them.
*
* @param input Handle to the input tensor.
* @param weight Handle to the weight tensor.
* @param bias Pointer to a bias tensor handle; presumably null (or pointing
*     to a null handle) when there is no bias - TODO confirm.
* @param stride Array of stride values; stride_len_ is presumably its
*     element count.
* @param padding Array of padding values; padding_len_ is presumably its
*     element count.
* @param dilation Array of dilation values; dilation_len_ is presumably its
*     element count.
* @param transposed Integer flag; presumably non-zero selects a transposed
*     convolution - TODO confirm.
* @param output_padding Array of output-padding values; output_padding_len_
*     is presumably its element count.
* @param groups Group count for the convolution.
* @param ret0 Out-parameter; presumably receives the handle of the result
*     tensor - TODO confirm who owns the returned handle.
* @return An AOTITorchError status code.
*/
AOTITorchError aoti_torch_mps_convolution(
AOTITensorHandle input,
AOTITensorHandle weight,
AOTITensorHandle* bias,
const int64_t* stride,
int64_t stride_len_,
const int64_t* padding,
int64_t padding_len_,
const int64_t* dilation,
int64_t dilation_len_,
int32_t transposed,
const int64_t* output_padding,
int64_t output_padding_len_,
int64_t groups,
AOTITensorHandle* ret0);

/**
* ExecuTorch implementation of
* aoti_torch_mps__scaled_dot_product_attention_math_for_mps. Performs scaled
* dot product attention calculation - matches PyTorch AOTI signature
*
* Declared with C linkage, so the symbol name is not affected by the
* enclosing C++ namespaces.
*
* NOTE(review): the parameter descriptions below are inferred from the
* parameter names and types only; the implementation is not visible in this
* header. Confirm against the definition before relying on them.
*
* @param query Handle to the query tensor.
* @param key Handle to the key tensor.
* @param value Handle to the value tensor.
* @param attn_mask Pointer to an attention-mask tensor handle; presumably
*     optional (null when no mask is supplied) - TODO confirm.
* @param dropout_p Dropout probability.
* @param is_causal Integer flag; presumably non-zero requests causal
*     masking - TODO confirm.
* @param dropout_mask Pointer to a dropout-mask tensor handle; presumably
*     optional - TODO confirm.
* @param scale Pointer to a scaling factor; presumably optional, with null
*     selecting a default scale - TODO confirm.
* @param ret0 First out-parameter; presumably receives the attention output
*     tensor handle - TODO confirm.
* @param ret1 Second out-parameter; presumably receives a second result
*     tensor handle (e.g. attention weights) - TODO confirm.
* @return An AOTITorchError status code.
*/
AOTITorchError aoti_torch_mps__scaled_dot_product_attention_math_for_mps(
AOTITensorHandle query,
AOTITensorHandle key,
AOTITensorHandle value,
AOTITensorHandle* attn_mask,
double dropout_p,
int32_t is_causal,
AOTITensorHandle* dropout_mask,
double* scale,
AOTITensorHandle* ret0,
AOTITensorHandle* ret1);

#ifdef __cplusplus
} // extern "C"
#endif

} // namespace metal
} // namespace backends
} // namespace executorch
Loading
Loading