pytorch · cymbalrush · Sep 11, 2024
@@ -8,10 +8,6 @@
 #import <CoreML/CoreML.h>
 #import <vector>
 
-#if !defined(MODEL_STATE_IS_SUPPORTED) && __has_include(<CoreML/MLModel+MLState.h>)
-#define MODEL_STATE_IS_SUPPORTED 1
-#endif
-
 NS_ASSUME_NONNULL_BEGIN
 
 @class ETCoreMLAsset;
@@ -45,7 +41,7 @@ __attribute__((objc_subclassing_restricted))
 @property (strong, readonly, nonatomic) MLModel* mlModel;
 
 /// The model state.
-@property (strong, readonly, nonatomic) id state API_AVAILABLE(macos(15.0), ios(18.0), tvos(18.0), watchos(11.0));
+@property (strong, readonly, nonatomic, nullable) id state;
 
 /// The asset from which the model is loaded.
 @property (strong, readonly, nonatomic) ETCoreMLAsset* asset;

@@ -157,6 +157,19 @@ size_t get_number_of_bytes(MLMultiArrayDataType data_type) {
     return get_multi_array_constraints_by_name(description.outputDescriptionsByName);
 }
 
+#if MODEL_STATE_IS_SUPPORTED // compiled only when the MODEL_STATE_IS_SUPPORTED macro is defined non-zero
+API_AVAILABLE(macos(15.0), ios(18.0), tvos(18.0), watchos(11.0))
+void reset_state_for_feature_name(NSString *feature_name, MLState *state) { // zero-fills the buffer of the state named `feature_name`
+    [state getMultiArrayForStateNamed:feature_name handler:^(MLMultiArray *buffer) {
+        [buffer getMutableBytesWithHandler:^(void *mutableBytes, NSInteger size, NSArray<NSNumber *> * __unused strides) {
+            uint8_t *start = reinterpret_cast<uint8_t *>(mutableBytes); // view the mutable storage as raw bytes
+            uint8_t *end = start + size; // NOTE(review): assumes `size` is a byte count - confirm against the MLMultiArray documentation
+            std::fill(start, end, uint8_t(0)); // overwrite [start, end) with zeros; `strides` is deliberately unused
+        }];
+    }];
+}
+#endif
+
 }
 
 #pragma mark - ETCoreMLModel
@@ -282,7 +295,6 @@ MultiArray buffer(mutableBytes, MultiArray::MemoryLayout(to_multiarray_data_type
 - (nullable id<MLFeatureProvider>)predictionFromFeatures:(id<MLFeatureProvider>)input
                                                  options:(MLPredictionOptions *)options
                                                    error:(NSError **)error {
-
 #if MODEL_STATE_IS_SUPPORTED
     if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
         if (self.state != nil) {
@@ -294,28 +306,33 @@ MultiArray buffer(mutableBytes, MultiArray::MemoryLayout(to_multiarray_data_type
     }
 #endif
 
-    return [self.mlModel predictionFromFeatures:input
-                                        options:options
-                                          error:error];
+    id<MLFeatureProvider> result = [self.mlModel predictionFromFeatures:input
+                                                                options:options
+                                                                  error:error];
+
+    return result;
 }
 
 - (BOOL)prewarmAndReturnError:(NSError* __autoreleasing*)error {
-    BOOL prewarm = YES;
-#if MODEL_STATE_IS_SUPPORTED
-    if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
-        prewarm = (self.mlModel.modelDescription.stateDescriptionsByName.count == 0);
-    }
-#endif
-
     NSError *localError = nil;
-    BOOL result = prewarm ? [self.mlModel prewarmAndReturnError:&localError] : NO;
+    BOOL result = [self.mlModel prewarmUsingState:self.state error:&localError]; // capture the failure locally: it is logged below and later copied to `*error`
     if (!result) {
         ETCoreMLLogError(localError,
                          "%@: Failed to prewarm model with identifier = %@",
                          NSStringFromClass(self.class),
                          self.identifier);
     }
 
+#if MODEL_STATE_IS_SUPPORTED // the prewarm prediction above ran against self.state, so zero every state buffer afterwards
+    if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
+        NSDictionary<NSString *, MLFeatureDescription *> *stateDescriptions = self.mlModel.modelDescription.stateDescriptionsByName;
+        [stateDescriptions enumerateKeysAndObjectsUsingBlock:^(NSString *featureName, MLFeatureDescription * __unused obj, BOOL * __unused stop) {
+            reset_state_for_feature_name(featureName, (MLState *) self.state);
+        }];
+    }
+#endif
+
+    // Hand the locally captured error (the one that was logged above) to the caller's out-parameter.
     if (error) {
         *error = localError;
     }

@@ -669,16 +669,15 @@ - (void)addPrewarmedAsset:(ETCoreMLAsset *)asset {
                                                                        error:&localError];
     // Try without output backings.
     if (!modelOutputs && predictionOptions.outputBackings.count > 0) {
-        localError = nil;
         executor.ignoreOutputBackings = YES;
+        localError = nil;
+        modelOutputs = [executor executeModelWithInputs:inputFeatures
+                                      predictionOptions:predictionOptions
+                                         loggingOptions:loggingOptions
+                                            eventLogger:eventLogger
+                                                  error:&localError];
     }
-
-    modelOutputs = [executor executeModelWithInputs:inputFeatures
-                                  predictionOptions:predictionOptions
-                                     loggingOptions:loggingOptions
-                                        eventLogger:eventLogger
-                                              error:&localError];
-
+
     if (error) {
         *error = localError;
     }

@@ -8,16 +8,20 @@
 
 #import <CoreML/CoreML.h>
 
+#if !defined(MODEL_STATE_IS_SUPPORTED) && __has_include(<CoreML/MLModel+MLState.h>)
+#define MODEL_STATE_IS_SUPPORTED 1
+#endif
 
 NS_ASSUME_NONNULL_BEGIN
 
 @interface MLModel (Prewarm)
 
 /// Pre-warms the model by running a prediction with zeroed-out inputs.
 ///
+/// @param state The model state to run the prediction with, or `nil` to run a stateless prediction.
 /// @param error   On failure, error is filled with the failure information.
 /// @retval `YES` if the prediction succeeded otherwise `NO`.
-- (BOOL)prewarmAndReturnError:(NSError* __autoreleasing*)error;
+- (BOOL)prewarmUsingState:(nullable id)state error:(NSError* __autoreleasing*)error;
 
 @end
 

@@ -71,16 +71,28 @@ + (MLMultiArray *)zeroedMultiArrayWithShape:(NSArray<NSNumber *> *)shape
 
 @implementation MLModel (Prewarm)
 
-- (BOOL)prewarmAndReturnError:(NSError * __autoreleasing *)error {
+- (BOOL)prewarmUsingState:(nullable id)state error:(NSError * __autoreleasing *)error {
     @autoreleasepool {
         id<MLFeatureProvider> inputs = ::get_zeroed_inputs(self, error);
         if (!inputs) {
             return NO;
         }
 
-        id<MLFeatureProvider> outputs = [self predictionFromFeatures:inputs error:error];
+        // Use a stateful prediction when a state is supplied and the state API is compiled in and available at runtime.
+        id<MLFeatureProvider> outputs = nil;
+        if (state != nil) {
+#if MODEL_STATE_IS_SUPPORTED
+            if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *)) {
+                outputs = [self predictionFromFeatures:inputs usingState:(MLState *)state error:error]; // `state` is declared `id`; it is cast to MLState here
+                return outputs != nil;
+            }
+#endif
+        }
+        // Stateless path: reached when no state was given or the stateful branch above did not run.
+        outputs = [self predictionFromFeatures:inputs error:error];
         return outputs != nil;
     }
 }
 
+
 @end
@@ -13,6 +13,8 @@
 #import <executorch/runtime/platform/runtime.h>
 #import <string>
 
+#import "MLModel_Prewarm.h"
+
 static constexpr size_t kRuntimeMemorySize = 50 * 1024U * 1024U; // 50 MB
 
 using namespace torch::executor;
@@ -184,20 +186,28 @@ - (void)executeModelAtURL:(NSURL *)modelURL nLoads:(NSUInteger)nLoads nExecution
 - (void)testAddProgramExecute {
     NSURL *modelURL = [[self class] bundledResourceWithName:@"add_coreml_all" extension:@"pte"];
     XCTAssertNotNil(modelURL);
-    [self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
+    [self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
 }
 
 - (void)testMulProgramExecute {
     NSURL *modelURL = [[self class] bundledResourceWithName:@"mul_coreml_all" extension:@"pte"];
     XCTAssertNotNil(modelURL);
-    [self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
+    [self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
 }
 
 - (void)testMV3ProgramExecute {
     NSURL *modelURL = [[self class] bundledResourceWithName:@"mv3_coreml_all" extension:@"pte"];
     XCTAssertNotNil(modelURL);
-    [self executeModelAtURL:modelURL nLoads:5 nExecutions:2];
+    [self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
+}
+
+#if MODEL_STATE_IS_SUPPORTED // test is built only when the MODEL_STATE_IS_SUPPORTED macro is defined non-zero
+- (void)testStateProgramExecute { // loads the bundled state_coreml_all.pte once and executes it twice
+    NSURL *modelURL = [[self class] bundledResourceWithName:@"state_coreml_all" extension:@"pte"];
+    XCTAssertNotNil(modelURL);
+    [self executeModelAtURL:modelURL nLoads:1 nExecutions:2];
 }
+#endif
 
 - (void)executeMultipleModelsConcurrently:(NSArray<NSURL *> *)modelURLs
                                    nLoads:(NSUInteger)nLoads

@@ -0,0 +1,77 @@
+# Copyright © 2024 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+import os
+from pathlib import Path
+
+import coremltools as ct
+import executorch.exir as exir
+
+import torch
+
+from executorch.backends.apple.coreml.compiler import CoreMLBackend
+from executorch.backends.apple.coreml.partition import CoreMLPartitioner
+from torch.export import export
+
+
+class StatefulModel(torch.nn.Module):  # module with a registered `cache` buffer that forward() writes into
+    def __init__(
+        self,
+        embedding_dim: int,
+        max_seq_len: int,
+    ):
+        super().__init__()
+        self.register_buffer(  # zero-initialised float32 buffer of shape (max_seq_len, embedding_dim)
+            "cache", torch.zeros((max_seq_len, embedding_dim), dtype=torch.float32)
+        )
+    # forward(): stores k_val in the cache at the rows given by input_pos, then multiplies the result by q transposed.
+    def forward(
+        self,
+        q: torch.Tensor,
+        k_val: torch.Tensor,
+        input_pos: torch.Tensor,
+    ):
+        q_T = q.transpose(0, 1)  # swap dimensions 0 and 1 of q
+        k = torch.ops.aten.index_put_(self.cache, [input_pos, None], k_val)  # in-place index_put_ on self.cache, indexed by input_pos along dim 0
+        attn = k.mm(q_T)  # matrix product of the index_put_ result with q transposed
+        return attn
+
+
+def main() -> None:  # exports StatefulModel via the Core ML partitioner and writes models/state_coreml_all.pte
+    embedding_dim = 3
+    max_seq_len = 2
+    model = StatefulModel(embedding_dim=embedding_dim, max_seq_len=max_seq_len)
+    example_inputs = (  # positional inputs in forward()'s parameter order: q, k_val, input_pos
+        torch.randn((1, embedding_dim)),
+        torch.randn((1, embedding_dim)),
+        torch.tensor([0]),
+    )
+    exported_model = export(model, example_inputs)
+    edge_program_manager = exir.to_edge(exported_model)
+    compile_specs = CoreMLBackend.generate_compile_specs(
+        compute_precision=ct.precision.FLOAT16,
+        compute_unit=ct.ComputeUnit.ALL,
+        minimum_deployment_target=ct.target.iOS18,  # NOTE(review): presumably required for Core ML state support - confirm
+    )
+    # Build the partitioner from the compile specs above; no ops are excluded from delegation.
+    partitioner = CoreMLPartitioner(
+        skip_ops_for_coreml_delegation=None,
+        compile_specs=compile_specs,
+    )
+    # Delegate to the backend selected by the partitioner, then lower to an ExecuTorch program.
+    delegated_program_manager = edge_program_manager.to_backend(partitioner)
+    exec_program = delegated_program_manager.to_executorch(
+        config=exir.ExecutorchBackendConfig(extract_delegate_segments=True)
+    )
+    # Write the serialized program into a `models` directory next to this script.
+    buffer = exec_program.buffer
+    models_dir = Path(os.path.dirname(os.path.realpath(__file__))) / "models"
+    models_dir.mkdir(parents=False, exist_ok=True)  # creates only the final directory; an existing one is not an error
+    file_path = models_dir / "state_coreml_all.pte"
+    with open(file_path.resolve(), "wb") as file:
+        file.write(buffer)
+
+
+if __name__ == "__main__":
+    main()  # pragma: no cover
@@ -7,6 +7,7 @@
 	objects = {
 
 /* Begin PBXBuildFile section */
+		8307EB8A2C9262060011AE6D /* state_coreml_all.pte in Resources */ = {isa = PBXBuildFile; fileRef = 8307EB892C9262060011AE6D /* state_coreml_all.pte */; };
 		83BB78A02C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm in Sources */ = {isa = PBXBuildFile; fileRef = 83BB789F2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm */; };
 		83BB78BF2C66AAAE00274ED7 /* add_mul_coreml_all.bin in Resources */ = {isa = PBXBuildFile; fileRef = 83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */; };
 		83BB78C02C66AAAE00274ED7 /* add_mul_coreml_all.pte in Resources */ = {isa = PBXBuildFile; fileRef = 83BB78BE2C66AAAE00274ED7 /* add_mul_coreml_all.pte */; };
@@ -120,6 +121,7 @@
 /* End PBXCopyFilesBuildPhase section */
 
 /* Begin PBXFileReference section */
+		8307EB892C9262060011AE6D /* state_coreml_all.pte */ = {isa = PBXFileReference; lastKnownFileType = file; name = state_coreml_all.pte; path = ../test/models/state_coreml_all.pte; sourceTree = "<group>"; };
 		83BB789E2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = ETCoreMLModelDebugInfo.h; path = ../sdk/ETCoreMLModelDebugInfo.h; sourceTree = "<group>"; };
 		83BB789F2C65DA7300274ED7 /* ETCoreMLModelDebugInfo.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; name = ETCoreMLModelDebugInfo.mm; path = ../sdk/ETCoreMLModelDebugInfo.mm; sourceTree = "<group>"; };
 		83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = add_mul_coreml_all.bin; path = ../test/models/add_mul_coreml_all.bin; sourceTree = "<group>"; };
@@ -607,6 +609,7 @@
 				C98551982AD2542D009143F9 /* mv3_coreml_all.pte */,
 				83BB78BD2C66AAAE00274ED7 /* add_mul_coreml_all.bin */,
 				83BB78BE2C66AAAE00274ED7 /* add_mul_coreml_all.pte */,
+				8307EB892C9262060011AE6D /* state_coreml_all.pte */,
 			);
 			name = models;
 			sourceTree = "<group>";
@@ -677,6 +680,7 @@
 				C985519E2AD2542D009143F9 /* mv3_coreml_all.pte in Resources */,
 				C98551A02AD2542D009143F9 /* add_coreml_all.bin in Resources */,
 				C98551A22AD2542D009143F9 /* mul_coreml_all.pte in Resources */,
+				8307EB8A2C9262060011AE6D /* state_coreml_all.pte in Resources */,
 				C98551A32AD2542D009143F9 /* add_coreml_all.pte in Resources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;

@@ -17,14 +17,17 @@ cd "$EXECUTORCH_ROOT_PATH"
 
 mkdir "$COREML_DIR_PATH/runtime/test/models/"
 #Generate models
-echo "Executorch: Generating test models"
 cd "$EXECUTORCH_ROOT_PATH"
 
 MODELS=("add" "add_mul" "mul" "mv3")
 for MODEL in "${MODELS[@]}"
 do
+  echo "Executorch: Generating $MODEL model" 
   # TODO: Don't use the script in examples directory.
   python3 -m examples.apple.coreml.scripts.export --model_name "$MODEL" --save_processed_bytes
   mv -f "$MODEL""_coreml_all.pte" "$COREML_DIR_PATH/runtime/test/models"
   mv -f "$MODEL""_coreml_all.bin" "$COREML_DIR_PATH/runtime/test/models"
 done
+
+echo "Executorch: Generating stateful model"
+python3 "$SCRIPT_DIR_PATH/../runtime/test/export_stateful_model.py"