
Commit fe71ff0
chore: resolve code conflicts
Merge commit, 2 parents: d3cd6d9 + c90b705

24 files changed: +744 / -174 lines

.github/pull_request_template.md

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+Thank you for your interest in contributing to Core ML Stable Diffusion! Please review [CONTRIBUTING.md](../CONTRIBUTING.md) first. If you would like to proceed with making a pull request, please indicate your agreement to the terms outlined in CONTRIBUTING.md by checking the box below. If not, please go ahead and fork this repo and make your updates.
+
+We appreciate your interest in the project!
+
+Do not erase the below when submitting your pull request:
+#########
+
+- [ ] I agree to the terms outlined in CONTRIBUTING.md

CONTRIBUTING.md

Lines changed: 9 additions & 5 deletions
@@ -1,11 +1,15 @@
 # Contribution Guide
 
-Thanks for your interest in contributing. This project was released for system demonstration purposes and there are limited plans for future development of the repository.
+Thank you for your interest in contributing to Core ML Stable Diffusion! This project was released for system demonstration purposes and there are limited plans for future development of the repository. While we welcome new pull requests and issues please note that our response may be limited. Forks and out-of-tree improvements are strongly encouraged.
 
-While we welcome new pull requests and issues please note that our response may be limited. Forks and out-of-tree improvements are strongly encouraged.
+## Submitting a Pull Request
 
-## Before you get started
+Under the project's [LICENSE](LICENSE.md), confirmation of the following is required to merge your changes:
 
-By submitting a pull request, you represent that you have the right to license your contribution to Apple and the community, and agree by submitting the patch that your contributions are licensed under the [LICENSE](LICENSE).
+*"I agree that all information entered is original and owned by me, and I hereby provide an irrevocable, royalty-free license to Apple to use, modify, copy, publish, prepare derivative works of, distribute (including under the Apple Sample Code License), such information and all intellectual property therein in whole or part, in perpetuity and worldwide, without any attribution."*
 
-We ask that all community members read and observe our [Code of Conduct](CODE_OF_CONDUCT.md).
+By submitting a pull request, you represent that you have the right to license your contribution to Apple and the community, and agree by submitting the patch that your contributions are licensed under the [LICENSE](LICENSE.md).
+
+## Code of Conduct
+
+We ask that all community members read and observe our [Code of Conduct](CODE_OF_CONDUCT.md).

Package.swift

Lines changed: 3 additions & 3 deletions
@@ -6,9 +6,9 @@ import PackageDescription
 let package = Package(
     name: "stable-diffusion",
     platforms: [
-        .macOS(.v13),
-        .iOS(.v16),
+        .macOS(.v11),
+        .iOS(.v14),
     ],
     products: [
         .library(
             name: "StableDiffusion",

README.md

Lines changed: 70 additions & 26 deletions
Large diffs are not rendered by default.

python_coreml_stable_diffusion/pipeline.py

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@
 import time
 import torch # Only used for `torch.from_tensor` in `pipe.scheduler.step()`
 from transformers import CLIPFeatureExtractor, CLIPTokenizer
-from typing import Union, Optional
+from typing import List, Optional, Union
 
 
 class CoreMLStableDiffusionPipeline(DiffusionPipeline):

python_coreml_stable_diffusion/torch2coreml.py

Lines changed: 1 addition & 1 deletion
@@ -576,7 +576,7 @@ def convert_unet(pipe, args):
     # Set the output descriptions
     coreml_unet.output_description["noise_pred"] = \
         "Same shape and dtype as the `sample` input. " \
-        "The predicted noise to faciliate the reverse diffusion (denoising) process"
+        "The predicted noise to facilitate the reverse diffusion (denoising) process"
 
     _save_mlpackage(coreml_unet, out_path)
     logger.info(f"Saved unet into {out_path}")

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"transformers",
2121
"scipy",
2222
"gradio",
23+
"numpy<1.24",
2324
],
2425
packages=find_packages(),
2526
classifiers=[
swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift (new file)

Lines changed: 182 additions & 0 deletions

// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. and The HuggingFace Team. All Rights Reserved.

import Accelerate
import CoreML

/// A scheduler used to compute a de-noised image
///
/// This implementation matches:
/// [Hugging Face Diffusers DPMSolverMultistepScheduler](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py)
///
/// It uses the DPM-Solver++ algorithm: [code](https://github.com/LuChengTHU/dpm-solver) [paper](https://arxiv.org/abs/2211.01095).
/// Limitations:
///  - Only implemented for DPM-Solver++ algorithm (not DPM-Solver).
///  - Second order only.
///  - Assumes the model predicts epsilon.
///  - No dynamic thresholding.
///  - `midpoint` solver algorithm.
@available(iOS 16.2, macOS 13.1, *)
public final class DPMSolverMultistepScheduler: Scheduler {
    public let trainStepCount: Int
    public let inferenceStepCount: Int
    public let betas: [Float]
    public let alphas: [Float]
    public let alphasCumProd: [Float]
    public let timeSteps: [Int]

    public let alpha_t: [Float]
    public let sigma_t: [Float]
    public let lambda_t: [Float]

    public let solverOrder = 2
    private(set) var lowerOrderStepped = 0

    /// Whether to use lower-order solvers in the final steps. Only valid for less than 15 inference steps.
    /// We empirically find this trick can stabilize the sampling of DPM-Solver, especially with 10 or fewer steps.
    public let useLowerOrderFinal = true

    // Stores solverOrder (2) items
    private(set) var modelOutputs: [MLShapedArray<Float32>] = []

    /// Create a scheduler that uses a second order DPM-Solver++ algorithm.
    ///
    /// - Parameters:
    ///   - stepCount: Number of inference steps to schedule
    ///   - trainStepCount: Number of training diffusion steps
    ///   - betaSchedule: Method to schedule betas from betaStart to betaEnd
    ///   - betaStart: The starting value of beta for inference
    ///   - betaEnd: The end value for beta for inference
    /// - Returns: A scheduler ready for its first step
    public init(
        stepCount: Int = 50,
        trainStepCount: Int = 1000,
        betaSchedule: BetaSchedule = .scaledLinear,
        betaStart: Float = 0.00085,
        betaEnd: Float = 0.012
    ) {
        self.trainStepCount = trainStepCount
        self.inferenceStepCount = stepCount

        switch betaSchedule {
        case .linear:
            self.betas = linspace(betaStart, betaEnd, trainStepCount)
        case .scaledLinear:
            self.betas = linspace(pow(betaStart, 0.5), pow(betaEnd, 0.5), trainStepCount).map({ $0 * $0 })
        }

        self.alphas = betas.map({ 1.0 - $0 })
        var alphasCumProd = self.alphas
        for i in 1..<alphasCumProd.count {
            alphasCumProd[i] *= alphasCumProd[i - 1]
        }
        self.alphasCumProd = alphasCumProd

        // Currently we only support VP-type noise schedule
        self.alpha_t = vForce.sqrt(self.alphasCumProd)
        self.sigma_t = vForce.sqrt(vDSP.subtract([Float](repeating: 1, count: self.alphasCumProd.count), self.alphasCumProd))
        self.lambda_t = zip(self.alpha_t, self.sigma_t).map { α, σ in log(α) - log(σ) }

        self.timeSteps = linspace(0, Float(self.trainStepCount - 1), stepCount).reversed().map { Int(round($0)) }
    }

    /// Convert the model output to the corresponding type the algorithm needs.
    /// This implementation is for second-order DPM-Solver++ assuming epsilon prediction.
    func convertModelOutput(modelOutput: MLShapedArray<Float32>, timestep: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        assert(modelOutput.scalars.count == sample.scalars.count)
        let (alpha_t, sigma_t) = (self.alpha_t[timestep], self.sigma_t[timestep])

        // This could be optimized with a Metal kernel if we find we need to
        let x0_scalars = zip(modelOutput.scalars, sample.scalars).map { m, s in
            (s - m * sigma_t) / alpha_t
        }
        return MLShapedArray(scalars: x0_scalars, shape: modelOutput.shape)
    }

    /// One step for the first-order DPM-Solver (equivalent to DDIM).
    /// See https://arxiv.org/abs/2206.00927 for the detailed derivation.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func firstOrderUpdate(
        modelOutput: MLShapedArray<Float32>,
        timestep: Int,
        prevTimestep: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        let (p_lambda_t, lambda_s) = (Double(lambda_t[prevTimestep]), Double(lambda_t[timestep]))
        let p_alpha_t = Double(alpha_t[prevTimestep])
        let (p_sigma_t, sigma_s) = (Double(sigma_t[prevTimestep]), Double(sigma_t[timestep]))
        let h = p_lambda_t - lambda_s
        // x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output
        let x_t = weightedSum(
            [p_sigma_t / sigma_s, -p_alpha_t * (exp(-h) - 1)],
            [sample, modelOutput]
        )
        return x_t
    }

    /// One step for the second-order multistep DPM-Solver++ algorithm, using the midpoint method.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func secondOrderUpdate(
        modelOutputs: [MLShapedArray<Float32>],
        timesteps: [Int],
        prevTimestep t: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        let (s0, s1) = (timesteps[back: 1], timesteps[back: 2])
        let (m0, m1) = (modelOutputs[back: 1], modelOutputs[back: 2])
        let (p_lambda_t, lambda_s0, lambda_s1) = (Double(lambda_t[t]), Double(lambda_t[s0]), Double(lambda_t[s1]))
        let p_alpha_t = Double(alpha_t[t])
        let (p_sigma_t, sigma_s0) = (Double(sigma_t[t]), Double(sigma_t[s0]))
        let (h, h_0) = (p_lambda_t - lambda_s0, lambda_s0 - lambda_s1)
        let r0 = h_0 / h
        let D0 = m0

        // D1 = (1.0 / r0) * (m0 - m1)
        let D1 = weightedSum(
            [1 / r0, -1 / r0],
            [m0, m1]
        )

        // See https://arxiv.org/abs/2211.01095 for detailed derivations
        // x_t = (
        //     (sigma_t / sigma_s0) * sample
        //     - (alpha_t * (torch.exp(-h) - 1.0)) * D0
        //     - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1
        // )
        let x_t = weightedSum(
            [p_sigma_t / sigma_s0, -p_alpha_t * (exp(-h) - 1), -0.5 * p_alpha_t * (exp(-h) - 1)],
            [sample, D0, D1]
        )
        return x_t
    }

    public func step(output: MLShapedArray<Float32>, timeStep t: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        let stepIndex = timeSteps.firstIndex(of: t) ?? timeSteps.count - 1
        let prevTimestep = stepIndex == timeSteps.count - 1 ? 0 : timeSteps[stepIndex + 1]

        let lowerOrderFinal = useLowerOrderFinal && stepIndex == timeSteps.count - 1 && timeSteps.count < 15
        let lowerOrderSecond = useLowerOrderFinal && stepIndex == timeSteps.count - 2 && timeSteps.count < 15
        let lowerOrder = lowerOrderStepped < 1 || lowerOrderFinal || lowerOrderSecond

        let modelOutput = convertModelOutput(modelOutput: output, timestep: t, sample: sample)
        if modelOutputs.count == solverOrder { modelOutputs.removeFirst() }
        modelOutputs.append(modelOutput)

        let prevSample: MLShapedArray<Float32>
        if lowerOrder {
            prevSample = firstOrderUpdate(modelOutput: modelOutput, timestep: t, prevTimestep: prevTimestep, sample: sample)
        } else {
            prevSample = secondOrderUpdate(
                modelOutputs: modelOutputs,
                timesteps: [timeSteps[stepIndex - 1], t],
                prevTimestep: prevTimestep,
                sample: sample
            )
        }
        if lowerOrderStepped < solverOrder {
            lowerOrderStepped += 1
        }

        return prevSample
    }
}
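To show how this scheduler slots into the reverse-diffusion loop, here is a minimal sketch. `predictNoise` and `initialLatent` are hypothetical stand-ins for the Core ML UNet call and latent preparation, which are not part of this file; only the scheduler API shown is defined in this commit.

import CoreML

// Hypothetical denoising loop driving the new scheduler.
let scheduler = DPMSolverMultistepScheduler(stepCount: 25)
var latent: MLShapedArray<Float32> = initialLatent // assumed Gaussian noise, prepared elsewhere

for t in scheduler.timeSteps {
    // The model predicts epsilon (noise) at timestep t (hypothetical UNet wrapper)
    let noisePrediction = predictNoise(latent: latent, timestep: t)
    // The scheduler converts the prediction to x0 form internally and
    // returns the previous (less noisy) latent sample
    latent = scheduler.step(output: noisePrediction, timeStep: t, sample: latent)
}

Because the solver is multistep, the scheduler caches the last two converted model outputs internally, which is why `step` must be called once per timestep in order.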

swift/StableDiffusion/pipeline/Decoder.swift

Lines changed: 24 additions & 10 deletions
@@ -6,21 +6,31 @@ import CoreML
 import Accelerate
 
 /// A decoder model which produces RGB images from latent samples
-public struct Decoder {
+@available(iOS 16.2, macOS 13.1, *)
+public struct Decoder: ResourceManaging {
 
     /// VAE decoder model
-    var model: MLModel
+    var model: ManagedMLModel
 
     /// Create decoder from Core ML model
     ///
-    /// - Parameters
-    ///     - model: Core ML model for VAE decoder
-    public init(model: MLModel) {
-        self.model = model
+    /// - Parameters:
+    ///     - url: Location of compiled VAE decoder Core ML model
+    ///     - configuration: configuration to be used when the model is loaded
+    /// - Returns: A decoder that will lazily load its required resources when needed or requested
+    public init(modelAt url: URL, configuration: MLModelConfiguration) {
+        self.model = ManagedMLModel(modelAt: url, configuration: configuration)
     }
 
-    /// Prediction queue
-    let queue = DispatchQueue(label: "decoder.predict")
+    /// Ensure the model has been loaded into memory
+    public func loadResources() throws {
+        try model.loadResources()
+    }
+
+    /// Unload the underlying model to free up memory
+    public func unloadResources() {
+        model.unloadResources()
+    }
 
     /// Batch decode latent samples into images
     ///
@@ -42,7 +52,9 @@ public struct Decoder {
         let batch = MLArrayBatchProvider(array: inputs)
 
         // Batch predict with model
-        let results = try queue.sync { try model.predictions(fromBatch: batch) }
+        let results = try model.perform { model in
+            try model.predictions(fromBatch: batch)
+        }
 
         // Transform the outputs to CGImages
         let images: [CGImage] = (0..<results.count).map { i in
@@ -57,7 +69,9 @@ public struct Decoder {
     }
 
     var inputName: String {
-        model.modelDescription.inputDescriptionsByName.first!.key
+        try! model.perform { model in
+            model.modelDescription.inputDescriptionsByName.first!.key
+        }
    }
 
     typealias PixelBufferPFx1 = vImage.PixelBuffer<vImage.PlanarF>
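A short sketch of the decoder's new construction and lifecycle, using only the API visible in this diff; the model path is a placeholder, not part of this commit:

import CoreML

let configuration = MLModelConfiguration()
configuration.computeUnits = .cpuAndGPU

// Placeholder path; the compiled VAE decoder location depends on your setup
let decoderURL = URL(fileURLWithPath: "/path/to/VAEDecoder.mlmodelc")
let decoder = Decoder(modelAt: decoderURL, configuration: configuration)

// Loading is lazy: the first prediction loads the model. To load eagerly:
try decoder.loadResources()
// ... decode latent samples into CGImages ...
decoder.unloadResources() // release the model memory when done

The design change here replaces a struct that owned a loaded `MLModel` and a private prediction queue with a `ManagedMLModel` wrapper, so memory for the large VAE weights can be reclaimed between generations.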
swift/StableDiffusion/pipeline/ManagedMLModel.swift (new file)

Lines changed: 77 additions & 0 deletions

// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import CoreML

/// A class to manage and gate access to a Core ML model
///
/// It will automatically load a model into memory when needed or requested
/// It allows one to request to unload the model from memory
@available(iOS 16.2, macOS 13.1, *)
public final class ManagedMLModel: ResourceManaging {

    /// The location of the model
    var modelURL: URL

    /// The configuration to be used when the model is loaded
    var configuration: MLModelConfiguration

    /// The loaded model (when loaded)
    var loadedModel: MLModel?

    /// Queue to protect access to loaded model
    var queue: DispatchQueue

    /// Create a managed model given its location and desired loaded configuration
    ///
    /// - Parameters:
    ///   - url: The location of the model
    ///   - configuration: The configuration to be used when the model is loaded/used
    /// - Returns: A managed model that has not been loaded
    public init(modelAt url: URL, configuration: MLModelConfiguration) {
        self.modelURL = url
        self.configuration = configuration
        self.loadedModel = nil
        self.queue = DispatchQueue(label: "managed.\(url.lastPathComponent)")
    }

    /// Instantiate and load the model into memory
    public func loadResources() throws {
        try queue.sync {
            try loadModel()
        }
    }

    /// Unload the model if it was loaded
    public func unloadResources() {
        queue.sync {
            loadedModel = nil
        }
    }

    /// Perform an operation with the managed model via a supplied closure.
    /// The model will be loaded and supplied to the closure and should only be
    /// used within the closure to ensure all resource management is synchronized
    ///
    /// - Parameters:
    ///   - body: Closure which performs an action on a loaded model
    /// - Returns: The result of the closure
    /// - Throws: An error if the model cannot be loaded or if the closure throws
    public func perform<R>(_ body: (MLModel) throws -> R) throws -> R {
        return try queue.sync {
            try autoreleasepool {
                try loadModel()
                return try body(loadedModel!)
            }
        }
    }

    private func loadModel() throws {
        if loadedModel == nil {
            loadedModel = try MLModel(contentsOf: modelURL,
                                      configuration: configuration)
        }
    }
}
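A brief usage sketch of the gating pattern, using only the API defined above; the model URL is a placeholder:

import CoreML

// Placeholder location for any compiled Core ML model
let modelURL = URL(fileURLWithPath: "/path/to/Model.mlmodelc")
let managed = ManagedMLModel(modelAt: modelURL, configuration: MLModelConfiguration())

// `perform` loads the model on first use and serializes all access
// on the instance's private queue
let outputNames = try managed.perform { model in
    Array(model.modelDescription.outputDescriptionsByName.keys)
}

// Drop the loaded model to reclaim memory; a later `perform` reloads it
managed.unloadResources()

Routing every access through the single serial queue means callers never observe a partially loaded or half-unloaded model, at the cost of serializing predictions per model instance.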
