
Commit fe71ff0
chore: resolve code conflicts
Merge commit, 2 parents: d3cd6d9 + c90b705

24 files changed: +744 / -174 lines

.github/pull_request_template.md

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+Thank you for your interest in contributing to Core ML Stable Diffusion! Please review [CONTRIBUTING.md](../CONTRIBUTING.md) first. If you would like to proceed with making a pull request, please indicate your agreement to the terms outlined in CONTRIBUTING.md by checking the box below. If not, please go ahead and fork this repo and make your updates.
+
+We appreciate your interest in the project!
+
+Do not erase the below when submitting your pull request:
+#########
+
+- [ ] I agree to the terms outlined in CONTRIBUTING.md

CONTRIBUTING.md

Lines changed: 9 additions & 5 deletions
@@ -1,11 +1,15 @@
 # Contribution Guide
 
-Thanks for your interest in contributing. This project was released for system demonstration purposes and there are limited plans for future development of the repository.
+Thank you for your interest in contributing to Core ML Stable Diffusion! This project was released for system demonstration purposes and there are limited plans for future development of the repository. While we welcome new pull requests and issues please note that our response may be limited. Forks and out-of-tree improvements are strongly encouraged.
 
-While we welcome new pull requests and issues please note that our response may be limited. Forks and out-of-tree improvements are strongly encouraged.
+## Submitting a Pull Request
 
-## Before you get started
+Under the project's [LICENSE](LICENSE.md), confirmation of the following is required to merge your changes:
 
-By submitting a pull request, you represent that you have the right to license your contribution to Apple and the community, and agree by submitting the patch that your contributions are licensed under the [LICENSE](LICENSE).
+*"I agree that all information entered is original and owned by me, and I hereby provide an irrevocable, royalty-free license to Apple to use, modify, copy, publish, prepare derivative works of, distribute (including under the Apple Sample Code License), such information and all intellectual property therein in whole or part, in perpetuity and worldwide, without any attribution."*
 
-We ask that all community members read and observe our [Code of Conduct](CODE_OF_CONDUCT.md).
+By submitting a pull request, you represent that you have the right to license your contribution to Apple and the community, and agree by submitting the patch that your contributions are licensed under the [LICENSE](LICENSE.md).
+
+## Code of Conduct
+
+We ask that all community members read and observe our [Code of Conduct](CODE_OF_CONDUCT.md).

Package.swift

Lines changed: 3 additions & 3 deletions
@@ -6,9 +6,9 @@ import PackageDescription
 let package = Package(
     name: "stable-diffusion",
     platforms: [
-        .macOS(.v13),
-        .iOS(.v16),
+        .macOS(.v11),
+        .iOS(.v14),
     ],
     products: [
         .library(
             name: "StableDiffusion",

README.md

Lines changed: 70 additions & 26 deletions
Large diffs are not rendered by default.

python_coreml_stable_diffusion/pipeline.py

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@
 import time
 import torch # Only used for `torch.from_tensor` in `pipe.scheduler.step()`
 from transformers import CLIPFeatureExtractor, CLIPTokenizer
-from typing import Union, Optional
+from typing import List, Optional, Union
 
 
 class CoreMLStableDiffusionPipeline(DiffusionPipeline):

python_coreml_stable_diffusion/torch2coreml.py

Lines changed: 1 addition & 1 deletion
@@ -576,7 +576,7 @@ def convert_unet(pipe, args):
     # Set the output descriptions
     coreml_unet.output_description["noise_pred"] = \
         "Same shape and dtype as the `sample` input. " \
-        "The predicted noise to faciliate the reverse diffusion (denoising) process"
+        "The predicted noise to facilitate the reverse diffusion (denoising) process"
 
     _save_mlpackage(coreml_unet, out_path)
     logger.info(f"Saved unet into {out_path}")

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"transformers",
2121
"scipy",
2222
"gradio",
23+
"numpy<1.24",
2324
],
2425
packages=find_packages(),
2526
classifiers=[
swift/StableDiffusion/pipeline/DPMSolverMultistepScheduler.swift (new file)

Lines changed: 182 additions & 0 deletions

// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. and The HuggingFace Team. All Rights Reserved.

import Accelerate
import CoreML

/// A scheduler used to compute a de-noised image
///
/// This implementation matches:
/// [Hugging Face Diffusers DPMSolverMultistepScheduler](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py)
///
/// It uses the DPM-Solver++ algorithm: [code](https://github.com/LuChengTHU/dpm-solver) [paper](https://arxiv.org/abs/2211.01095).
/// Limitations:
///  - Only implemented for DPM-Solver++ algorithm (not DPM-Solver).
///  - Second order only.
///  - Assumes the model predicts epsilon.
///  - No dynamic thresholding.
///  - `midpoint` solver algorithm.
@available(iOS 16.2, macOS 13.1, *)
public final class DPMSolverMultistepScheduler: Scheduler {
    public let trainStepCount: Int
    public let inferenceStepCount: Int
    public let betas: [Float]
    public let alphas: [Float]
    public let alphasCumProd: [Float]
    public let timeSteps: [Int]

    public let alpha_t: [Float]
    public let sigma_t: [Float]
    public let lambda_t: [Float]

    public let solverOrder = 2
    private(set) var lowerOrderStepped = 0

    /// Whether to use lower-order solvers in the final steps. Only valid for less than 15 inference steps.
    /// We empirically find this trick can stabilize the sampling of DPM-Solver, especially with 10 or fewer steps.
    public let useLowerOrderFinal = true

    // Stores solverOrder (2) items
    private(set) var modelOutputs: [MLShapedArray<Float32>] = []

    /// Create a scheduler that uses a second order DPM-Solver++ algorithm.
    ///
    /// - Parameters:
    ///   - stepCount: Number of inference steps to schedule
    ///   - trainStepCount: Number of training diffusion steps
    ///   - betaSchedule: Method to schedule betas from betaStart to betaEnd
    ///   - betaStart: The starting value of beta for inference
    ///   - betaEnd: The end value for beta for inference
    /// - Returns: A scheduler ready for its first step
    public init(
        stepCount: Int = 50,
        trainStepCount: Int = 1000,
        betaSchedule: BetaSchedule = .scaledLinear,
        betaStart: Float = 0.00085,
        betaEnd: Float = 0.012
    ) {
        self.trainStepCount = trainStepCount
        self.inferenceStepCount = stepCount

        switch betaSchedule {
        case .linear:
            self.betas = linspace(betaStart, betaEnd, trainStepCount)
        case .scaledLinear:
            self.betas = linspace(pow(betaStart, 0.5), pow(betaEnd, 0.5), trainStepCount).map({ $0 * $0 })
        }

        self.alphas = betas.map({ 1.0 - $0 })
        var alphasCumProd = self.alphas
        for i in 1..<alphasCumProd.count {
            alphasCumProd[i] *= alphasCumProd[i - 1]
        }
        self.alphasCumProd = alphasCumProd

        // Currently we only support VP-type noise schedule
        self.alpha_t = vForce.sqrt(self.alphasCumProd)
        self.sigma_t = vForce.sqrt(vDSP.subtract([Float](repeating: 1, count: self.alphasCumProd.count), self.alphasCumProd))
        self.lambda_t = zip(self.alpha_t, self.sigma_t).map { α, σ in log(α) - log(σ) }

        self.timeSteps = linspace(0, Float(self.trainStepCount - 1), stepCount).reversed().map { Int(round($0)) }
    }

    /// Convert the model output to the corresponding type the algorithm needs.
    /// This implementation is for second-order DPM-Solver++ assuming epsilon prediction.
    func convertModelOutput(modelOutput: MLShapedArray<Float32>, timestep: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        assert(modelOutput.scalars.count == sample.scalars.count)
        let (alpha_t, sigma_t) = (self.alpha_t[timestep], self.sigma_t[timestep])

        // This could be optimized with a Metal kernel if we find we need to
        let x0_scalars = zip(modelOutput.scalars, sample.scalars).map { m, s in
            (s - m * sigma_t) / alpha_t
        }
        return MLShapedArray(scalars: x0_scalars, shape: modelOutput.shape)
    }

    /// One step for the first-order DPM-Solver (equivalent to DDIM).
    /// See https://arxiv.org/abs/2206.00927 for the detailed derivation.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func firstOrderUpdate(
        modelOutput: MLShapedArray<Float32>,
        timestep: Int,
        prevTimestep: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        let (p_lambda_t, lambda_s) = (Double(lambda_t[prevTimestep]), Double(lambda_t[timestep]))
        let p_alpha_t = Double(alpha_t[prevTimestep])
        let (p_sigma_t, sigma_s) = (Double(sigma_t[prevTimestep]), Double(sigma_t[timestep]))
        let h = p_lambda_t - lambda_s
        // x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output
        let x_t = weightedSum(
            [p_sigma_t / sigma_s, -p_alpha_t * (exp(-h) - 1)],
            [sample, modelOutput]
        )
        return x_t
    }

    /// One step for the second-order multistep DPM-Solver++ algorithm, using the midpoint method.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func secondOrderUpdate(
        modelOutputs: [MLShapedArray<Float32>],
        timesteps: [Int],
        prevTimestep t: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        let (s0, s1) = (timesteps[back: 1], timesteps[back: 2])
        let (m0, m1) = (modelOutputs[back: 1], modelOutputs[back: 2])
        let (p_lambda_t, lambda_s0, lambda_s1) = (Double(lambda_t[t]), Double(lambda_t[s0]), Double(lambda_t[s1]))
        let p_alpha_t = Double(alpha_t[t])
        let (p_sigma_t, sigma_s0) = (Double(sigma_t[t]), Double(sigma_t[s0]))
        let (h, h_0) = (p_lambda_t - lambda_s0, lambda_s0 - lambda_s1)
        let r0 = h_0 / h
        let D0 = m0

        // D1 = (1.0 / r0) * (m0 - m1)
        let D1 = weightedSum(
            [1 / r0, -1 / r0],
            [m0, m1]
        )

        // See https://arxiv.org/abs/2211.01095 for detailed derivations
        // x_t = (
        //     (sigma_t / sigma_s0) * sample
        //     - (alpha_t * (torch.exp(-h) - 1.0)) * D0
        //     - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1
        // )
        let x_t = weightedSum(
            [p_sigma_t / sigma_s0, -p_alpha_t * (exp(-h) - 1), -0.5 * p_alpha_t * (exp(-h) - 1)],
            [sample, D0, D1]
        )
        return x_t
    }

    public func step(output: MLShapedArray<Float32>, timeStep t: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        let stepIndex = timeSteps.firstIndex(of: t) ?? timeSteps.count - 1
        let prevTimestep = stepIndex == timeSteps.count - 1 ? 0 : timeSteps[stepIndex + 1]

        let lowerOrderFinal = useLowerOrderFinal && stepIndex == timeSteps.count - 1 && timeSteps.count < 15
        let lowerOrderSecond = useLowerOrderFinal && stepIndex == timeSteps.count - 2 && timeSteps.count < 15
        let lowerOrder = lowerOrderStepped < 1 || lowerOrderFinal || lowerOrderSecond

        let modelOutput = convertModelOutput(modelOutput: output, timestep: t, sample: sample)
        if modelOutputs.count == solverOrder { modelOutputs.removeFirst() }
        modelOutputs.append(modelOutput)

        let prevSample: MLShapedArray<Float32>
        if lowerOrder {
            prevSample = firstOrderUpdate(modelOutput: modelOutput, timestep: t, prevTimestep: prevTimestep, sample: sample)
        } else {
            prevSample = secondOrderUpdate(
                modelOutputs: modelOutputs,
                timesteps: [timeSteps[stepIndex - 1], t],
                prevTimestep: prevTimestep,
                sample: sample
            )
        }
        if lowerOrderStepped < solverOrder {
            lowerOrderStepped += 1
        }

        return prevSample
    }
}
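To show how this scheduler slots into the reverse-diffusion loop, here is a minimal sketch. `predictNoise` and `initialLatent` are hypothetical stand-ins for the Core ML UNet call and latent preparation, which are not part of this file; only the scheduler API shown is defined in this commit.

import CoreML

// Hypothetical denoising loop driving the new scheduler.
let scheduler = DPMSolverMultistepScheduler(stepCount: 25)
var latent: MLShapedArray<Float32> = initialLatent // assumed Gaussian noise, prepared elsewhere

for t in scheduler.timeSteps {
    // The model predicts epsilon (noise) at timestep t (hypothetical UNet wrapper)
    let noisePrediction = predictNoise(latent: latent, timestep: t)
    // The scheduler converts the prediction to x0 form internally and
    // returns the previous (less noisy) latent sample
    latent = scheduler.step(output: noisePrediction, timeStep: t, sample: latent)
}

Because the solver is multistep, the scheduler caches the last two converted model outputs internally, which is why `step` must be called once per timestep in order.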

swift/StableDiffusion/pipeline/Decoder.swift

Lines changed: 24 additions & 10 deletions
@@ -6,21 +6,31 @@ import CoreML
 import Accelerate
 
 /// A decoder model which produces RGB images from latent samples
-public struct Decoder {
+@available(iOS 16.2, macOS 13.1, *)
+public struct Decoder: ResourceManaging {
 
     /// VAE decoder model
-    var model: MLModel
+    var model: ManagedMLModel
 
     /// Create decoder from Core ML model
     ///
-    /// - Parameters
-    ///     - model: Core ML model for VAE decoder
-    public init(model: MLModel) {
-        self.model = model
+    /// - Parameters:
+    ///     - url: Location of compiled VAE decoder Core ML model
+    ///     - configuration: configuration to be used when the model is loaded
+    /// - Returns: A decoder that will lazily load its required resources when needed or requested
+    public init(modelAt url: URL, configuration: MLModelConfiguration) {
+        self.model = ManagedMLModel(modelAt: url, configuration: configuration)
     }
 
-    /// Prediction queue
-    let queue = DispatchQueue(label: "decoder.predict")
+    /// Ensure the model has been loaded into memory
+    public func loadResources() throws {
+        try model.loadResources()
+    }
+
+    /// Unload the underlying model to free up memory
+    public func unloadResources() {
+        model.unloadResources()
+    }
 
     /// Batch decode latent samples into images
     ///
@@ -42,7 +52,9 @@ public struct Decoder {
         let batch = MLArrayBatchProvider(array: inputs)
 
         // Batch predict with model
-        let results = try queue.sync { try model.predictions(fromBatch: batch) }
+        let results = try model.perform { model in
+            try model.predictions(fromBatch: batch)
+        }
 
         // Transform the outputs to CGImages
         let images: [CGImage] = (0..<results.count).map { i in
@@ -57,7 +69,9 @@ public struct Decoder {
     }
 
     var inputName: String {
-        model.modelDescription.inputDescriptionsByName.first!.key
+        try! model.perform { model in
+            model.modelDescription.inputDescriptionsByName.first!.key
+        }
    }
 
     typealias PixelBufferPFx1 = vImage.PixelBuffer<vImage.PlanarF>
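A short sketch of the decoder's new construction and lifecycle, using only the API visible in this diff; the model path is a placeholder, not part of this commit:

import CoreML

let configuration = MLModelConfiguration()
configuration.computeUnits = .cpuAndGPU

// Placeholder path; the compiled VAE decoder location depends on your setup
let decoderURL = URL(fileURLWithPath: "/path/to/VAEDecoder.mlmodelc")
let decoder = Decoder(modelAt: decoderURL, configuration: configuration)

// Loading is lazy: the first prediction loads the model. To load eagerly:
try decoder.loadResources()
// ... decode latent samples into CGImages ...
decoder.unloadResources() // release the model memory when done

The design change here replaces a struct that owned a loaded `MLModel` and a private prediction queue with a `ManagedMLModel` wrapper, so memory for the large VAE weights can be reclaimed between generations.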
swift/StableDiffusion/pipeline/ManagedMLModel.swift (new file)

Lines changed: 77 additions & 0 deletions

// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import CoreML

/// A class to manage and gate access to a Core ML model
///
/// It will automatically load a model into memory when needed or requested
/// It allows one to request to unload the model from memory
@available(iOS 16.2, macOS 13.1, *)
public final class ManagedMLModel: ResourceManaging {

    /// The location of the model
    var modelURL: URL

    /// The configuration to be used when the model is loaded
    var configuration: MLModelConfiguration

    /// The loaded model (when loaded)
    var loadedModel: MLModel?

    /// Queue to protect access to loaded model
    var queue: DispatchQueue

    /// Create a managed model given its location and desired loaded configuration
    ///
    /// - Parameters:
    ///   - url: The location of the model
    ///   - configuration: The configuration to be used when the model is loaded/used
    /// - Returns: A managed model that has not been loaded
    public init(modelAt url: URL, configuration: MLModelConfiguration) {
        self.modelURL = url
        self.configuration = configuration
        self.loadedModel = nil
        self.queue = DispatchQueue(label: "managed.\(url.lastPathComponent)")
    }

    /// Instantiate and load the model into memory
    public func loadResources() throws {
        try queue.sync {
            try loadModel()
        }
    }

    /// Unload the model if it was loaded
    public func unloadResources() {
        queue.sync {
            loadedModel = nil
        }
    }

    /// Perform an operation with the managed model via a supplied closure.
    /// The model will be loaded and supplied to the closure and should only be
    /// used within the closure to ensure all resource management is synchronized
    ///
    /// - Parameters:
    ///   - body: Closure which performs an action on a loaded model
    /// - Returns: The result of the closure
    /// - Throws: An error if the model cannot be loaded or if the closure throws
    public func perform<R>(_ body: (MLModel) throws -> R) throws -> R {
        return try queue.sync {
            try autoreleasepool {
                try loadModel()
                return try body(loadedModel!)
            }
        }
    }

    private func loadModel() throws {
        if loadedModel == nil {
            loadedModel = try MLModel(contentsOf: modelURL,
                                      configuration: configuration)
        }
    }
}
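A brief usage sketch of the gating pattern, using only the API defined above; the model URL is a placeholder:

import CoreML

// Placeholder location for any compiled Core ML model
let modelURL = URL(fileURLWithPath: "/path/to/Model.mlmodelc")
let managed = ManagedMLModel(modelAt: modelURL, configuration: MLModelConfiguration())

// `perform` loads the model on first use and serializes all access
// on the instance's private queue
let outputNames = try managed.perform { model in
    Array(model.modelDescription.outputDescriptionsByName.keys)
}

// Drop the loaded model to reclaim memory; a later `perform` reloads it
managed.unloadResources()

Routing every access through the single serial queue means callers never observe a partially loaded or half-unloaded model, at the cost of serializing predictions per model instance.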
