@@ -210,22 +210,24 @@ public struct StableDiffusionPipeline: StableDiffusionPipelineProtocol {
210
210
progressHandler: ( Progress ) -> Bool = { _ in true }
211
211
) throws -> [ CGImage ? ] {
212
212
213
- // Encode the input prompt and negative prompt
214
- let promptEmbedding = try textEncoder. encode ( config. prompt)
215
- let negativePromptEmbedding = try textEncoder. encode ( config. negativePrompt)
213
+ // Encode the input prompt
214
+ var promptEmbedding = try textEncoder. encode ( config. prompt)
215
+
216
+ if config. guidanceScale >= 1.0 {
217
+ // Guidance enabled (guidanceScale >= 1.0): build the combined embedding that is later converted to the Unet hidden state representation
218
+ // Concatenate the prompt and negative prompt embeddings
219
+ let negativePromptEmbedding = try textEncoder. encode ( config. negativePrompt)
220
+ promptEmbedding = MLShapedArray < Float32 > (
221
+ concatenating: [ negativePromptEmbedding, promptEmbedding] ,
222
+ alongAxis: 0
223
+ )
224
+ }
216
225
217
226
if reduceMemory {
218
227
textEncoder. unloadResources ( )
219
228
}
220
229
221
- // Convert to Unet hidden state representation
222
- // Concatenate the prompt and negative prompt embeddings
223
- let concatEmbedding = MLShapedArray < Float32 > (
224
- concatenating: [ negativePromptEmbedding, promptEmbedding] ,
225
- alongAxis: 0
226
- )
227
-
228
- let hiddenStates = useMultilingualTextEncoder ? concatEmbedding : toHiddenStates ( concatEmbedding)
230
+ let hiddenStates = useMultilingualTextEncoder ? promptEmbedding : toHiddenStates ( promptEmbedding)
229
231
230
232
/// Setup schedulers
231
233
let scheduler : [ Scheduler ] = ( 0 ..< config. imageCount) . map { _ in
@@ -262,8 +264,13 @@ public struct StableDiffusionPipeline: StableDiffusionPipelineProtocol {
262
264
263
265
// Expand the latents for classifier-free guidance
264
266
// and input to the Unet noise prediction model
265
- let latentUnetInput = latents. map {
266
- MLShapedArray < Float32 > ( concatenating: [ $0, $0] , alongAxis: 0 )
267
+ let latentUnetInput : [ MLShapedArray < Float32 > ]
268
+ if config. guidanceScale >= 1.0 {
269
+ latentUnetInput = latents. map {
270
+ MLShapedArray < Float32 > ( concatenating: [ $0, $0] , alongAxis: 0 )
271
+ }
272
+ } else {
273
+ latentUnetInput = latents
267
274
}
268
275
269
276
// Before Unet, execute controlNet and add the output into Unet inputs
@@ -283,7 +290,9 @@ public struct StableDiffusionPipeline: StableDiffusionPipelineProtocol {
283
290
additionalResiduals: additionalResiduals
284
291
)
285
292
286
- noise = performGuidance ( noise, config. guidanceScale)
293
+ if config. guidanceScale >= 1.0 {
294
+ noise = performGuidance ( noise, config. guidanceScale)
295
+ }
287
296
288
297
// Have the scheduler compute the previous (t-1) latent
289
298
// sample given the predicted noise and current sample
0 commit comments