
Commit b30b3be

Non-Metal based perceptual image comparison (#666)
* Improve the speed of comparing memory buffers by using a workaround for a missed compiler optimization (illustrated in the sketch below)
* Remove Metal usage, since some virtualized environments don't support it; replaces the CoreImage operations that require Metal with CPU-based calculations
* Re-add the Metal-based image comparison: check for support before using it, and fall back to CPU computation if Metal is not supported
* Update the logic that determines whether a Metal kernel is supported on the device
* Use the maintainers' preferred method of using a while loop
1 parent 59b663f commit b30b3be
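
The first bullet refers to SR-6983 ([apple/swift#49531](https://github.com/apple/swift/issues/49531)): in unoptimized builds such as test targets, `for`-`in` iteration over a raw memory buffer is much slower than a hand-rolled `while` loop. A minimal sketch of that pattern is below; the `data`, `width`, `height`, and `rowBytes` parameters are hypothetical stand-ins for the fields of a vImage-style planar Float buffer, and this code is illustrative only, not part of the commit.

```swift
// Illustrative sketch only: count the pixels in a planar Float buffer whose value
// exceeds a threshold. The parameters mimic a vImage_Buffer's fields.
func countPixels(
  exceeding threshold: Float,
  data: UnsafeRawPointer,
  width: Int,
  height: Int,
  rowBytes: Int
) -> Int {
  let componentStride = MemoryLayout<Float>.stride
  var failingCount = 0
  // A manual `while` loop is used instead of `for row in 0..<height`: without compiler
  // optimizations (e.g. in test targets), range-based loops over memory buffers are
  // markedly slower; see SR-6983 / swift#49531.
  var row = 0
  while row < height {
    defer { row += 1 }
    let rowOffset = rowBytes * row
    var column = 0
    while column < width {
      defer { column += 1 }
      let value = data.load(fromByteOffset: rowOffset + column * componentStride, as: Float.self)
      if value > threshold { failingCount += 1 }
    }
  }
  return failingCount
}
```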

File tree

1 file changed: +129 −41 lines changed


Sources/SnapshotTesting/Snapshotting/UIImage.swift

Lines changed: 129 additions & 41 deletions
```diff
@@ -192,57 +192,132 @@
 #endif
 
 #if os(iOS) || os(tvOS) || os(macOS)
+import Accelerate.vImage
 import CoreImage.CIKernel
 import MetalPerformanceShaders
 
 @available(iOS 10.0, tvOS 10.0, macOS 10.13, *)
 func perceptuallyCompare(
   _ old: CIImage, _ new: CIImage, pixelPrecision: Float, perceptualPrecision: Float
 ) -> String? {
-  let deltaOutputImage = old.applyingFilter("CILabDeltaE", parameters: ["inputImage2": new])
-  let thresholdOutputImage: CIImage
-  do {
-    thresholdOutputImage = try ThresholdImageProcessorKernel.apply(
-      withExtent: new.extent,
-      inputs: [deltaOutputImage],
-      arguments: [
-        ThresholdImageProcessorKernel.inputThresholdKey: (1 - perceptualPrecision) * 100
-      ]
-    )
-  } catch {
-    return "Newly-taken snapshot's data could not be loaded. \(error)"
-  }
-  var averagePixel: Float = 0
+  // Calculate the deltaE values. Each pixel is a value between 0-100.
+  // 0 means no difference, 100 means completely opposite.
+  let deltaOutputImage = old.applyingLabDeltaE(new)
+  // Setting the working color space and output color space to NSNull disables color management. This is appropriate when the output
+  // of the operations is computational instead of an image intended to be displayed.
   let context = CIContext(options: [.workingColorSpace: NSNull(), .outputColorSpace: NSNull()])
-  context.render(
-    thresholdOutputImage.applyingFilter(
-      "CIAreaAverage", parameters: [kCIInputExtentKey: new.extent]),
-    toBitmap: &averagePixel,
-    rowBytes: MemoryLayout<Float>.size,
-    bounds: CGRect(x: 0, y: 0, width: 1, height: 1),
-    format: .Rf,
-    colorSpace: nil
-  )
-  let actualPixelPrecision = 1 - averagePixel
-  guard actualPixelPrecision < pixelPrecision else { return nil }
+  let deltaThreshold = (1 - perceptualPrecision) * 100
+  let actualPixelPrecision: Float
   var maximumDeltaE: Float = 0
-  context.render(
-    deltaOutputImage.applyingFilter("CIAreaMaximum", parameters: [kCIInputExtentKey: new.extent]),
-    toBitmap: &maximumDeltaE,
-    rowBytes: MemoryLayout<Float>.size,
-    bounds: CGRect(x: 0, y: 0, width: 1, height: 1),
-    format: .Rf,
-    colorSpace: nil
-  )
-  let actualPerceptualPrecision = 1 - maximumDeltaE / 100
-  if pixelPrecision < 1 {
-    return """
-      Actual image precision \(actualPixelPrecision) is less than required \(pixelPrecision)
-      Actual perceptual precision \(actualPerceptualPrecision) is less than required \(perceptualPrecision)
-      """
+
+  // Metal is supported by all iOS/tvOS devices (2013 models or later) and Macs (2012 models or later).
+  // Older devices do not support iOS/tvOS 13 and macOS 10.15, which are the minimum versions required by swift-snapshot-testing.
+  // However, some virtualized hardware does not have a GPU and therefore does not support Metal.
+  // In this case, macOS falls back to a CPU-based OpenGL ES renderer that silently fails when a Metal command is issued.
+  // We need to check for Metal device support and fall back to CPU-based vImage buffer iteration.
+  if ThresholdImageProcessorKernel.isSupported {
+    // Fast path - Metal processing
+    guard
+      let thresholdOutputImage = try? deltaOutputImage.applyingThreshold(deltaThreshold),
+      let averagePixel = thresholdOutputImage.applyingAreaAverage().renderSingleValue(in: context)
+    else {
+      return "Newly-taken snapshot's data could not be processed."
+    }
+    actualPixelPrecision = 1 - averagePixel
+    if actualPixelPrecision < pixelPrecision {
+      maximumDeltaE = deltaOutputImage.applyingAreaMaximum().renderSingleValue(in: context) ?? 0
+    }
   } else {
-    return
-      "Actual perceptual precision \(actualPerceptualPrecision) is less than required \(perceptualPrecision)"
+    // Slow path - CPU-based vImage buffer iteration
+    guard let buffer = deltaOutputImage.render(in: context) else {
+      return "Newly-taken snapshot could not be processed."
+    }
+    defer { buffer.free() }
+    var failingPixelCount: Int = 0
+    // rowBytes must be a multiple of 8, so vImage_Buffer pads the end of each row with bytes to meet that requirement.
+    // We must do 2D iteration of the vImage_Buffer in order to avoid loading the padding garbage bytes at the end of each row.
+    //
+    // NB: We are purposely using a verbose 'while' loop instead of a 'for in' loop. When the
+    // compiler doesn't have optimizations enabled, like in test targets, a `while` loop is
+    // significantly faster than a `for` loop for iterating through the elements of a memory
+    // buffer. Details can be found in [SR-6983](https://github.com/apple/swift/issues/49531)
+    let componentStride = MemoryLayout<Float>.stride
+    var line = 0
+    while line < buffer.height {
+      defer { line += 1 }
+      let lineOffset = buffer.rowBytes * line
+      var column = 0
+      while column < buffer.width {
+        defer { column += 1 }
+        let byteOffset = lineOffset + column * componentStride
+        let deltaE = buffer.data.load(fromByteOffset: byteOffset, as: Float.self)
+        if deltaE > deltaThreshold {
+          failingPixelCount += 1
+          if deltaE > maximumDeltaE {
+            maximumDeltaE = deltaE
+          }
+        }
+      }
+    }
+    let failingPixelPercent = Float(failingPixelCount) / Float(deltaOutputImage.extent.width * deltaOutputImage.extent.height)
+    actualPixelPrecision = 1 - failingPixelPercent
+  }
+
+  guard actualPixelPrecision < pixelPrecision else { return nil }
+  // The actual perceptual precision is the perceptual precision of the pixel with the highest DeltaE.
+  // DeltaE is on a 0-100 scale, so we need to divide by 100 to transform it to a percentage.
+  let minimumPerceptualPrecision = 1 - min(maximumDeltaE / 100, 1)
+  return """
+    The percentage of pixels that match \(actualPixelPrecision) is less than required \(pixelPrecision)
+    The lowest perceptual color precision \(minimumPerceptualPrecision) is less than required \(perceptualPrecision)
+    """
+}
+
+extension CIImage {
+  func applyingLabDeltaE(_ other: CIImage) -> CIImage {
+    applyingFilter("CILabDeltaE", parameters: ["inputImage2": other])
+  }
+
+  func applyingThreshold(_ threshold: Float) throws -> CIImage {
+    try ThresholdImageProcessorKernel.apply(
+      withExtent: extent,
+      inputs: [self],
+      arguments: [ThresholdImageProcessorKernel.inputThresholdKey: threshold]
+    )
+  }
+
+  func applyingAreaAverage() -> CIImage {
+    applyingFilter("CIAreaAverage", parameters: [kCIInputExtentKey: extent])
+  }
+
+  func applyingAreaMaximum() -> CIImage {
+    applyingFilter("CIAreaMaximum", parameters: [kCIInputExtentKey: extent])
+  }
+
+  func renderSingleValue(in context: CIContext) -> Float? {
+    guard let buffer = render(in: context) else { return nil }
+    defer { buffer.free() }
+    return buffer.data.load(fromByteOffset: 0, as: Float.self)
+  }
+
+  func render(in context: CIContext, format: CIFormat = CIFormat.Rh) -> vImage_Buffer? {
+    // Some hardware configurations (virtualized CPU renderers) do not support 32-bit float output formats,
+    // so use a compatible 16-bit float format and convert the output value to 32-bit floats.
+    guard var buffer16 = try? vImage_Buffer(width: Int(extent.width), height: Int(extent.height), bitsPerPixel: 16) else { return nil }
+    defer { buffer16.free() }
+    context.render(
+      self,
+      toBitmap: buffer16.data,
+      rowBytes: buffer16.rowBytes,
+      bounds: extent,
+      format: format,
+      colorSpace: nil
+    )
+    guard
+      var buffer32 = try? vImage_Buffer(width: Int(buffer16.width), height: Int(buffer16.height), bitsPerPixel: 32),
+      vImageConvert_Planar16FtoPlanarF(&buffer16, &buffer32, 0) == kvImageNoError
+    else { return nil }
+    return buffer32
   }
 }
```
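
To make the two precision parameters in the new code concrete, here is a small worked sketch of the arithmetic; the numbers are hypothetical examples chosen for illustration, not values from the commit.

```swift
// Hypothetical example values, purely illustrative.
let perceptualPrecision: Float = 0.98
let pixelPrecision: Float = 0.95

// A pixel "fails" when its Lab deltaE exceeds this threshold (deltaE is on a 0-100 scale).
let deltaThreshold = (1 - perceptualPrecision) * 100  // ≈ 2.0

// Suppose 300 of 100,000 pixels exceed that threshold.
let failingPixelCount: Float = 300
let totalPixelCount: Float = 100_000
let actualPixelPrecision = 1 - failingPixelCount / totalPixelCount  // 0.997

// The comparison passes when enough pixels stayed under the deltaE threshold,
// i.e. when actualPixelPrecision >= pixelPrecision.
let matches = actualPixelPrecision >= pixelPrecision  // true
```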

```diff
@@ -252,6 +327,19 @@
   static let inputThresholdKey = "thresholdValue"
   static let device = MTLCreateSystemDefaultDevice()
 
+  static var isSupported: Bool {
+    guard let device = device else {
+      return false
+    }
+    #if targetEnvironment(simulator)
+      guard #available(iOS 14.0, tvOS 14.0, *) else {
+        // The MPSSupportsMTLDevice method throws an exception on iOS/tvOS simulators < 14.0
+        return false
+      }
+    #endif
+    return MPSSupportsMTLDevice(device)
+  }
+
   override class func process(
     with inputs: [CIImageProcessorInput]?, arguments: [String: Any]?,
     output: CIImageProcessorOutput
```
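
For context, these are the knobs the library's image snapshot strategies expose to tests. A typical call site looks roughly like the sketch below; the test case and view are made up for illustration, and the exact `.image` parameter list should be checked against the swift-snapshot-testing release you are using.

```swift
import SnapshotTesting
import UIKit
import XCTest

// Hypothetical test case, shown only to illustrate how the precision parameters are passed.
final class ExampleSnapshotTests: XCTestCase {
  func testRedSquare() {
    let view = UIView(frame: CGRect(x: 0, y: 0, width: 100, height: 100))
    view.backgroundColor = .red
    // `precision` maps to pixelPrecision above; `perceptualPrecision` sets the deltaE tolerance:
    // 0.98 allows each pixel a Lab deltaE of up to 2, absorbing small GPU/CPU rendering differences.
    assertSnapshot(matching: view, as: .image(precision: 1, perceptualPrecision: 0.98))
  }
}
```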
