Skip to content

Commit d25db35

Browse files
ssh4net authored and lgritz committed
perf: IBA::unsharp_mask() speed and memory optimization (#4513)
Replace three IBA calls plus a helper function, which together generated four full-size image buffers, with a single unsharp_impl() that uses parallel_image() to compute the unsharp mask in one pass:

    src + contrast * ((abs(src - blur) <= threshold) ? 0.0 : (src - blur))

Also add a two-pass 1D convolution for kernels larger than 3x3.

## Tests

```
ImageBuf sharped(input.spec());
const int repeats = 50;
std::cout << "Start sharpening\n";
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < repeats; i++) {
    //ok = ImageBufAlgo::unsharp_mask(sharped, input, "gaussian", 15.0f, 10.0f, 0.01f);
    ok = ImageBufAlgo::unsharp_mask(sharped, input, "gaussian", 5.0f, 2.0f, 0.05f);
    std::cout << ".";
}
std::cout << "\n";
auto part1 = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_part1 = part1 - start;
std::cout << "Elapsed time: " << elapsed_part1.count() << " s\n";
```

Both single-threaded (one IB at a time) and multithreaded (multiple IBs at a time) runs show a pretty good speedup: ~30-40%, with less memory use. For 5x5 gaussian kernels, two-pass mode should add at least a 20% speedup. (It would be great if someone could run an independent benchmark, since I saw big differences in the results depending on whether the use was real or synthetic.)

---------

Signed-off-by: Vlad (Kuzmin) Erium <[email protected]>
1 parent 5427ce1 commit d25db35

File tree

5 files changed

+42
-30
lines changed

5 files changed

+42
-30
lines changed

src/libOpenImageIO/imagebufalgo.cpp

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -947,17 +947,28 @@ ImageBufAlgo::make_kernel(string_view name, float width, float height,
947947

948948

949949

950-
// Helper function for unsharp mask to perform the thresholding
950+
template<class Rtype>
951951
static bool
952-
threshold_to_zero(ImageBuf& dst, float threshold, ROI roi, int nthreads)
952+
unsharp_impl(ImageBuf& dst, const ImageBuf& blr, const ImageBuf& src,
953+
const float contrast, const float threshold, ROI roi, int nthreads)
953954
{
954-
OIIO_DASSERT(dst.spec().format.basetype == TypeDesc::FLOAT);
955+
OIIO_DASSERT(dst.spec().nchannels == src.spec().nchannels
956+
&& dst.spec().nchannels == blr.spec().nchannels);
955957

956958
ImageBufAlgo::parallel_image(roi, nthreads, [&](ROI roi) {
957-
for (ImageBuf::Iterator<float> p(dst, roi); !p.done(); ++p)
958-
for (int c = roi.chbegin; c < roi.chend; ++c)
959-
if (fabsf(p[c]) < threshold)
960-
p[c] = 0.0f;
959+
ImageBuf::ConstIterator<Rtype> s(src, roi);
960+
ImageBuf::ConstIterator<float> b(blr, roi);
961+
for (ImageBuf::Iterator<Rtype> d(dst, roi); !d.done(); ++s, ++d, ++b) {
962+
for (int c = roi.chbegin; c < roi.chend; ++c) {
963+
const float diff = s[c] - b[c];
964+
const float abs_diff = fabsf(diff);
965+
if (abs_diff > threshold) {
966+
d[c] = s[c] + contrast * diff;
967+
} else {
968+
d[c] = s[c];
969+
}
970+
}
971+
}
961972
});
962973
return true;
963974
}
@@ -977,10 +988,26 @@ ImageBufAlgo::unsharp_mask(ImageBuf& dst, const ImageBuf& src,
977988
// Blur the source image, store in Blurry
978989
ImageSpec BlurrySpec = src.spec();
979990
BlurrySpec.set_format(TypeDesc::FLOAT); // force float
991+
ImageBuf fst_pass(BlurrySpec);
980992
ImageBuf Blurry(BlurrySpec);
981993

982994
if (kernel == "median") {
983995
median_filter(Blurry, src, ceilf(width), 0, roi, nthreads);
996+
} else if (width > 3.0) {
997+
ImageBuf K = make_kernel(kernel, 1, width);
998+
ImageBuf Kt = ImageBufAlgo::transpose(K);
999+
if (K.has_error()) {
1000+
dst.errorfmt("{}", K.geterror());
1001+
return false;
1002+
}
1003+
if (!convolve(fst_pass, src, K, true, roi, nthreads)) {
1004+
dst.errorfmt("{}", fst_pass.geterror());
1005+
return false;
1006+
}
1007+
if (!convolve(Blurry, fst_pass, Kt, true, roi, nthreads)) {
1008+
dst.errorfmt("{}", Blurry.geterror());
1009+
return false;
1010+
}
9841011
} else {
9851012
ImageBuf K = make_kernel(kernel, width, width);
9861013
if (K.has_error()) {
@@ -993,25 +1020,10 @@ ImageBufAlgo::unsharp_mask(ImageBuf& dst, const ImageBuf& src,
9931020
}
9941021
}
9951022

996-
// Compute the difference between the source image and the blurry
997-
// version. (We store it in the same buffer we used for the difference
998-
// image.)
999-
ImageBuf& Diff(Blurry);
1000-
bool ok = sub(Diff, src, Blurry, roi, nthreads);
1001-
1002-
if (ok && threshold > 0.0f)
1003-
ok = threshold_to_zero(Diff, threshold, roi, nthreads);
1004-
1005-
// Scale the difference image by the contrast
1006-
if (ok)
1007-
ok = mul(Diff, Diff, contrast, roi, nthreads);
1008-
if (!ok) {
1009-
dst.errorfmt("{}", Diff.geterror());
1010-
return false;
1011-
}
1012-
1013-
// Add the scaled difference to the original, to get the final answer
1014-
ok = add(dst, src, Diff, roi, nthreads);
1023+
bool ok;
1024+
OIIO_DISPATCH_COMMON_TYPES(ok, "unsharp_mask", unsharp_impl,
1025+
dst.spec().format, dst, Blurry, src, contrast,
1026+
threshold, roi, nthreads);
10151027

10161028
return ok;
10171029
}

testsuite/docs-examples-cpp/ref/out-arm.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ checker_with_alpha_filled.exr : 256 x 256, 4 channel, half openexr
137137
tahoe_median_filter.tif : 512 x 384, 3 channel, uint8 tiff
138138
SHA-1: A0B2E3A10A16EA8CC905F144C5F91B6A0964A177
139139
tahoe_unsharp_mask.tif : 512 x 384, 3 channel, uint8 tiff
140-
SHA-1: CDE3FAC8053381C59B7BEB3B47991F357E14D9D2
140+
SHA-1: 5842D16483BC74700DE9FD27967B2FFBD54DFCD2
141141
Comparing "simple.tif" and "ref/simple.tif"
142142
PASS
143143
Comparing "scanlines.tif" and "ref/scanlines.tif"

testsuite/docs-examples-cpp/ref/out.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ checker_with_alpha_filled.exr : 256 x 256, 4 channel, half openexr
137137
tahoe_median_filter.tif : 512 x 384, 3 channel, uint8 tiff
138138
SHA-1: A0B2E3A10A16EA8CC905F144C5F91B6A0964A177
139139
tahoe_unsharp_mask.tif : 512 x 384, 3 channel, uint8 tiff
140-
SHA-1: D3B56074F48EC5D3ADDA4BDE1F487192ABE9BA76
140+
SHA-1: C1C9C843D45D90B7C0BBD7BCDB7A11814668FC6D
141141
Comparing "simple.tif" and "ref/simple.tif"
142142
PASS
143143
Comparing "scanlines.tif" and "ref/scanlines.tif"

testsuite/docs-examples-python/ref/out-arm.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ checker_with_alpha_filled.exr : 256 x 256, 4 channel, half openexr
137137
tahoe_median_filter.tif : 512 x 384, 3 channel, uint8 tiff
138138
SHA-1: A0B2E3A10A16EA8CC905F144C5F91B6A0964A177
139139
tahoe_unsharp_mask.tif : 512 x 384, 3 channel, uint8 tiff
140-
SHA-1: CDE3FAC8053381C59B7BEB3B47991F357E14D9D2
140+
SHA-1: 5842D16483BC74700DE9FD27967B2FFBD54DFCD2
141141
Comparing "simple.tif" and "../docs-examples-cpp/ref/simple.tif"
142142
PASS
143143
Comparing "scanlines.tif" and "../docs-examples-cpp/ref/scanlines.tif"

testsuite/docs-examples-python/ref/out.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ checker_with_alpha_filled.exr : 256 x 256, 4 channel, half openexr
137137
tahoe_median_filter.tif : 512 x 384, 3 channel, uint8 tiff
138138
SHA-1: A0B2E3A10A16EA8CC905F144C5F91B6A0964A177
139139
tahoe_unsharp_mask.tif : 512 x 384, 3 channel, uint8 tiff
140-
SHA-1: D3B56074F48EC5D3ADDA4BDE1F487192ABE9BA76
140+
SHA-1: C1C9C843D45D90B7C0BBD7BCDB7A11814668FC6D
141141
Comparing "simple.tif" and "../docs-examples-cpp/ref/simple.tif"
142142
PASS
143143
Comparing "scanlines.tif" and "../docs-examples-cpp/ref/scanlines.tif"

0 commit comments

Comments
 (0)