Skip to content

Commit fd663c4

Browse files
committed
Blur: Improve parallelization
1 parent 7868157 commit fd663c4

File tree

1 file changed

+57
-118
lines changed

1 file changed

+57
-118
lines changed

src/effects/Blur.cpp

Lines changed: 57 additions & 118 deletions
Original file line number | Diff line number | Diff line change
@@ -77,154 +77,93 @@ std::shared_ptr<Frame> Blur::GetFrame(std::shared_ptr<Frame> frame, int64_t fram
7777
int w = frame_image->width();
7878
int h = frame_image->height();
7979

80-
// Declare 2-column arrays for each color channel
81-
typedef struct {
82-
unsigned char *red;
83-
unsigned char *green;
84-
unsigned char *blue;
85-
unsigned char *alpha;
86-
} channels;
87-
88-
channels arrays_in {
89-
new unsigned char[w * h](),
90-
new unsigned char[w * h](),
91-
new unsigned char[w * h](),
92-
new unsigned char[w * h]()
93-
};
94-
channels arrays_out {
95-
new unsigned char[w * h](),
96-
new unsigned char[w * h](),
97-
new unsigned char[w * h](),
98-
new unsigned char[w * h]()
99-
};
100-
101-
// Loop through pixels and split RGBA channels into separate arrays
102-
unsigned char *pixels = (unsigned char *) frame_image->bits();
103-
104-
#pragma omp parallel for
105-
for (int pixel = 0; pixel < w * h; ++pixel)
106-
{
107-
// Get the RGBA values from each pixel
108-
arrays_in.red[pixel] = arrays_out.red[pixel] = pixels[pixel * 4];
109-
arrays_in.green[pixel] = arrays_out.green[pixel] = pixels[pixel * 4 + 1];
110-
arrays_in.blue[pixel] = arrays_out.blue[pixel] = pixels[pixel * 4 + 2];
111-
arrays_in.alpha[pixel] = arrays_out.alpha[pixel] = pixels[pixel * 4 + 3];
112-
}
113-
114-
// Initialize target struct pointers for boxBlur operations
115-
channels *array_a = &arrays_in;
116-
channels *array_b = &arrays_out;
80+
// Grab two copies of the image pixel data
81+
QImage image_copy = frame_image->copy();
82+
std::shared_ptr<QImage> frame_image_2 = std::make_shared<QImage>(image_copy);
11783

11884
// Loop through each iteration
11985
for (int iteration = 0; iteration < iteration_value; ++iteration)
12086
{
12187
// HORIZONTAL BLUR (if any)
12288
if (horizontal_radius_value > 0.0) {
12389
// Apply horizontal blur to target RGBA channels
124-
#pragma omp parallel
125-
{
126-
boxBlurH(array_a->red, array_b->red, w, h, horizontal_radius_value);
127-
boxBlurH(array_a->green, array_b->green, w, h, horizontal_radius_value);
128-
boxBlurH(array_a->blue, array_b->blue, w, h, horizontal_radius_value);
129-
boxBlurH(array_a->alpha, array_b->alpha, w, h, horizontal_radius_value);
130-
}
131-
132-
// Swap input and output arrays
133-
channels *temp = array_a;
134-
array_a = array_b;
135-
array_b = temp;
90+
boxBlurH(frame_image->bits(), frame_image_2->bits(), w, h, horizontal_radius_value);
91+
92+
// Swap output image back to input
93+
frame_image.swap(frame_image_2);
13694
}
13795

13896
// VERTICAL BLUR (if any)
13997
if (vertical_radius_value > 0.0) {
14098
// Apply vertical blur to target RGBA channels
141-
#pragma omp parallel
142-
{
143-
boxBlurT(array_a->red, array_b->red, w, h, vertical_radius_value);
144-
boxBlurT(array_a->green, array_b->green, w, h, vertical_radius_value);
145-
boxBlurT(array_a->blue, array_b->blue, w, h, vertical_radius_value);
146-
boxBlurT(array_a->alpha, array_b->alpha, w, h, vertical_radius_value);
147-
}
99+
boxBlurT(frame_image->bits(), frame_image_2->bits(), w, h, vertical_radius_value);
148100

149-
// Swap input and output arrays
150-
channels *temp = array_a;
151-
array_a = array_b;
152-
array_b = temp;
101+
// Swap output image back to input
102+
frame_image.swap(frame_image_2);
153103
}
154104
}
155105

156-
// Copy RGBA channels back to original image
157-
#pragma omp parallel for
158-
for (int pixel = 0; pixel < w * h; ++pixel)
159-
{
160-
// Combine channels
161-
pixels[pixel * 4] = array_b->red[pixel];
162-
pixels[pixel * 4 + 1] = array_b->green[pixel];
163-
pixels[pixel * 4 + 2] = array_b->blue[pixel];
164-
pixels[pixel * 4 + 3] = array_b->alpha[pixel];
165-
}
166-
167-
// Delete channel arrays
168-
delete[] arrays_in.red;
169-
delete[] arrays_in.green;
170-
delete[] arrays_in.blue;
171-
delete[] arrays_in.alpha;
172-
173-
delete[] arrays_out.red;
174-
delete[] arrays_out.green;
175-
delete[] arrays_out.blue;
176-
delete[] arrays_out.alpha;
177-
178106
// return the modified frame
179107
return frame;
180108
}
181109

182110
// Credit: http://blog.ivank.net/fastest-gaussian-blur.html (MIT License)
111+
// Modified to process all four channels in a pixel array
183112
void Blur::boxBlurH(unsigned char *scl, unsigned char *tcl, int w, int h, int r) {
184113
float iarr = 1.0 / (r + r + 1);
185-
for (int i = 0; i < h; i++) {
186-
int ti = i * w, li = ti, ri = ti + r;
187-
int fv = scl[ti], lv = scl[ti + w - 1], val = (r + 1) * fv;
188-
for (int j = 0; j < r; j++) val += scl[ti + j];
189-
for (int j = 0; j <= r; j++) {
190-
val += scl[ri++] - fv;
191-
tcl[ti++] = round(val * iarr);
192-
}
193-
for (int j = r + 1; j < w - r; j++) {
194-
val += scl[ri++] - scl[li++];
195-
tcl[ti++] = round(val * iarr);
196-
}
197-
for (int j = w - r; j < w; j++) {
198-
val += lv - scl[li++];
199-
tcl[ti++] = round(val * iarr);
114+
115+
#pragma omp parallel for shared (scl, tcl)
116+
for (int i = 0; i < h; ++i) {
117+
for (int ch = 0; ch < 4; ++ch) {
118+
int ti = i * w, li = ti, ri = ti + r;
119+
int fv = scl[ti * 4 + ch], lv = scl[(ti + w - 1) * 4 + ch], val = (r + 1) * fv;
120+
for (int j = 0; j < r; ++j) {
121+
val += scl[(ti + j) * 4 + ch];
122+
}
123+
for (int j = 0; j <= r; ++j) {
124+
val += scl[ri++ * 4 + ch] - fv;
125+
tcl[ti++ * 4 + ch] = round(val * iarr);
126+
}
127+
for (int j = r + 1; j < w - r; ++j) {
128+
val += scl[ri++ * 4 + ch] - scl[li++ * 4 + ch];
129+
tcl[ti++ * 4 + ch] = round(val * iarr);
130+
}
131+
for (int j = w - r; j < w; ++j) {
132+
val += lv - scl[li++ * 4 + ch];
133+
tcl[ti++ * 4 + ch] = round(val * iarr);
134+
}
200135
}
201136
}
202137
}
203138

204139
void Blur::boxBlurT(unsigned char *scl, unsigned char *tcl, int w, int h, int r) {
205140
float iarr = 1.0 / (r + r + 1);
141+
142+
#pragma omp parallel for shared (scl, tcl)
206143
for (int i = 0; i < w; i++) {
207-
int ti = i, li = ti, ri = ti + r * w;
208-
int fv = scl[ti], lv = scl[ti + w * (h - 1)], val = (r + 1) * fv;
209-
for (int j = 0; j < r; j++) val += scl[ti + j * w];
210-
for (int j = 0; j <= r; j++) {
211-
val += scl[ri] - fv;
212-
tcl[ti] = round(val * iarr);
213-
ri += w;
214-
ti += w;
215-
}
216-
for (int j = r + 1; j < h - r; j++) {
217-
val += scl[ri] - scl[li];
218-
tcl[ti] = round(val * iarr);
219-
li += w;
220-
ri += w;
221-
ti += w;
222-
}
223-
for (int j = h - r; j < h; j++) {
224-
val += lv - scl[li];
225-
tcl[ti] = round(val * iarr);
226-
li += w;
227-
ti += w;
144+
for (int ch = 0; ch < 4; ++ch) {
145+
int ti = i, li = ti, ri = ti + r * w;
146+
int fv = scl[ti * 4 + ch], lv = scl[(ti + w * (h - 1)) * 4 + ch], val = (r + 1) * fv;
147+
for (int j = 0; j < r; j++) val += scl[(ti + j * w) * 4 + ch];
148+
for (int j = 0; j <= r; j++) {
149+
val += scl[ri * 4 + ch] - fv;
150+
tcl[ti * 4 + ch] = round(val * iarr);
151+
ri += w;
152+
ti += w;
153+
}
154+
for (int j = r + 1; j < h - r; j++) {
155+
val += scl[ri * 4 + ch] - scl[li * 4 + ch];
156+
tcl[ti * 4 + ch] = round(val * iarr);
157+
li += w;
158+
ri += w;
159+
ti += w;
160+
}
161+
for (int j = h - r; j < h; j++) {
162+
val += lv - scl[li * 4 + ch];
163+
tcl[ti * 4 + ch] = round(val * iarr);
164+
li += w;
165+
ti += w;
166+
}
228167
}
229168
}
230169
}

0 commit comments

Comments
 (0)