Skip to content

Commit 1793d3a

Browse files
committed
Merge branch 'fix_varios_highlights'
* fix_varios_highlights:
  Make use of standard dt opencl kernel calling
  Fix laplacian highlights reconstruction
  Reintroduce demosaic snapping for stability
2 parents 1492ecd + 1ee1ada commit 1793d3a

File tree

4 files changed

+52
-45
lines changed

4 files changed

+52
-45
lines changed

data/kernels/basic.cl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -833,15 +833,15 @@ interpolate_and_mask(read_only image2d_t input,
833833
const size_t j_prev = (j - 1);
834834
const size_t j_next = (j + 1);
835835

836-
const float north = read_imagef(input, samplerA, (int2)(j, i_prev)).x;
837-
const float south = read_imagef(input, samplerA, (int2)(j, i_next)).x;
838-
const float west = read_imagef(input, samplerA, (int2)(j_prev, i)).x;
839-
const float east = read_imagef(input, samplerA, (int2)(j_next, i)).x;
836+
const float north = read_imagef(input, sampleri, (int2)(j, i_prev)).x;
837+
const float south = read_imagef(input, sampleri, (int2)(j, i_next)).x;
838+
const float west = read_imagef(input, sampleri, (int2)(j_prev, i)).x;
839+
const float east = read_imagef(input, sampleri, (int2)(j_next, i)).x;
840840

841-
const float north_east = read_imagef(input, samplerA, (int2)(j_next, i_prev)).x;
842-
const float north_west = read_imagef(input, samplerA, (int2)(j_prev, i_prev)).x;
843-
const float south_east = read_imagef(input, samplerA, (int2)(j_next, i_next)).x;
844-
const float south_west = read_imagef(input, samplerA, (int2)(j_prev, i_next)).x;
841+
const float north_east = read_imagef(input, sampleri, (int2)(j_next, i_prev)).x;
842+
const float north_west = read_imagef(input, sampleri, (int2)(j_prev, i_prev)).x;
843+
const float south_east = read_imagef(input, sampleri, (int2)(j_next, i_next)).x;
844+
const float south_west = read_imagef(input, sampleri, (int2)(j_prev, i_next)).x;
845845

846846
if(c == GREEN) // green pixel
847847
{
@@ -913,8 +913,8 @@ interpolate_and_mask(read_only image2d_t input,
913913
}
914914
}
915915

916-
float4 RGB = {R, G, B, dtcl_sqrt(R * R + G * G + B * B) };
917-
float4 clipped = { R_clipped, G_clipped, B_clipped, (R_clipped || G_clipped || B_clipped) };
916+
const float4 RGB = {R, G, B, dtcl_sqrt(R * R + G * G + B * B) };
917+
const float4 clipped = { (float)R_clipped, (float)G_clipped, (float)B_clipped, (R_clipped || G_clipped || B_clipped) ? 1.0f : 0.0f };
918918
const float4 WB4 = { wb[0], wb[1], wb[2], wb[3] };
919919
write_imagef(interpolated, (int2)(j, i), RGB / WB4);
920920
write_imagef(clipping_mask, (int2)(j, i), clipped);
@@ -940,7 +940,7 @@ remosaic_and_replace(read_only image2d_t input,
940940
const int c = FC(i, j, filters);
941941
const float4 center = read_imagef(interpolated, sampleri, (int2)(j, i));
942942
float *rgb = (float *)&center;
943-
const float opacity = read_imagef(clipping_mask, sampleri, (int2)(j, i)).w;
943+
const float opacity = clipf(read_imagef(clipping_mask, sampleri, (int2)(j, i)).w);
944944
const float4 pix_in = read_imagef(input, sampleri, (int2)(j, i));
945945
const float4 pix_out = opacity * fmax(rgb[c] * wb[c], 0.f) + (1.f - opacity) * pix_in;
946946
write_imagef(output, (int2)(j, i), pix_out);
@@ -964,7 +964,7 @@ box_blur_5x5(read_only image2d_t in,
964964
{
965965
const int row = clamp(y + ii, 0, height - 1);
966966
const int col = clamp(x + jj, 0, width - 1);
967-
acc += read_imagef(in, samplerA, (int2)(col, row)) / 25.f;
967+
acc += fmax(0.0f, read_imagef(in, samplerA, (int2)(col, row))) / 25.f;
968968
}
969969

970970
write_imagef(out, (int2)(x, y), acc);

src/iop/demosaic.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -515,14 +515,23 @@ void modify_roi_out(dt_iop_module_t *self,
515515
roi_out->y = 0;
516516
}
517517

518+
// Snap coordinate p to a multiple of the sensor-pattern step selected by `filters`.
// The step is 1 when filters is 0, 3 when filters equals 9u, and 2 for every other value
// (presumably non-mosaiced, X-Trans and Bayer data respectively -- TODO confirm against the
// project's `filters` encoding).
// Returns (p / step) * step. C integer division truncates toward zero, so a non-negative p
// is rounded down to the previous multiple, while a negative p is rounded up toward zero.
static inline int _snap_to_cfa(const int p, const uint32_t filters)
519+
{
520+
const int snap = !filters ? 1 : filters != 9u ? 2 : 3; // step: 1 (filters == 0), 3 (filters == 9u), else 2
521+
return (p / snap) * snap; // drop the remainder so the result is a multiple of snap
522+
}
523+
518524
void modify_roi_in(dt_iop_module_t *self,
519525
dt_dev_pixelpipe_iop_t *piece,
520526
const dt_iop_roi_t *roi_out,
521527
dt_iop_roi_t *roi_in)
522528
{
523529
*roi_in = *roi_out;
524-
roi_in->x = MAX(0, roi_in->x / roi_out->scale);
525-
roi_in->y = MAX(0, roi_in->y / roi_out->scale);
530+
// always set position to closest top/left sensor pattern snap
531+
const uint32_t filters = piece->pipe->dsc.filters;
532+
roi_in->x = MAX(0, _snap_to_cfa(roi_in->x / roi_out->scale, filters));
533+
roi_in->y = MAX(0, _snap_to_cfa(roi_in->y / roi_out->scale, filters));
534+
526535
roi_in->width = MAX(8, roi_in->width / roi_out->scale);
527536
roi_in->height = MAX(8, roi_in->height / roi_out->scale);
528537
roi_in->scale = 1.0f;

src/iop/highlights.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,12 +583,14 @@ int process_cl(dt_iop_module_t *self,
583583

584584
dev_xtrans = dt_opencl_copy_host_to_device_constant(devid, sizeof(pipe->dsc.xtrans), pipe->dsc.xtrans);
585585
if(dev_xtrans == NULL) goto finish;
586+
const int dy = roi_out->y - roi_in->y;
587+
const int dx = roi_out->x - roi_in->x;
586588

587589
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_highlights_false_color, roi_out->width, roi_out->height,
588590
CLARG(dev_in), CLARG(dev_out),
589591
CLARG(roi_out->width), CLARG(roi_out->height),
590592
CLARG(roi_in->width), CLARG(roi_in->height),
591-
CLARG(roi_out->x), CLARG(roi_out->y),
593+
CLARG(dx), CLARG(dy),
592594
CLARG(filters), CLARG(dev_xtrans),
593595
CLARG(dev_clips));
594596
announce = FALSE;

src/iop/hlreconstruct/laplacian.c

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,8 @@ static void _interpolate_and_mask(const float *const restrict input,
164164
}
165165
}
166166

167-
dt_aligned_pixel_t RGB = { R, G, B, sqrtf(sqf(R) + sqf(G) + sqf(B)) };
168-
dt_aligned_pixel_t clipped = { R_clipped, G_clipped, B_clipped, (R_clipped || G_clipped || B_clipped) };
167+
const dt_aligned_pixel_t RGB = { R, G, B, sqrtf(sqf(R) + sqf(G) + sqf(B)) };
168+
const dt_aligned_pixel_t clipped = { (float)R_clipped, (float)G_clipped, (float)B_clipped, (R_clipped || G_clipped || B_clipped) ? 1.0f : 0.0f };
169169

170170
for_each_channel(k, aligned(RGB, interpolated, clipping_mask, clipped, wb))
171171
{
@@ -193,7 +193,7 @@ static void _remosaic_and_replace(const float *const restrict input,
193193
const size_t c = FC(i, j, filters);
194194
const size_t idx = i * width + j;
195195
const size_t index = idx * 4;
196-
const float opacity = clipping_mask[index + ALPHA];
196+
const float opacity = CLIP(clipping_mask[index + ALPHA]);
197197
output[idx] = opacity * fmaxf(interpolated[index + c] * wb[c], 0.f)
198198
+ (1.f - opacity) * input[idx];
199199
}
@@ -689,7 +689,6 @@ static inline cl_int wavelets_process_cl(const int devid,
689689
cl_mem in,
690690
cl_mem reconstructed,
691691
cl_mem clipping_mask,
692-
const size_t sizes[3],
693692
const int width,
694693
const int height,
695694
dt_iop_highlights_global_data_t *const gd,
@@ -702,7 +701,7 @@ static inline cl_int wavelets_process_cl(const int devid,
702701
const int salt,
703702
const float solid_color)
704703
{
705-
cl_int err = DT_OPENCL_DEFAULT_ERROR;
704+
cl_int err = CL_SUCCESS;
706705

707706
// À trous wavelet decompose
708707
// there is a paper from a guy we know that explains it : https://jo.dreggn.org/home/2010_atrous.pdf
@@ -732,21 +731,18 @@ static inline cl_int wavelets_process_cl(const int devid,
732731
}
733732

734733
// Compute wavelets low-frequency scales
735-
dt_opencl_set_kernel_args(devid, gd->kernel_filmic_bspline_horizontal, 0,
736-
CLARG(buffer_in), CLARG(HF), CLARG(width), CLARG(height), CLARG(mult));
737-
err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_filmic_bspline_horizontal, sizes);
734+
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_filmic_bspline_horizontal, width, height,
735+
CLARG(buffer_in), CLARG(HF), CLARG(width), CLARG(height), CLARG(mult));
738736
if(err != CL_SUCCESS) return err;
739737

740-
dt_opencl_set_kernel_args(devid, gd->kernel_filmic_bspline_vertical, 0,
741-
CLARG(HF), CLARG(buffer_out), CLARG(width), CLARG(height), CLARG(mult));
742-
err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_filmic_bspline_vertical, sizes);
738+
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_filmic_bspline_vertical, width, height,
739+
CLARG(HF), CLARG(buffer_out), CLARG(width), CLARG(height), CLARG(mult));
743740
if(err != CL_SUCCESS) return err;
744741

745742
// Compute wavelets high-frequency scales and backup the maximum of texture over the RGB channels
746743
// Note : HF = detail - LF
747-
dt_opencl_set_kernel_args(devid, gd->kernel_filmic_wavelets_detail, 0,
748-
CLARG(buffer_in), CLARG(buffer_out), CLARG(HF), CLARG(width), CLARG(height));
749-
err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_filmic_wavelets_detail, sizes);
744+
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_filmic_wavelets_detail, width, height,
745+
CLARG(buffer_in), CLARG(buffer_out), CLARG(HF), CLARG(width), CLARG(height));
750746
if(err != CL_SUCCESS) return err;
751747

752748
unsigned int current_scale_type = scale_type(s, scales);
@@ -755,22 +751,20 @@ static inline cl_int wavelets_process_cl(const int devid,
755751
// Compute wavelets low-frequency scales
756752
if(variant == DIFFUSE_RECONSTRUCT_RGB)
757753
{
758-
dt_opencl_set_kernel_args(devid, gd->kernel_highlights_guide_laplacians, 0,
754+
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_highlights_guide_laplacians, width, height,
759755
CLARG(HF), CLARG(buffer_out), CLARG(clipping_mask),
760756
CLARG(reconstructed), // read-only
761757
CLARG(reconstructed), // write-only
762758
CLARG(width), CLARG(height), CLARG(mult), CLARG(noise_level), CLARG(salt), CLARG(current_scale_type), CLARG(radius));
763-
err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_guide_laplacians, sizes);
764759
if(err != CL_SUCCESS) return err;
765760
}
766761
else // DIFFUSE_RECONSTRUCT_CHROMA
767762
{
768-
dt_opencl_set_kernel_args(devid, gd->kernel_highlights_diffuse_color, 0,
763+
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_highlights_diffuse_color, width, height,
769764
CLARG(HF), CLARG(buffer_out), CLARG(clipping_mask),
770765
CLARG(reconstructed), // read-only
771766
CLARG(reconstructed), // write-only
772767
CLARG(width), CLARG(height), CLARG(mult), CLARG(current_scale_type), CLARG(solid_color));
773-
err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_highlights_diffuse_color, sizes);
774768
if(err != CL_SUCCESS) return err;
775769
}
776770
}
@@ -789,7 +783,7 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
789783
dt_iop_highlights_data_t *data = piece->data;
790784
dt_iop_highlights_global_data_t *gd = self->global_data;
791785

792-
cl_int err = DT_OPENCL_DEFAULT_ERROR;
786+
cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
793787

794788
const int devid = piece->pipe->devid;
795789
const int width = roi_in->width;
@@ -798,8 +792,8 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
798792
const int ds_height = height / DS_FACTOR;
799793
const int ds_width = width / DS_FACTOR;
800794

801-
size_t sizes[] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid), 1 };
802-
size_t ds_sizes[] = { ROUNDUPDWD(ds_width, devid), ROUNDUPDHT(ds_height, devid), 1 };
795+
const size_t sizes[2] = { ROUNDUPDWD(width, devid), ROUNDUPDHT(height, devid) };
796+
const size_t ds_sizes[2] = { ROUNDUPDWD(ds_width, devid), ROUNDUPDHT(ds_height, devid) };
803797

804798
const uint32_t filters = piece->pipe->dsc.filters;
805799

@@ -811,6 +805,11 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
811805
wb[2] = piece->pipe->dsc.temperature.coeffs[2];
812806
}
813807

808+
const float scale = fmaxf(DS_FACTOR * piece->iscale / (roi_in->scale), 1.f);
809+
const float final_radius = (float)((int)(1 << data->scales)) / scale;
810+
const int scales = CLAMP((int)ceilf(log2f(final_radius)), 1, MAX_NUM_SCALES);
811+
const float noise_level = data->noise_level / scale;
812+
814813
cl_mem interpolated = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4); // [R, G, B, norm] for each pixel
815814
cl_mem clipping_mask = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4); // [R, G, B, norm] for each pixel
816815

@@ -819,24 +818,20 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
819818
cl_mem LF_even = dt_opencl_alloc_device(devid, ds_sizes[0], ds_sizes[1], sizeof(float) * 4);
820819
cl_mem temp = dt_opencl_alloc_device(devid, sizes[0], sizes[1], sizeof(float) * 4); // need full size here for blurring
821820

822-
const float scale = fmaxf(DS_FACTOR * piece->iscale / (roi_in->scale), 1.f);
823-
const float final_radius = (float)((int)(1 << data->scales)) / scale;
824-
const int scales = CLAMP((int)ceilf(log2f(final_radius)), 1, MAX_NUM_SCALES);
825-
826-
const float noise_level = data->noise_level / scale;
827-
828821
// wavelets scales buffers
829822
cl_mem HF = dt_opencl_alloc_device(devid, ds_sizes[0], ds_sizes[1], sizeof(float) * 4);
830823
cl_mem ds_interpolated = dt_opencl_alloc_device(devid, ds_sizes[0], ds_sizes[1], sizeof(float) * 4);
831824
cl_mem ds_clipping_mask = dt_opencl_alloc_device(devid, ds_sizes[0], ds_sizes[1], sizeof(float) * 4);
832825

833826
cl_mem clips_cl = dt_opencl_copy_host_to_device_constant(devid, 4 * sizeof(float), (float*)clips);
834827
cl_mem wb_cl = dt_opencl_copy_host_to_device_constant(devid, 4 * sizeof(float), (float*)wb);
828+
if(!interpolated || !clipping_mask || !LF_odd || !LF_even || !temp || !HF
829+
|| !ds_interpolated || !ds_clipping_mask || !clips_cl || !wb_cl)
830+
goto error;
835831

836832
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_highlights_bilinear_and_mask, width, height,
837833
CLARG(dev_in), CLARG(interpolated), CLARG(temp),
838834
CLARG(clips_cl), CLARG(wb_cl), CLARG(filters), CLARG(roi_out->width), CLARG(roi_out->height));
839-
dt_opencl_release_mem_object(clips_cl);
840835
if(err != CL_SUCCESS) goto error;
841836

842837
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_highlights_box_blur, width, height,
@@ -859,11 +854,11 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
859854
for(int i = 0; i < data->iterations; i++)
860855
{
861856
const int salt = (i == data->iterations - 1); // add noise on the last iteration only
862-
err = wavelets_process_cl(devid, ds_interpolated, temp, ds_clipping_mask, ds_sizes, ds_width, ds_height, gd, scales, HF,
857+
err = wavelets_process_cl(devid, ds_interpolated, temp, ds_clipping_mask, ds_width, ds_height, gd, scales, HF,
863858
LF_odd, LF_even, DIFFUSE_RECONSTRUCT_RGB, noise_level, salt, data->solid_color);
864859
if(err != CL_SUCCESS) goto error;
865860

866-
err = wavelets_process_cl(devid, temp, ds_interpolated, ds_clipping_mask, ds_sizes, ds_width, ds_height, gd, scales, HF,
861+
err = wavelets_process_cl(devid, temp, ds_interpolated, ds_clipping_mask, ds_width, ds_height, gd, scales, HF,
867862
LF_odd, LF_even, DIFFUSE_RECONSTRUCT_CHROMA, noise_level, salt, data->solid_color);
868863
if(err != CL_SUCCESS) goto error;
869864
}
@@ -881,6 +876,7 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
881876

882877
error:
883878
dt_opencl_release_mem_object(wb_cl);
879+
dt_opencl_release_mem_object(clips_cl);
884880
dt_opencl_release_mem_object(interpolated);
885881
dt_opencl_release_mem_object(ds_clipping_mask);
886882
dt_opencl_release_mem_object(ds_interpolated);

0 commit comments

Comments
 (0)