@@ -164,8 +164,8 @@ static void _interpolate_and_mask(const float *const restrict input,
164164 }
165165 }
166166
167- dt_aligned_pixel_t RGB = { R , G , B , sqrtf (sqf (R ) + sqf (G ) + sqf (B )) };
168- dt_aligned_pixel_t clipped = { R_clipped , G_clipped , B_clipped , (R_clipped || G_clipped || B_clipped ) };
167+ const dt_aligned_pixel_t RGB = { R , G , B , sqrtf (sqf (R ) + sqf (G ) + sqf (B )) };
168+ const dt_aligned_pixel_t clipped = { ( float ) R_clipped , ( float ) G_clipped , ( float ) B_clipped , (R_clipped || G_clipped || B_clipped ) ? 1.0f : 0.0f };
169169
170170 for_each_channel (k , aligned (RGB , interpolated , clipping_mask , clipped , wb ))
171171 {
@@ -193,7 +193,7 @@ static void _remosaic_and_replace(const float *const restrict input,
193193 const size_t c = FC (i , j , filters );
194194 const size_t idx = i * width + j ;
195195 const size_t index = idx * 4 ;
196- const float opacity = clipping_mask [index + ALPHA ];
196+ const float opacity = CLIP ( clipping_mask [index + ALPHA ]) ;
197197 output [idx ] = opacity * fmaxf (interpolated [index + c ] * wb [c ], 0.f )
198198 + (1.f - opacity ) * input [idx ];
199199 }
@@ -689,7 +689,6 @@ static inline cl_int wavelets_process_cl(const int devid,
689689 cl_mem in ,
690690 cl_mem reconstructed ,
691691 cl_mem clipping_mask ,
692- const size_t sizes [3 ],
693692 const int width ,
694693 const int height ,
695694 dt_iop_highlights_global_data_t * const gd ,
@@ -702,7 +701,7 @@ static inline cl_int wavelets_process_cl(const int devid,
702701 const int salt ,
703702 const float solid_color )
704703{
705- cl_int err = DT_OPENCL_DEFAULT_ERROR ;
704+ cl_int err = CL_SUCCESS ;
706705
707706 // À trous wavelet decompose
708707 // there is a paper from a guy we know that explains it : https://jo.dreggn.org/home/2010_atrous.pdf
@@ -732,21 +731,18 @@ static inline cl_int wavelets_process_cl(const int devid,
732731 }
733732
734733 // Compute wavelets low-frequency scales
735- dt_opencl_set_kernel_args (devid , gd -> kernel_filmic_bspline_horizontal , 0 ,
736- CLARG (buffer_in ), CLARG (HF ), CLARG (width ), CLARG (height ), CLARG (mult ));
737- err = dt_opencl_enqueue_kernel_2d (devid , gd -> kernel_filmic_bspline_horizontal , sizes );
734+ err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_filmic_bspline_horizontal , width , height ,
735+ CLARG (buffer_in ), CLARG (HF ), CLARG (width ), CLARG (height ), CLARG (mult ));
738736 if (err != CL_SUCCESS ) return err ;
739737
740- dt_opencl_set_kernel_args (devid , gd -> kernel_filmic_bspline_vertical , 0 ,
741- CLARG (HF ), CLARG (buffer_out ), CLARG (width ), CLARG (height ), CLARG (mult ));
742- err = dt_opencl_enqueue_kernel_2d (devid , gd -> kernel_filmic_bspline_vertical , sizes );
738+ err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_filmic_bspline_vertical , width , height ,
739+ CLARG (HF ), CLARG (buffer_out ), CLARG (width ), CLARG (height ), CLARG (mult ));
743740 if (err != CL_SUCCESS ) return err ;
744741
745742 // Compute wavelets high-frequency scales and backup the maximum of texture over the RGB channels
746743 // Note : HF = detail - LF
747- dt_opencl_set_kernel_args (devid , gd -> kernel_filmic_wavelets_detail , 0 ,
748- CLARG (buffer_in ), CLARG (buffer_out ), CLARG (HF ), CLARG (width ), CLARG (height ));
749- err = dt_opencl_enqueue_kernel_2d (devid , gd -> kernel_filmic_wavelets_detail , sizes );
744+ err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_filmic_wavelets_detail , width , height ,
745+ CLARG (buffer_in ), CLARG (buffer_out ), CLARG (HF ), CLARG (width ), CLARG (height ));
750746 if (err != CL_SUCCESS ) return err ;
751747
752748 unsigned int current_scale_type = scale_type (s , scales );
@@ -755,22 +751,20 @@ static inline cl_int wavelets_process_cl(const int devid,
755751 // Compute wavelets low-frequency scales
756752 if (variant == DIFFUSE_RECONSTRUCT_RGB )
757753 {
758- dt_opencl_set_kernel_args (devid , gd -> kernel_highlights_guide_laplacians , 0 ,
754+ err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_highlights_guide_laplacians , width , height ,
759755 CLARG (HF ), CLARG (buffer_out ), CLARG (clipping_mask ),
760756 CLARG (reconstructed ), // read-only
761757 CLARG (reconstructed ), // write-only
762758 CLARG (width ), CLARG (height ), CLARG (mult ), CLARG (noise_level ), CLARG (salt ), CLARG (current_scale_type ), CLARG (radius ));
763- err = dt_opencl_enqueue_kernel_2d (devid , gd -> kernel_highlights_guide_laplacians , sizes );
764759 if (err != CL_SUCCESS ) return err ;
765760 }
766761 else // DIFFUSE_RECONSTRUCT_CHROMA
767762 {
768- dt_opencl_set_kernel_args (devid , gd -> kernel_highlights_diffuse_color , 0 ,
763+ err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_highlights_diffuse_color , width , height ,
769764 CLARG (HF ), CLARG (buffer_out ), CLARG (clipping_mask ),
770765 CLARG (reconstructed ), // read-only
771766 CLARG (reconstructed ), // write-only
772767 CLARG (width ), CLARG (height ), CLARG (mult ), CLARG (current_scale_type ), CLARG (solid_color ));
773- err = dt_opencl_enqueue_kernel_2d (devid , gd -> kernel_highlights_diffuse_color , sizes );
774768 if (err != CL_SUCCESS ) return err ;
775769 }
776770 }
@@ -789,7 +783,7 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
789783 dt_iop_highlights_data_t * data = piece -> data ;
790784 dt_iop_highlights_global_data_t * gd = self -> global_data ;
791785
792- cl_int err = DT_OPENCL_DEFAULT_ERROR ;
786+ cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE ;
793787
794788 const int devid = piece -> pipe -> devid ;
795789 const int width = roi_in -> width ;
@@ -798,8 +792,8 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
798792 const int ds_height = height / DS_FACTOR ;
799793 const int ds_width = width / DS_FACTOR ;
800794
801- size_t sizes [] = { ROUNDUPDWD (width , devid ), ROUNDUPDHT (height , devid ), 1 };
802- size_t ds_sizes [] = { ROUNDUPDWD (ds_width , devid ), ROUNDUPDHT (ds_height , devid ), 1 };
795+ const size_t sizes [2 ] = { ROUNDUPDWD (width , devid ), ROUNDUPDHT (height , devid ) };
796+ const size_t ds_sizes [2 ] = { ROUNDUPDWD (ds_width , devid ), ROUNDUPDHT (ds_height , devid ) };
803797
804798 const uint32_t filters = piece -> pipe -> dsc .filters ;
805799
@@ -811,6 +805,11 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
811805 wb [2 ] = piece -> pipe -> dsc .temperature .coeffs [2 ];
812806 }
813807
808+ const float scale = fmaxf (DS_FACTOR * piece -> iscale / (roi_in -> scale ), 1.f );
809+ const float final_radius = (float )((int )(1 << data -> scales )) / scale ;
810+ const int scales = CLAMP ((int )ceilf (log2f (final_radius )), 1 , MAX_NUM_SCALES );
811+ const float noise_level = data -> noise_level / scale ;
812+
814813 cl_mem interpolated = dt_opencl_alloc_device (devid , sizes [0 ], sizes [1 ], sizeof (float ) * 4 ); // [R, G, B, norm] for each pixel
815814 cl_mem clipping_mask = dt_opencl_alloc_device (devid , sizes [0 ], sizes [1 ], sizeof (float ) * 4 ); // [R, G, B, norm] for each pixel
816815
@@ -819,24 +818,20 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
819818 cl_mem LF_even = dt_opencl_alloc_device (devid , ds_sizes [0 ], ds_sizes [1 ], sizeof (float ) * 4 );
820819 cl_mem temp = dt_opencl_alloc_device (devid , sizes [0 ], sizes [1 ], sizeof (float ) * 4 ); // need full size here for blurring
821820
822- const float scale = fmaxf (DS_FACTOR * piece -> iscale / (roi_in -> scale ), 1.f );
823- const float final_radius = (float )((int )(1 << data -> scales )) / scale ;
824- const int scales = CLAMP ((int )ceilf (log2f (final_radius )), 1 , MAX_NUM_SCALES );
825-
826- const float noise_level = data -> noise_level / scale ;
827-
828821 // wavelets scales buffers
829822 cl_mem HF = dt_opencl_alloc_device (devid , ds_sizes [0 ], ds_sizes [1 ], sizeof (float ) * 4 );
830823 cl_mem ds_interpolated = dt_opencl_alloc_device (devid , ds_sizes [0 ], ds_sizes [1 ], sizeof (float ) * 4 );
831824 cl_mem ds_clipping_mask = dt_opencl_alloc_device (devid , ds_sizes [0 ], ds_sizes [1 ], sizeof (float ) * 4 );
832825
833826 cl_mem clips_cl = dt_opencl_copy_host_to_device_constant (devid , 4 * sizeof (float ), (float * )clips );
834827 cl_mem wb_cl = dt_opencl_copy_host_to_device_constant (devid , 4 * sizeof (float ), (float * )wb );
828+ if (!interpolated || !clipping_mask || !LF_odd || !LF_even || !temp || !HF
829+ || !ds_interpolated || !ds_clipping_mask || !clips_cl || !wb_cl )
830+ goto error ;
835831
836832 err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_highlights_bilinear_and_mask , width , height ,
837833 CLARG (dev_in ), CLARG (interpolated ), CLARG (temp ),
838834 CLARG (clips_cl ), CLARG (wb_cl ), CLARG (filters ), CLARG (roi_out -> width ), CLARG (roi_out -> height ));
839- dt_opencl_release_mem_object (clips_cl );
840835 if (err != CL_SUCCESS ) goto error ;
841836
842837 err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_highlights_box_blur , width , height ,
@@ -859,11 +854,11 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
859854 for (int i = 0 ; i < data -> iterations ; i ++ )
860855 {
861856 const int salt = (i == data -> iterations - 1 ); // add noise on the last iteration only
862- err = wavelets_process_cl (devid , ds_interpolated , temp , ds_clipping_mask , ds_sizes , ds_width , ds_height , gd , scales , HF ,
857+ err = wavelets_process_cl (devid , ds_interpolated , temp , ds_clipping_mask , ds_width , ds_height , gd , scales , HF ,
863858 LF_odd , LF_even , DIFFUSE_RECONSTRUCT_RGB , noise_level , salt , data -> solid_color );
864859 if (err != CL_SUCCESS ) goto error ;
865860
866- err = wavelets_process_cl (devid , temp , ds_interpolated , ds_clipping_mask , ds_sizes , ds_width , ds_height , gd , scales , HF ,
861+ err = wavelets_process_cl (devid , temp , ds_interpolated , ds_clipping_mask , ds_width , ds_height , gd , scales , HF ,
867862 LF_odd , LF_even , DIFFUSE_RECONSTRUCT_CHROMA , noise_level , salt , data -> solid_color );
868863 if (err != CL_SUCCESS ) goto error ;
869864 }
@@ -881,6 +876,7 @@ static cl_int process_laplacian_bayer_cl(dt_iop_module_t *self,
881876
882877error :
883878 dt_opencl_release_mem_object (wb_cl );
879+ dt_opencl_release_mem_object (clips_cl );
884880 dt_opencl_release_mem_object (interpolated );
885881 dt_opencl_release_mem_object (ds_clipping_mask );
886882 dt_opencl_release_mem_object (ds_interpolated );
0 commit comments