@@ -150,7 +150,7 @@ static void vng_interpolate(float *out,
150150 const int height ,
151151 const uint32_t filters ,
152152 const uint8_t (* const xtrans )[6 ],
153- const gboolean only_vng_linear )
153+ const gboolean only_linear )
154154{
155155 static const signed char terms []
156156 = { -2 , -2 , +0 , -1 , 1 , 0x01 , -2 , -2 , +0 , +0 , 2 , 0x01 , -2 , -1 , -1 , +0 , 1 , 0x01 , -2 , -1 , +0 , -1 , 1 , 0x02 ,
@@ -194,7 +194,7 @@ static void vng_interpolate(float *out,
194194 lin_interpolate (out , in , width , height , filters4 , xtrans );
195195
196196 // if only linear interpolation is requested we can stop it here
197- if (only_vng_linear )
197+ if (only_linear )
198198 {
199199 if (is_bayer ) goto bayer_greens ;
200200 else return ;
@@ -332,7 +332,7 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
332332 const int width ,
333333 const int height ,
334334 const uint32_t filters ,
335- const gboolean only_vng_linear )
335+ const gboolean only_linear )
336336{
337337 const dt_iop_demosaic_global_data_t * gd = self -> global_data ;
338338 const gboolean is_xtrans = (filters == 9u );
@@ -419,7 +419,7 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
419419 static const signed char chood []
420420 = { -1 , -1 , -1 , 0 , -1 , +1 , 0 , +1 , +1 , +1 , +1 , 0 , +1 , -1 , 0 , -1 };
421421
422- if (!only_vng_linear )
422+ if (!only_linear )
423423 {
424424 const size_t ips_size = (size_t )prow * pcol * 352 * sizeof (int );
425425 ips = malloc (ips_size );
@@ -485,43 +485,55 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
485485 dev_lookup = dt_opencl_copy_host_to_device_constant (devid , lookup_size , lookup );
486486 if (dev_lookup == NULL ) goto finish ;
487487
488- dev_tmp = dt_opencl_alloc_device (devid , width , height , sizeof (float ) * 4 );
489- if (dev_tmp == NULL ) goto finish ;
490-
491- cl_mem tmp_out = only_vng_linear ? dev_tmp : dev_out ;
488+ // Only the linear-only xtrans case needs no tmp buffer; it renders directly to dev_out
489+ const gboolean linear_xtrans = is_xtrans && only_linear ;
490+ if (!linear_xtrans )
491+ {
492+ dev_tmp = dt_opencl_alloc_device (devid , width , height , sizeof (float ) * 4 );
493+ if (dev_tmp == NULL ) goto finish ;
494+ }
492495
493- // manage borders for linear interpolation part
496+ /* Route each pass so that no extra buffer copy is needed:
497+ linear xtrans: linear pass writes directly to dev_out
498+ linear bayer: linear pass writes to dev_tmp, green equilibration takes dev_tmp to dev_out
499+ full xtrans: linear pass writes to dev_tmp, VNG pass writes directly to dev_out
500+ full bayer: linear pass writes to dev_out, VNG pass writes to dev_tmp, green equilibration takes dev_tmp to dev_out
501+ */
502+ const gboolean full_bayer = !is_xtrans && !only_linear ;
503+ const gboolean lin_to_out = linear_xtrans || full_bayer ;
504+ cl_mem dev_lin_out = lin_to_out ? dev_out : dev_tmp ;
505+ cl_mem dev_full_out = is_xtrans ? dev_out : dev_tmp ;
506+
507+ // write border **before** linear interpolation
494508 int border = 1 ;
495509 err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_vng_border_interpolate , width , height ,
496- CLARG (dev_in ), CLARG (tmp_out ), CLARG (width ), CLARG (height ), CLARG (border ),
510+ CLARG (dev_in ), CLARG (dev_lin_out ), CLARG (width ), CLARG (height ), CLARG (border ),
497511 CLARG (filters4 ), CLARG (dev_xtrans ));
498512 if (err != CL_SUCCESS ) goto finish ;
499513
500- {
501- // do linear interpolation
502- dt_opencl_local_buffer_t locopt
514+ // do linear interpolation
515+ dt_opencl_local_buffer_t locopt_lin
503516 = (dt_opencl_local_buffer_t ){ .xoffset = 2 * 1 , .xfactor = 1 , .yoffset = 2 * 1 , .yfactor = 1 ,
504517 .cellsize = 1 * sizeof (float ), .overhead = 0 ,
505518 .sizex = 1 << 8 , .sizey = 1 << 8 };
506-
507- if (!dt_opencl_local_buffer_opt (devid , gd -> kernel_vng_lin_interpolate , & locopt ))
508- {
509- err = CL_INVALID_WORK_DIMENSION ;
510- goto finish ;
511- }
512- size_t sizes [3 ] = { ROUNDUP (width , locopt .sizex ), ROUNDUP (height , locopt .sizey ), 1 };
513- size_t local [3 ] = { locopt .sizex , locopt .sizey , 1 };
514- dt_opencl_set_kernel_args (devid , gd -> kernel_vng_lin_interpolate , 0 ,
515- CLARG (dev_in ), CLARG (tmp_out ),
516- CLARG (width ), CLARG (height ), CLARG (filters4 ), CLARG (dev_lookup ), CLLOCAL (sizeof (float ) * (locopt .sizex + 2 ) * (locopt .sizey + 2 )));
517- err = dt_opencl_enqueue_kernel_2d_with_local (devid , gd -> kernel_vng_lin_interpolate , sizes , local );
518- if (err != CL_SUCCESS ) goto finish ;
519+ if (!dt_opencl_local_buffer_opt (devid , gd -> kernel_vng_lin_interpolate , & locopt_lin ))
520+ {
521+ err = CL_INVALID_WORK_DIMENSION ;
522+ goto finish ;
519523 }
520524
521- if (only_vng_linear )
525+ size_t sizes_lin [3 ] = { ROUNDUP (width , locopt_lin .sizex ), ROUNDUP (height , locopt_lin .sizey ), 1 };
526+ size_t local_lin [3 ] = { locopt_lin .sizex , locopt_lin .sizey , 1 };
527+ dt_opencl_set_kernel_args (devid , gd -> kernel_vng_lin_interpolate , 0 ,
528+ CLARG (dev_in ), CLARG (dev_lin_out ),
529+ CLARG (width ), CLARG (height ), CLARG (filters4 ), CLARG (dev_lookup ), CLLOCAL (sizeof (float ) * (locopt_lin .sizex + 2 ) * (locopt_lin .sizey + 2 )));
530+ err = dt_opencl_enqueue_kernel_2d_with_local (devid , gd -> kernel_vng_lin_interpolate , sizes_lin , local_lin );
531+ if (err != CL_SUCCESS ) goto finish ;
532+
533+ if (only_linear )
522534 goto backcopy ;
523535
524- // do full VNG interpolation; linear data is in dev_out
536+ // do full VNG interpolation; linear data is in dev_lin_out
525537 dt_opencl_local_buffer_t locopt
526538 = (dt_opencl_local_buffer_t ){ .xoffset = 2 * 2 , .xfactor = 1 , .yoffset = 2 * 2 , .yfactor = 1 ,
527539 .cellsize = 4 * sizeof (float ), .overhead = 0 ,
@@ -535,31 +547,23 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
535547 size_t sizes [3 ] = { ROUNDUP (width , locopt .sizex ), ROUNDUP (height , locopt .sizey ), 1 };
536548 size_t local [3 ] = { locopt .sizex , locopt .sizey , 1 };
537549 dt_opencl_set_kernel_args (devid , gd -> kernel_vng_interpolate , 0 ,
538- CLARG (dev_out ), CLARG (dev_tmp ),
550+ CLARG (dev_lin_out ), CLARG (dev_full_out ),
539551 CLARG (width ), CLARG (height ), CLARG (filters4 ),
540552 CLARG (dev_xtrans ), CLARG (dev_ips ), CLARG (dev_code ), CLLOCAL (sizeof (float ) * 4 * (locopt .sizex + 4 ) * (locopt .sizey + 4 )));
541553 err = dt_opencl_enqueue_kernel_2d_with_local (devid , gd -> kernel_vng_interpolate , sizes , local );
542554 if (err != CL_SUCCESS ) goto finish ;
543555
544- // manage borders
556+ // overwrite border as 2nd outermost pixels were not interpolated
545557 border = 2 ;
546558 err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_vng_border_interpolate , width , height ,
547- CLARG (dev_in ), CLARG (dev_tmp ), CLARG (width ), CLARG (height ), CLARG (border ),
559+ CLARG (dev_in ), CLARG (dev_full_out ), CLARG (width ), CLARG (height ), CLARG (border ),
548560 CLARG (filters4 ), CLARG (dev_xtrans ));
549561 if (err != CL_SUCCESS ) goto finish ;
550562
551563backcopy :
552564 if (!is_xtrans )
553- {
554565 err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_vng_green_equilibrate , width , height ,
555566 CLARG (dev_tmp ), CLARG (dev_out ), CLARG (width ), CLARG (height ));
556- }
557- else
558- {
559- size_t origin [] = { 0 , 0 , 0 };
560- size_t region [] = { width , height , 1 };
561- err = dt_opencl_enqueue_copy_image (devid , dev_tmp , dev_out , origin , origin , region );
562- }
563567
564568finish :
565569 dt_opencl_release_mem_object (dev_tmp );
0 commit comments