@@ -175,13 +175,16 @@ static void vng_interpolate(float *out,
175175 // ring buffer pointing to three most recent rows processed (brow[3]
176176 // is only used for rotating the buffer)
177177 float (* brow [4 ])[4 ];
178- const int prow = (filters == 9 ) ? 6 : 8 ;
179- const int pcol = (filters == 9 ) ? 6 : 2 ;
180- const int colors = (filters == 9 ) ? 3 : 4 ;
178+ const gboolean is_xtrans = (filters == 9 );
179+ const gboolean is_4bayer = FILTERS_ARE_4BAYER (filters );
180+ const gboolean is_bayer = !(is_xtrans || is_4bayer );
181+ const int prow = is_xtrans ? 6 : 8 ;
182+ const int pcol = is_xtrans ? 6 : 2 ;
183+ const int colors = is_xtrans ? 3 : 4 ;
181184
182185 // separate out G1 and G2 in RGGB Bayer patterns
183186 uint32_t filters4 = filters ;
184- if (filters == 9 || FILTERS_ARE_4BAYER ( filters )) // x-trans or CYGM/RGBE
187+ if (is_xtrans || is_4bayer )
185188 filters4 = filters ;
186189 else if ((filters & 3 ) == 1 )
187190 filters4 = filters | 0x03030303u ;
@@ -191,7 +194,11 @@ static void vng_interpolate(float *out,
191194 lin_interpolate (out , in , width , height , filters4 , xtrans );
192195
193196 // if only linear interpolation is requested we can stop it here
194- if (only_vng_linear ) return ;
197+ if (only_vng_linear )
198+ {
199+ if (is_bayer ) goto bayer_greens ;
200+ else return ;
201+ }
195202
196203 char * buffer = dt_alloc_aligned (sizeof (* * brow ) * width * 3 + sizeof (* ip ) * prow * pcol * 320 );
197204 if (!buffer )
@@ -306,9 +313,9 @@ static void vng_interpolate(float *out,
306313 _copy_abovezero (out + (4 * ((height - 3 ) * width + 2 )), (float * )(brow [1 ] + 2 ), width - 4 );
307314 dt_free_align (buffer );
308315
309- if ( filters != 9 && ! FILTERS_ARE_4BAYER ( filters )) // x-trans or CYGM/RGBE
310- {
311- // for Bayer mix the two greens to make VNG4
316+ bayer_greens :
317+ if ( is_bayer ) // for Bayer mix the two greens to make VNG4
318+ {
312319 DT_OMP_FOR ()
313320 for (int i = 0 ; i < height * width ; i ++ )
314321 out [i * 4 + 1 ] = (out [i * 4 + 1 ] + out [i * 4 + 3 ]) / 2.0f ;
@@ -328,20 +335,21 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
328335 const gboolean only_vng_linear )
329336{
330337 const dt_iop_demosaic_global_data_t * gd = self -> global_data ;
331-
338+ const gboolean is_xtrans = (filters == 9u );
339+
332340 // separate out G1 and G2 in Bayer patterns
333341 uint32_t filters4 ;
334- if (filters == 9u )
342+ if (is_xtrans )
335343 filters4 = filters ;
336344 else if ((filters & 3 ) == 1 )
337345 filters4 = filters | 0x03030303u ;
338346 else
339347 filters4 = filters | 0x0c0c0c0cu ;
340348
341- const int lsize = ( filters4 == 9u ) ? 6 : 16 ;
342- const int colors = ( filters4 == 9u ) ? 3 : 4 ;
343- const int prow = ( filters4 == 9u ) ? 6 : 8 ;
344- const int pcol = ( filters4 == 9u ) ? 6 : 2 ;
349+ const int lsize = is_xtrans ? 6 : 16 ;
350+ const int colors = is_xtrans ? 3 : 4 ;
351+ const int prow = is_xtrans ? 6 : 8 ;
352+ const int pcol = is_xtrans ? 6 : 2 ;
345353 const int devid = piece -> pipe -> devid ;
346354
347355 int * ips = NULL ;
@@ -477,10 +485,21 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
477485 dev_lookup = dt_opencl_copy_host_to_device_constant (devid , lookup_size , lookup );
478486 if (dev_lookup == NULL ) goto finish ;
479487
488+ if (!only_vng_linear || !is_xtrans ) // we need this for full VNG or VNG4
489+ {
490+ dev_tmp = dt_opencl_alloc_device (devid , width , height , sizeof (float ) * 4 );
491+ if (dev_tmp == NULL ) goto finish ;
492+ }
493+
494+ /* Linear-only Bayer (VNG4) still needs the green equilibration, so its linear interpolation
495+ goes to dev_tmp; dev_tmp is not allocated for linear-only X-Trans, which writes to dev_out.
496+ */
497+ cl_mem tmp_out = (only_vng_linear && !is_xtrans ) ? dev_tmp : dev_out ;
498+
480499 // manage borders for linear interpolation part
481500 int border = 1 ;
482501 err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_vng_border_interpolate , width , height ,
483- CLARG (dev_in ), CLARG (dev_out ), CLARG (width ), CLARG (height ), CLARG (border ),
502+ CLARG (dev_in ), CLARG (tmp_out ), CLARG (width ), CLARG (height ), CLARG (border ),
484503 CLARG (filters4 ), CLARG (dev_xtrans ));
485504 if (err != CL_SUCCESS ) goto finish ;
486505
@@ -499,22 +518,16 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
499518 size_t sizes [3 ] = { ROUNDUP (width , locopt .sizex ), ROUNDUP (height , locopt .sizey ), 1 };
500519 size_t local [3 ] = { locopt .sizex , locopt .sizey , 1 };
501520 dt_opencl_set_kernel_args (devid , gd -> kernel_vng_lin_interpolate , 0 ,
502- CLARG (dev_in ), CLARG (dev_out ),
521+ CLARG (dev_in ), CLARG (tmp_out ),
503522 CLARG (width ), CLARG (height ), CLARG (filters4 ), CLARG (dev_lookup ), CLLOCAL (sizeof (float ) * (locopt .sizex + 2 ) * (locopt .sizey + 2 )));
504523 err = dt_opencl_enqueue_kernel_2d_with_local (devid , gd -> kernel_vng_lin_interpolate , sizes , local );
505524 if (err != CL_SUCCESS ) goto finish ;
506525 }
507526
508-
509527 if (only_vng_linear )
510- goto finish ;
528+ goto vng4_greens ;
511529
512- // need to reserve scaled auxiliary buffer or use dev_out
513- err = CL_MEM_OBJECT_ALLOCATION_FAILURE ;
514530 // do full VNG interpolation
515- dev_tmp = dt_opencl_alloc_device (devid , width , height , sizeof (float ) * 4 );
516- if (dev_tmp == NULL ) goto finish ;
517-
518531 dt_opencl_local_buffer_t locopt
519532 = (dt_opencl_local_buffer_t ){ .xoffset = 2 * 2 , .xfactor = 1 , .yoffset = 2 * 2 , .yfactor = 1 ,
520533 .cellsize = 4 * sizeof (float ), .overhead = 0 ,
@@ -541,7 +554,8 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
541554 CLARG (filters4 ), CLARG (dev_xtrans ));
542555 if (err != CL_SUCCESS ) goto finish ;
543556
544- if (filters4 != 9 )
557+ vng4_greens :
558+ if (!is_xtrans )
545559 {
546560 err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_vng_green_equilibrate , width , height ,
547561 CLARG (dev_tmp ), CLARG (dev_out ), CLARG (width ), CLARG (height ));
0 commit comments