11/*
22 This file is part of darktable,
3- Copyright (C) 2010-2025 darktable developers.
3+ Copyright (C) 2010-2026 darktable developers.
44
55 darktable is free software: you can redistribute it and/or modify
66 it under the terms of the GNU General Public License as published by
@@ -137,10 +137,11 @@ static void lin_interpolate(float *out,
137137 I've extended the basic idea to work with non-Bayer filter arrays.
138138 Gradients are numbered clockwise from NW=0 to W=7.
139139*/
// NOTE(review): this span is a diff hunk, not plain C. Lines marked '-' are the removed
// helper _ensure_abovezero(), lines marked '+' are its replacement _copy_abovezero().
//
// Removed version: copies `floats` single floats from `from` to `to`, clamping each one
// with fmaxf(0.0f, x). fmaxf() treats a NaN operand as missing data and returns the other
// operand, so a NaN input was written out as 0.0f.
//
// Added version: the count is now given in 4-float pixels (the call sites changed in this
// diff pass `width - 4` where they used to pass `4 * (width - 4)`). Each pixel is handed
// to dt_vector_max() together with an all-zero pixel and the result is stored in `to`.
//
// NOTE(review): dt_vector_max() is defined outside this view - confirm that, for this
// argument order, it still turns NaN inputs into 0.0f the way fmaxf() did.
// NOTE(review): presumably dt_vector_max() expects dt_aligned_pixel_t-aligned pointers;
// verify that &to[i * 4] and &from[i * 4] satisfy that at every call site.
// NOTE(review): `zero` is only read in this function; it could presumably be declared
// `static const` - confirm against dt_vector_max()'s parameter types.
140- static inline void _ensure_abovezero (float * to , float * from , const int floats )
140+ static void _copy_abovezero (float * to , float * from , const int pixels )
141141{
142- for (int i = 0 ; i < floats ; i ++ )
143- to [i ] = fmaxf (0.0f , from [i ]);
142+ static dt_aligned_pixel_t zero = { 0.0f , 0.0f , 0.0f , 0.0f };
143+ for (int i = 0 ; i < pixels ; i ++ )
144+ dt_vector_max (& to [i * 4 ], zero , & from [i * 4 ]);
144145}
145146
146147static void vng_interpolate (float * out ,
@@ -295,14 +296,14 @@ static void vng_interpolate(float *out,
295296 }
296297 }
297298 if (row > 3 ) /* Write buffer to image */
298- _ensure_abovezero (out + 4 * ((row - 2 ) * width + 2 ), (float * )(brow [0 ] + 2 ), 4 * ( width - 4 ) );
299+ _copy_abovezero (out + 4 * ((row - 2 ) * width + 2 ), (float * )(brow [0 ] + 2 ), width - 4 );
299300
300301 // rotate ring buffer
301302 for (int g = 0 ; g < 4 ; g ++ ) brow [(g - 1 ) & 3 ] = brow [g ];
302303 }
303304 // copy the final two rows to the image
304- _ensure_abovezero (out + (4 * ((height - 4 ) * width + 2 )), (float * )(brow [0 ] + 2 ), 4 * ( width - 4 ) );
305- _ensure_abovezero (out + (4 * ((height - 3 ) * width + 2 )), (float * )(brow [1 ] + 2 ), 4 * ( width - 4 ) );
305+ _copy_abovezero (out + (4 * ((height - 4 ) * width + 2 )), (float * )(brow [0 ] + 2 ), width - 4 );
306+ _copy_abovezero (out + (4 * ((height - 3 ) * width + 2 )), (float * )(brow [1 ] + 2 ), width - 4 );
306307 dt_free_align (buffer );
307308
308309 if (filters != 9 && !FILTERS_ARE_4BAYER (filters )) // x-trans or CYGM/RGBE
@@ -337,7 +338,7 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
337338 else
338339 filters4 = filters | 0x0c0c0c0cu ;
339340
340- const int size = (filters4 == 9u ) ? 6 : 16 ;
341+ const int lsize = (filters4 == 9u ) ? 6 : 16 ;
341342 const int colors = (filters4 == 9u ) ? 3 : 4 ;
342343 const int prow = (filters4 == 9u ) ? 6 : 8 ;
343344 const int pcol = (filters4 == 9u ) ? 6 : 2 ;
@@ -351,53 +352,46 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
351352 cl_mem dev_ips = NULL ;
352353 cl_int err = CL_MEM_OBJECT_ALLOCATION_FAILURE ;
353354
354- int32_t (* lookup )[16 ][32 ] = NULL ;
355-
356-
357- // build interpolation lookup table for linear interpolation which for a given offset in the sensor
358- // lists neighboring pixels from which to interpolate:
359- // NUM_PIXELS # of neighboring pixels to read
360- // for(1..NUM_PIXELS):
361- // OFFSET # in bytes from current pixel
362- // WEIGHT # how much weight to give this neighbor
363- // COLOR # sensor color
364- // # weights of adjoining pixels not of this pixel's color
365- // COLORA TOT_WEIGHT
366- // COLORB TOT_WEIGHT
367- // COLORPIX # color of center pixel
368- const size_t lookup_size = (size_t )16 * 16 * 32 * sizeof (int32_t );
369- lookup = malloc (lookup_size );
370-
371- for (int row = 0 ; row < size ; row ++ )
372- for (int col = 0 ; col < size ; col ++ )
355+ const size_t lookup_size = (size_t )16 * 16 * 32 * sizeof (int32_t );
356+ int32_t (* lookup )[16 ][32 ] = malloc (lookup_size );
357+ if (!lookup ) goto finish ;
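358+ // build the lookup table for linear interpolation; for a given offset in the sensor pattern each entry
359+ // holds: count of differently colored neighbors, (offset, weight, color) per such neighbor,
359+ // (color, summed weight) per other color, and finally the color of the center pixel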
360+ for (int row = 0 ; row < lsize ; row ++ )
361+ {
362+ for (int col = 0 ; col < lsize ; col ++ )
363+ {
364+ int32_t * ip = & (lookup [row ][col ][1 ]);
365+ int sum [4 ] = { 0 };
366+ const int f = fcol (row , col , filters4 , xtrans );
367+ // make list of adjoining pixel offsets by weight & color
368+ for (int y = -1 ; y <= 1 ; y ++ )
373369 {
374- int32_t * ip = & (lookup [row ][col ][1 ]);
375- int sum [4 ] = { 0 };
376- const int f = fcol (row , col , filters4 , xtrans );
377- // make list of adjoining pixel offsets by weight & color
378- for (int y = -1 ; y <= 1 ; y ++ )
379- for (int x = -1 ; x <= 1 ; x ++ )
380- {
381- const int weight = 1 << ((y == 0 ) + (x == 0 ));
382- const int color = fcol (row + y , col + x , filters4 , xtrans );
383- if (color == f ) continue ;
384- * ip ++ = (y << 16 ) | (x & 0xffffu );
385- * ip ++ = weight ;
386- * ip ++ = color ;
387- sum [color ] += weight ;
388- }
389- lookup [row ][col ][0 ] = (ip - & (lookup [row ][col ][0 ])) / 3 ; /* # of neighboring pixels found */
390- for (int c = 0 ; c < colors ; c ++ )
391- if (c != f )
392- {
393- * ip ++ = c ;
394- * ip ++ = sum [c ];
395- }
396- * ip = f ;
370+ for (int x = -1 ; x <= 1 ; x ++ )
371+ {
372+ const int weight = 1 << ((y == 0 ) + (x == 0 ));
373+ const int color = fcol (row + y , col + x , filters4 , xtrans );
374+ if (color == f ) continue ;
375+ * ip ++ = (y << 16 ) | (x & 0xffffu );
376+ * ip ++ = weight ;
377+ * ip ++ = color ;
378+ sum [color ] += weight ;
379+ }
380+ }
381+ lookup [row ][col ][0 ] = (ip - & (lookup [row ][col ][0 ])) / 3 ; /* # of neighboring pixels found */
382+ for (int c = 0 ; c < colors ; c ++ )
383+ {
384+ if (c != f )
385+ {
386+ * ip ++ = c ;
387+ * ip ++ = sum [c ];
388+ }
397389 }
390+ * ip = f ;
391+ }
392+ }
398393
399- // Precalculate for VNG
400- static const signed char terms []
394+ static const signed char terms []
401395 = { -2 , -2 , +0 , -1 , 1 , 0x01 , -2 , -2 , +0 , +0 , 2 , 0x01 , -2 , -1 , -1 , +0 , 1 , 0x01 , -2 , -1 , +0 , -1 , 1 , 0x02 ,
402396 -2 , -1 , +0 , +0 , 1 , 0x03 , -2 , -1 , +0 , +1 , 2 , 0x01 , -2 , +0 , +0 , -1 , 1 , 0x06 , -2 , +0 , +0 , +0 , 2 , 0x02 ,
403397 -2 , +0 , +0 , +1 , 1 , 0x03 , -2 , +1 , -1 , +0 , 1 , 0x04 , -2 , +1 , +0 , -1 , 2 , 0x04 , -2 , +1 , +0 , +0 , 1 , 0x06 ,
@@ -414,16 +408,19 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
414408 +0 , +0 , +2 , +2 , 2 , 0x10 , +0 , +1 , +1 , +0 , 1 , 0x44 , +0 , +1 , +1 , +2 , 1 , 0x10 , +0 , +1 , +2 , -1 , 2 , 0x40 ,
415409 +0 , +1 , +2 , +0 , 1 , 0x60 , +0 , +1 , +2 , +1 , 1 , 0x20 , +0 , +1 , +2 , +2 , 1 , 0x10 , +1 , -2 , +1 , +0 , 1 , 0x80 ,
416410 +1 , -1 , +1 , +1 , 1 , 0x88 , +1 , +0 , +1 , +2 , 1 , 0x08 , +1 , +0 , +2 , -1 , 1 , 0x40 , +1 , +0 , +2 , +1 , 1 , 0x10 };
417- static const signed char chood []
411+ static const signed char chood []
418412 = { -1 , -1 , -1 , 0 , -1 , +1 , 0 , +1 , +1 , +1 , +1 , 0 , +1 , -1 , 0 , -1 };
419413
414+ if (!only_vng_linear )
415+ {
420416 const size_t ips_size = (size_t )prow * pcol * 352 * sizeof (int );
421417 ips = malloc (ips_size );
422418
423419 int * ip = ips ;
424420 int code [16 ][16 ];
425421
426422 for (int row = 0 ; row < prow ; row ++ )
423+ {
427424 for (int col = 0 ; col < pcol ; col ++ )
428425 {
429426 code [row ][col ] = ip - ips ;
@@ -468,20 +465,18 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
468465 }
469466 }
470467 }
468+ }
469+
470+ dev_code = dt_opencl_copy_host_to_device_constant (devid , sizeof (code ), code );
471+ if (dev_code == NULL ) goto finish ;
471472
473+ dev_ips = dt_opencl_copy_host_to_device_constant (devid , ips_size , ips );
474+ if (dev_ips == NULL ) goto finish ;
475+ }
472476
473477 dev_lookup = dt_opencl_copy_host_to_device_constant (devid , lookup_size , lookup );
474478 if (dev_lookup == NULL ) goto finish ;
475479
476- dev_code = dt_opencl_copy_host_to_device_constant (devid , sizeof (code ), code );
477- if (dev_code == NULL ) goto finish ;
478-
479- dev_ips = dt_opencl_copy_host_to_device_constant (devid , ips_size , ips );
480- if (dev_ips == NULL ) goto finish ;
481-
482- // need to reserve scaled auxiliary buffer or use dev_out
483- err = CL_MEM_OBJECT_ALLOCATION_FAILURE ;
484-
485480 // manage borders for linear interpolation part
486481 int border = 1 ;
487482 err = dt_opencl_enqueue_kernel_2d_args (devid , gd -> kernel_vng_border_interpolate , width , height ,
@@ -514,6 +509,8 @@ static cl_int process_vng_cl(const dt_iop_module_t *self,
514509 if (only_vng_linear )
515510 goto finish ;
516511
512+ // need to reserve scaled auxiliary buffer or use dev_out
513+ err = CL_MEM_OBJECT_ALLOCATION_FAILURE ;
517514 // do full VNG interpolation
518515 dev_tmp = dt_opencl_alloc_device (devid , width , height , sizeof (float ) * 4 );
519516 if (dev_tmp == NULL ) goto finish ;
0 commit comments