Skip to content

Commit cbbfdd7

Browse files
kgrytesaurabhraghuvanshii
authored andcommitted
feat: add dispatcher
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: na - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: passed - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent 757a70a commit cbbfdd7

File tree

1 file changed

+344
-0
lines changed
  • lib/node_modules/@stdlib/ndarray/base/unary-accumulate/src

1 file changed

+344
-0
lines changed
Lines changed: 344 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,344 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
#include "stdlib/ndarray/base/unary-accumulate/dispatch_object.h"
20+
#include "stdlib/ndarray/base/unary-accumulate/typedefs.h"
21+
#include "stdlib/ndarray/base/iteration_order.h"
22+
#include "stdlib/ndarray/base/bytes_per_element.h"
23+
#include "stdlib/ndarray/ctor.h"
24+
#include <stdint.h>
25+
#include <stddef.h>
26+
27+
/**
28+
* Performs a reduction over elements in n-dimensional input ndarray having `ndims-1` singleton dimensions.
29+
*
30+
* ## Notes
31+
*
32+
* - If able to successfully perform a reduction, the function returns `0`; otherwise, the function returns an error code.
33+
*
34+
* @param f ndarray function
35+
* @param x1 input ndarray
36+
* @param x2 initial value ndarray
37+
* @param x3 output ndarray
38+
* @param i index of the non-singleton dimension
39+
* @param fcn callback
40+
* @return status code
41+
*/
42+
static int8_t stdlib_ndarray_unary_accumulate_1d_squeeze( const ndarrayUnaryAccumulateFcn f, const struct ndarray *x1, const struct ndarray *x2, struct ndarray *x3, const int64_t i, void *fcn ) {
43+
int64_t sh[] = { stdlib_ndarray_shape( x1 )[ i ] };
44+
45+
// Shallow copy and reshape the array...
46+
int64_t sx1[] = { stdlib_ndarray_strides( x1 )[ i ] };
47+
struct ndarray *x1c = stdlib_ndarray_allocate(
48+
stdlib_ndarray_dtype( x1 ),
49+
stdlib_ndarray_data( x1 ),
50+
1,
51+
sh,
52+
sx1,
53+
stdlib_ndarray_offset( x1 ),
54+
stdlib_ndarray_order( x1 ),
55+
stdlib_ndarray_index_mode( x1 ),
56+
stdlib_ndarray_nsubmodes( x1 ),
57+
stdlib_ndarray_submodes( x1 )
58+
);
59+
if ( x1c == NULL ) {
60+
return -1;
61+
}
62+
// Perform the reduction:
63+
struct ndarray *arrays[] = { x1c, x2, x3 };
64+
int8_t status = f( arrays, fcn );
65+
66+
// Free allocated memory:
67+
stdlib_ndarray_free( x1c );
68+
69+
return status;
70+
}
71+
72+
/**
73+
* Performs a reduction over elements in a flattened n-dimensional input ndarray.
74+
*
75+
* ## Notes
76+
*
77+
* - If able to successfully perform a reduction, the function returns `0`; otherwise, the function returns an error code.
78+
*
79+
* @param f ndarray function
80+
* @param N number of elements
81+
* @param x1 input ndarray
82+
* @param s1 input ndarray stride length
83+
* @param x2 initial value ndarray
84+
* @param x3 output ndarray
85+
* @param fcn callback
86+
* @return status code
87+
*/
88+
static int8_t stdlib_ndarray_unary_accumulate_1d_flatten( const ndarrayUnaryAccumulateFcn f, const int64_t N, const struct ndarray *x1, const int64_t s1, const struct ndarray *x2, struct ndarray *x3, void *fcn ) {
89+
// Define the (flattened) strided array shape:
90+
int64_t sh[] = { N };
91+
92+
// Shallow copy and reshape the array...
93+
int64_t sx1[] = { s1 };
94+
struct ndarray *x1c = stdlib_ndarray_allocate(
95+
stdlib_ndarray_dtype( x1 ),
96+
stdlib_ndarray_data( x1 ),
97+
1,
98+
sh,
99+
sx1,
100+
stdlib_ndarray_offset( x1 ),
101+
stdlib_ndarray_order( x1 ),
102+
stdlib_ndarray_index_mode( x1 ),
103+
stdlib_ndarray_nsubmodes( x1 ),
104+
stdlib_ndarray_submodes( x1 )
105+
);
106+
if ( x1c == NULL ) {
107+
return -1;
108+
}
109+
// Perform the reduction:
110+
struct ndarray *arrays[] = { x1c, x2, x3 };
111+
int8_t status = f( arrays, fcn );
112+
113+
// Free allocated memory:
114+
stdlib_ndarray_free( x1c );
115+
116+
return status;
117+
}
118+
119+
/**
120+
* Dispatches to an ndarray function according to the dimensionality of provided ndarray arguments.
121+
*
122+
* ## Notes
123+
*
124+
* - If able to successfully dispatch, the function returns `0`; otherwise, the function returns an error code.
125+
*
126+
* @param obj object comprised of dispatch tables containing ndarray functions
127+
* @param arrays array whose first element is a pointer to an input ndarray, second element is a pointer to a zero-dimensional initial value ndarray, and last element is a pointer to a zero-dimensional output ndarray
128+
* @param fcn callback
129+
* @return status code
130+
*
131+
* @example
132+
* #include "stdlib/ndarray/base/unary-accumulate/dispatch.h"
133+
* #include "stdlib/ndarray/base/unary-accumulate/dispatch_object.h"
134+
* #include "stdlib/ndarray/base/unary-accumulate/typedefs.h"
135+
* #include "stdlib/ndarray/base/unary-accumulate/bb_b.h"
136+
* #include "stdlib/ndarray/ctor.h"
137+
* #include <stdint.h>
138+
* #include <stdlib.h>
139+
* #include <stdio.h>
140+
*
141+
* // Define a list of ndarray functions:
142+
* ndarrayUnaryAccumulateFcn functions[] = {
143+
* stdlib_ndarray_accumulate_bb_b_0d,
144+
* stdlib_ndarray_accumulate_bb_b_1d,
145+
* stdlib_ndarray_accumulate_bb_b_2d,
146+
* stdlib_ndarray_accumulate_bb_b_3d,
147+
* stdlib_ndarray_accumulate_bb_b_4d,
148+
* stdlib_ndarray_accumulate_bb_b_5d,
149+
* stdlib_ndarray_accumulate_bb_b_6d,
150+
* stdlib_ndarray_accumulate_bb_b_7d,
151+
* stdlib_ndarray_accumulate_bb_b_8d,
152+
* stdlib_ndarray_accumulate_bb_b_9d,
153+
* stdlib_ndarray_accumulate_bb_b_10d
154+
* stdlib_ndarray_accumulate_bb_b_nd
155+
* };
156+
*
157+
* // Define a list of ndarray functions using loop blocking:
158+
* ndarrayUnaryAccumulateFcn blocked_functions[] = {
159+
* stdlib_ndarray_accumulate_bb_b_2d_blocked,
160+
* stdlib_ndarray_accumulate_bb_b_3d_blocked,
161+
* stdlib_ndarray_accumulate_bb_b_4d_blocked,
162+
* stdlib_ndarray_accumulate_bb_b_5d_blocked,
163+
* stdlib_ndarray_accumulate_bb_b_6d_blocked,
164+
* stdlib_ndarray_accumulate_bb_b_7d_blocked,
165+
* stdlib_ndarray_accumulate_bb_b_8d_blocked,
166+
* stdlib_ndarray_accumulate_bb_b_9d_blocked,
167+
* stdlib_ndarray_accumulate_bb_b_10d_blocked
168+
* };
169+
*
170+
* // Create a function dispatch object:
171+
* struct ndarrayUnaryAccumulateDispatchObject obj = {
172+
* // Array containing ndarray functions:
173+
* functions,
174+
*
175+
* // Number of ndarray functions:
176+
* 12,
177+
*
178+
* // Array containing ndarray functions using loop blocking:
179+
* blocked_functions,
180+
*
181+
* // Number of ndarray functions using loop blocking:
182+
* 9
183+
* }
184+
*
185+
* // Define a function which performs dispatch:
186+
* int8_t stdlib_ndarray_accumulate_bb_b( struct ndarray *arrays[], void *fcn ) {
187+
* return stdlib_ndarray_unary_accumulate_dispatch( &obj, arrays, fcn );
188+
* }
189+
*
190+
* // ...
191+
*
192+
* // Create ndarrays...
193+
* struct ndarray *x = stdlib_ndarray_allocate( ... );
194+
* if ( x == NULL ) {
195+
* fprintf( stderr, "Error allocating memory.\n" );
196+
* exit( EXIT_FAILURE );
197+
* }
198+
*
199+
* struct ndarray *initial = stdlib_ndarray_allocate( ... );
200+
* if ( y == NULL ) {
201+
* fprintf( stderr, "Error allocating memory.\n" );
202+
* exit( EXIT_FAILURE );
203+
* }
204+
*
205+
* struct ndarray *out = stdlib_ndarray_allocate( ... );
206+
* if ( y == NULL ) {
207+
* fprintf( stderr, "Error allocating memory.\n" );
208+
* exit( EXIT_FAILURE );
209+
* }
210+
*
211+
* // ...
212+
*
213+
* // Define a callback:
214+
* uint8_t add( const uint8_t acc, const uint8_t x ) {
215+
* return acc + x;
216+
* }
217+
*
218+
* // Apply the callback:
219+
* struct ndarray *arrays[] = { x, initial, out };
220+
* int8_t status = stdlib_ndarray_accumulate_bb_b( arrays, (void *)add );
221+
* if ( status != 0 ) {
222+
* fprintf( stderr, "Error during computation.\n" );
223+
* exit( EXIT_FAILURE );
224+
* }
225+
*/
226+
int8_t stdlib_ndarray_unary_accumulate_dispatch( const struct ndarrayUnaryAccumulateDispatchObject *obj, struct ndarray *arrays[], void *fcn ) {
227+
const int64_t *sh1;
228+
struct ndarray *x1;
229+
struct ndarray *x2;
230+
struct ndarray *x3;
231+
int8_t status;
232+
int64_t ndims;
233+
int64_t mab1;
234+
int64_t mib1;
235+
int64_t *s1;
236+
int64_t len;
237+
int64_t bp1;
238+
int8_t io1;
239+
int64_t ns;
240+
int64_t s;
241+
int64_t d;
242+
int64_t i;
243+
244+
// Unpack the arrays:
245+
x1 = arrays[ 0 ];
246+
x2 = arrays[ 1 ];
247+
x3 = arrays[ 3 ];
248+
249+
ndims = stdlib_ndarray_ndims( x1 );
250+
251+
// Determine whether we can avoid iteration altogether...
252+
if ( ndims == 0 ) {
253+
obj->functions[ 0 ]( arrays, fcn );
254+
return 0;
255+
}
256+
sh1 = stdlib_ndarray_shape( x1 );
257+
258+
// Determine the number of elements and the number of singleton dimensions...
259+
len = 1; // number of elements
260+
ns = 0; // number of singleton dimensions
261+
for ( i = 0; i < ndims; i++ ) {
262+
d = sh1[ i ];
263+
264+
// Note that, if one of the dimensions is `0`, the length will be `0`...
265+
len *= d;
266+
267+
// Check whether the current dimension is a singleton dimension...
268+
if ( d == 1 ) {
269+
ns += 1;
270+
}
271+
}
272+
// Check whether we were provided an empty ndarray...
273+
if ( len == 0 ) {
274+
return 0;
275+
}
276+
// Determine whether the ndarray is one-dimensional and thus readily translates to a one-dimensional strided array...
277+
if ( ndims == 1 ) {
278+
obj->functions[ 1 ]( arrays, fcn );
279+
return 0;
280+
}
281+
// Determine whether the ndarray has only **one** non-singleton dimension (e.g., ndims=4, shape=[10,1,1,1]) so that we can treat an ndarray as being equivalent to a one-dimensional strided array...
282+
if ( ns == ndims-1 ) {
283+
// Get the index of the non-singleton dimension...
284+
for ( i = 0; i < ndims; i++ ) {
285+
if ( sh1[ i ] != 1 ) {
286+
break;
287+
}
288+
}
289+
// Remove the singleton dimensions and apply the callback function...
290+
status = stdlib_ndarray_unary_accumulate_1d_squeeze( obj->functions[ 1 ], x1, x2, x3, i, fcn );
291+
if ( status == 0 ) {
292+
return 0;
293+
}
294+
// If we failed, this is probably due to failed memory allocation, so fall through and try again...
295+
}
296+
s1 = stdlib_ndarray_strides( x1 );
297+
io1 = stdlib_ndarray_iteration_order( ndims, s1 ); // +/-1
298+
299+
// Determine whether we can avoid blocked iteration...
300+
if ( io1 != 0 ) {
301+
// Determine the minimum and maximum linear byte indices which are accessible by the array view:
302+
mib1 = stdlib_ndarray_offset( x1 ); // byte offset
303+
mab1 = mib1;
304+
for ( i = 0; i < ndims; i++ ) {
305+
s = s1[ i ]; // units: bytes
306+
if ( s > 0 ) {
307+
mab1 += s * ( sh1[i]-1 );
308+
} else if ( s < 0 ) {
309+
mib1 += s * ( sh1[i]-1 ); // decrements
310+
}
311+
}
312+
bp1 = stdlib_ndarray_bytes_per_element( stdlib_ndarray_dtype( x1 ) );
313+
314+
// Determine whether we can ignore shape (and strides) and treat the ndarray as a linear one-dimensional strided array...
315+
if ( ( len*bp1 ) == ( mab1-mib1+bp1 ) ) {
316+
// Note: the above is equivalent to @stdlib/ndarray/base/assert/is-contiguous, but in-lined so we can retain computed values...
317+
status = stdlib_ndarray_unary_accumulate_1d_flatten( obj->functions[ 1 ], len, x1, io1*bp1, x2, x3, fcn );
318+
if ( status == 0 ) {
319+
return 0;
320+
}
321+
// If we failed, this is probably due to failed memory allocation, so fall through and try again...
322+
}
323+
// The ndarray is non-contiguous, so we cannot directly use one-dimensional array functionality...
324+
325+
// Determine whether we can use simple nested loops...
326+
if ( ndims < (obj->nfunctions) ) {
327+
// So long as iteration always moves in the same direction (i.e., no mixed sign strides), we can leverage cache-optimal (i.e., normal) nested loops without resorting to blocked iteration...
328+
obj->functions[ ndims ]( arrays, fcn );
329+
return 0;
330+
}
331+
// Fall-through to blocked iteration...
332+
}
333+
// At this point, we're either dealing with a non-contiguous n-dimensional array or a high dimensional n-dimensional array, so our only hope is that we can still perform blocked iteration...
334+
335+
// Determine whether we can perform blocked iteration...
336+
if ( ndims <= (obj->nblockedfunctions)+1 ) {
337+
obj->blocked_functions[ ndims-2 ]( arrays, fcn );
338+
return 0;
339+
}
340+
// Fall-through to linear view iteration without regard for how data is stored in memory (i.e., take the slow path)...
341+
obj->functions[ (obj->nfunctions)-1 ]( arrays, fcn );
342+
343+
return 0;
344+
}

0 commit comments

Comments
 (0)