Skip to content

Commit 3677e41

Browse files
committed
feat: add dispatch implementation
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: na - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: passed - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent 2c5c512 commit 3677e41

File tree

1 file changed

+328
-0
lines changed
  • lib/node_modules/@stdlib/ndarray/base/every/src

1 file changed

+328
-0
lines changed
Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
#include "stdlib/ndarray/base/every/dispatch_object.h"
20+
#include "stdlib/ndarray/base/every/typedefs.h"
21+
#include "stdlib/ndarray/base/iteration_order.h"
22+
#include "stdlib/ndarray/base/bytes_per_element.h"
23+
#include "stdlib/ndarray/ctor.h"
24+
#include <stdint.h>
25+
#include <stddef.h>
26+
27+
/**
28+
* Tests whether every element is truthy in an n-dimensional input ndarray having `ndims-1` singleton dimensions and assigns results to elements in a zero-dimensional output ndarray.
29+
*
30+
* ## Notes
31+
*
32+
* - If able to successfully perform operation, the function returns `0`; otherwise, the function returns an error code.
33+
*
34+
* @param f ndarray function
35+
* @param x1 input ndarray
36+
* @param x2 output ndarray
37+
* @param i index of the non-singleton dimension
38+
* @param data function data (e.g., a callback)
39+
* @return status code
40+
*/
41+
static int8_t stdlib_ndarray_every_1d_squeeze( const ndarrayEveryFcn f, struct ndarray *x1, struct ndarray *x2, const int64_t i, void *data ) {
42+
int64_t sh[] = { stdlib_ndarray_shape( x1 )[ i ] };
43+
44+
// Shallow copy and reshape the array...
45+
int64_t sx1[] = { stdlib_ndarray_strides( x1 )[ i ] };
46+
struct ndarray *x1c = stdlib_ndarray_allocate(
47+
stdlib_ndarray_dtype( x1 ),
48+
stdlib_ndarray_data( x1 ),
49+
1,
50+
sh,
51+
sx1,
52+
stdlib_ndarray_offset( x1 ),
53+
stdlib_ndarray_order( x1 ),
54+
stdlib_ndarray_index_mode( x1 ),
55+
stdlib_ndarray_nsubmodes( x1 ),
56+
stdlib_ndarray_submodes( x1 )
57+
);
58+
if ( x1c == NULL ) {
59+
return -1;
60+
}
61+
// Perform computation:
62+
struct ndarray *arrays[] = { x1c, x2 };
63+
int8_t status = f( arrays, data );
64+
65+
// Free allocated memory:
66+
stdlib_ndarray_free( x1c );
67+
68+
return status;
69+
}
70+
71+
/**
72+
* Tests whether every element is truthy in a flattened n-dimensional input ndarray and assigns results to elements in a zero-dimensional output ndarray.
73+
*
74+
* ## Notes
75+
*
76+
* - If able to successfully perform operation, the function returns `0`; otherwise, the function returns an error code.
77+
*
78+
* @param f ndarray function
79+
* @param N number of elements
80+
* @param x1 input ndarray
81+
* @param s1 input ndarray stride length
82+
* @param x2 output ndarray
83+
* @param data function data (e.g., a callback)
84+
* @return status code
85+
*/
86+
static int8_t stdlib_ndarray_every_1d_flatten( const ndarrayEveryFcn f, const int64_t N, struct ndarray *x1, const int64_t s1, struct ndarray *x2, void *data ) {
87+
// Define the (flattened) strided array shape:
88+
int64_t sh[] = { N };
89+
90+
// Shallow copy and reshape the array...
91+
int64_t sx1[] = { s1 };
92+
struct ndarray *x1c = stdlib_ndarray_allocate(
93+
stdlib_ndarray_dtype( x1 ),
94+
stdlib_ndarray_data( x1 ),
95+
1,
96+
sh,
97+
sx1,
98+
stdlib_ndarray_offset( x1 ),
99+
stdlib_ndarray_order( x1 ),
100+
stdlib_ndarray_index_mode( x1 ),
101+
stdlib_ndarray_nsubmodes( x1 ),
102+
stdlib_ndarray_submodes( x1 )
103+
);
104+
if ( x1c == NULL ) {
105+
return -1;
106+
}
107+
// Perform computation:
108+
struct ndarray *arrays[] = { x1c, x2 };
109+
int8_t status = f( arrays, data );
110+
111+
// Free allocated memory:
112+
stdlib_ndarray_free( x1c );
113+
114+
return status;
115+
}
116+
117+
/**
118+
* Dispatches to an ndarray function according to the dimensionality of provided ndarray arguments.
119+
*
120+
* ## Notes
121+
*
122+
* - If able to successfully dispatch, the function returns `0`; otherwise, the function returns an error code.
123+
*
124+
* @param obj object comprised of dispatch tables containing ndarray functions
125+
* @param arrays array whose first element is a pointer to an input ndarray and whose last element is a pointer to an output ndarray
126+
* @param data function data (e.g., a callback)
127+
* @return status code
128+
*
129+
* @example
130+
* #include "stdlib/ndarray/base/every/dispatch.h"
131+
* #include "stdlib/ndarray/base/every/dispatch_object.h"
132+
* #include "stdlib/ndarray/base/every/typedefs.h"
133+
* #include "stdlib/ndarray/base/every/b.h"
134+
* #include "stdlib/ndarray/ctor.h"
135+
* #include <stdint.h>
136+
* #include <stdlib.h>
137+
* #include <stdio.h>
138+
*
139+
* // Define a list of ndarray functions:
140+
* ndarrayEveryFcn functions[] = {
141+
* stdlib_ndarray_every_b_x_0d,
142+
* stdlib_ndarray_every_b_x_1d,
143+
* stdlib_ndarray_every_b_x_2d,
144+
* stdlib_ndarray_every_b_x_3d,
145+
* stdlib_ndarray_every_b_x_4d,
146+
* stdlib_ndarray_every_b_x_5d,
147+
* stdlib_ndarray_every_b_x_6d,
148+
* stdlib_ndarray_every_b_x_7d,
149+
* stdlib_ndarray_every_b_x_8d,
150+
* stdlib_ndarray_every_b_x_9d,
151+
* stdlib_ndarray_every_b_x_10d,
152+
* stdlib_ndarray_every_b_x_nd
153+
* };
154+
*
155+
* // Define a list of ndarray functions using loop blocking...
156+
* ndarrayEveryFcn blocked_functions[] = {
157+
* stdlib_ndarray_every_b_x_2d_blocked,
158+
* stdlib_ndarray_every_b_x_3d_blocked,
159+
* stdlib_ndarray_every_b_x_4d_blocked,
160+
* stdlib_ndarray_every_b_x_5d_blocked,
161+
* stdlib_ndarray_every_b_x_6d_blocked,
162+
* stdlib_ndarray_every_b_x_7d_blocked,
163+
* stdlib_ndarray_every_b_x_8d_blocked,
164+
* stdlib_ndarray_every_b_x_9d_blocked,
165+
* stdlib_ndarray_every_b_x_10d_blocked
166+
* };
167+
*
168+
* // Create a function dispatch object:
169+
* struct ndarrayEveryDispatchObject obj = {
170+
* // Array containing ndarray functions:
171+
* functions,
172+
*
173+
* // Number of ndarray functions:
174+
* 12,
175+
*
176+
* // Array containing ndarray functions using loop blocking:
177+
* blocked_functions,
178+
*
179+
* // Number of ndarray functions using loop blocking:
180+
* 9
181+
* };
182+
*
183+
* // Define a function which performs dispatch:
184+
* int8_t stdlib_ndarray_every_b_x( struct ndarray *arrays[], void *data ) {
185+
* return stdlib_ndarray_every_dispatch( &obj, arrays, data );
186+
* }
187+
*
188+
* // ...
189+
*
190+
* // Create ndarrays:
191+
* struct ndarray *x = stdlib_ndarray_allocate( ... );
192+
* if ( x == NULL ) {
193+
* fprintf( stderr, "Error allocating memory.\n" );
194+
* exit( EXIT_FAILURE );
195+
* }
196+
*
197+
* struct ndarray *y = stdlib_ndarray_allocate( ... );
198+
* if ( y == NULL ) {
199+
* fprintf( stderr, "Error allocating memory.\n" );
200+
* exit( EXIT_FAILURE );
201+
* }
202+
*
203+
* // ...
204+
*
205+
* // Perform computation:
206+
* struct ndarray *arrays[] = { x, y };
207+
* int8_t status = stdlib_ndarray_every_b_x( arrays, NULL );
208+
* if ( status != 0 ) {
209+
* fprintf( stderr, "Error during computation.\n" );
210+
* exit( EXIT_FAILURE );
211+
* }
212+
*/
213+
int8_t stdlib_ndarray_every_dispatch( const struct ndarrayEveryDispatchObject *obj, struct ndarray *arrays[], void *data ) {
214+
const int64_t *sh1;
215+
struct ndarray *x1;
216+
struct ndarray *x2;
217+
int8_t status;
218+
int64_t ndims;
219+
int64_t mab1;
220+
int64_t mib1;
221+
int64_t *s1;
222+
int64_t len;
223+
int64_t bp1;
224+
int8_t io1;
225+
int64_t ns;
226+
int64_t s;
227+
int64_t d;
228+
int64_t i;
229+
230+
// Unpack the arrays:
231+
x1 = arrays[ 0 ];
232+
x2 = arrays[ 1 ];
233+
234+
// Determine whether we can avoid iteration altogether...
235+
ndims = stdlib_ndarray_ndims( x1 );
236+
if ( ndims == 0 ) {
237+
obj->functions[ 0 ]( arrays, data );
238+
return 0;
239+
}
240+
sh1 = stdlib_ndarray_shape( x1 );
241+
242+
// Compute the number of elements and the number of singleton dimensions...
243+
len = 1; // number of elements
244+
ns = 0; // number of singleton dimensions
245+
for ( i = 0; i < ndims; i++ ) {
246+
d = sh1[ i ];
247+
248+
// Note that, if one of the dimensions is `0`, the length will be `0`...
249+
len *= d;
250+
251+
// Check whether the current dimension is a singleton dimension...
252+
if ( d == 1 ) {
253+
ns += 1;
254+
}
255+
}
256+
// Check whether we were provided an empty ndarray...
257+
if ( len == 0 ) {
258+
return 0;
259+
}
260+
// Determine whether the input ndarray is one-dimensional and thus readily translates to a one-dimensional strided array...
261+
if ( ndims == 1 ) {
262+
obj->functions[ 1 ]( arrays, data );
263+
return 0;
264+
}
265+
// Determine whether the input ndarray has only **one** non-singleton dimension (e.g., ndims=4, shape=[10,1,1,1]) so that we can treat the ndarray as being equivalent to a one-dimensional strided array...
266+
if ( ns == ndims-1 ) {
267+
// Get the index of the non-singleton dimension...
268+
for ( i = 0; i < ndims; i++ ) {
269+
if ( sh1[ i ] != 1 ) {
270+
break;
271+
}
272+
}
273+
// Remove the singleton dimensions and perform computation...
274+
status = stdlib_ndarray_every_1d_squeeze( obj->functions[ 1 ], x1, x2, i, data );
275+
if ( status == 0 ) {
276+
return 0;
277+
}
278+
// If we failed, this is probably due to failed memory allocation, so fall through and try again...
279+
}
280+
s1 = stdlib_ndarray_strides( x1 );
281+
io1 = stdlib_ndarray_iteration_order( ndims, s1 ); // +/-1
282+
283+
// Determine whether we can avoid blocked iteration...
284+
if ( io1 != 0 ) {
285+
// Determine the minimum and maximum linear byte indices which are accessible by the array view:
286+
mib1 = stdlib_ndarray_offset( x1 ); // byte offset
287+
mab1 = mib1;
288+
for ( i = 0; i < ndims; i++ ) {
289+
s = s1[ i ]; // units: bytes
290+
if ( s > 0 ) {
291+
mab1 += s * ( sh1[i]-1 );
292+
} else if ( s < 0 ) {
293+
mib1 += s * ( sh1[i]-1 ); // decrements
294+
}
295+
}
296+
bp1 = stdlib_ndarray_bytes_per_element( stdlib_ndarray_dtype( x1 ) );
297+
298+
// Determine whether we can ignore shape (and strides) and treat the input ndarray as a linear one-dimensional strided array...
299+
if ( ( len*bp1 ) == ( mab1-mib1+bp1 ) ) {
300+
// Note: the above is equivalent to @stdlib/ndarray/base/assert/is-contiguous, but in-lined so we can retain computed values...
301+
status = stdlib_ndarray_every_1d_flatten( obj->functions[ 1 ], len, x1, io1*bp1, x2, data );
302+
if ( status == 0 ) {
303+
return 0;
304+
}
305+
// If we failed, this is probably due to failed memory allocation, so fall through and try again...
306+
}
307+
// The input ndarray is non-contiguous, so we cannot directly use one-dimensional array functionality...
308+
309+
// Determine whether we can use simple nested loops...
310+
if ( ndims < (obj->nfunctions) ) {
311+
// So long as iteration for the array always moves in the same direction (i.e., no mixed sign strides), we can leverage cache-optimal (i.e., normal) nested loops without resorting to blocked iteration...
312+
obj->functions[ ndims ]( arrays, data );
313+
return 0;
314+
}
315+
// Fall-through to blocked iteration...
316+
}
317+
// At this point, we're either dealing with either a non-contiguous n-dimensional array or a high dimensional n-dimensional array, so our only hope is that we can still perform blocked iteration...
318+
319+
// Determine whether we can perform blocked iteration...
320+
if ( ndims <= (obj->nblockedfunctions)+1 ) {
321+
obj->blocked_functions[ ndims-2 ]( arrays, data );
322+
return 0;
323+
}
324+
// Fall-through to linear view iteration without regard for how data is stored in memory (i.e., take the slow path)...
325+
obj->functions[ (obj->nfunctions)-1 ]( arrays, data );
326+
327+
return 0;
328+
}

0 commit comments

Comments
 (0)