Skip to content

Commit 03696ac

Browse files
committed
feat: add 8d blocked macros
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: na - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent 3764601 commit 03696ac

File tree

1 file changed

+346
-0
lines changed
  • lib/node_modules/@stdlib/ndarray/base/every/include/stdlib/ndarray/base/every/macros

1 file changed

+346
-0
lines changed
Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,346 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
#ifndef STDLIB_NDARRAY_BASE_EVERY_MACROS_8D_BLOCKED_H
20+
#define STDLIB_NDARRAY_BASE_EVERY_MACROS_8D_BLOCKED_H
21+
22+
#include "stdlib/ndarray/base/nullary/macros/constants.h"
23+
#include "stdlib/ndarray/base/nullary/internal/permute.h"
24+
#include "stdlib/ndarray/base/nullary/internal/range.h"
25+
#include "stdlib/ndarray/base/nullary/internal/sort2ins.h"
26+
#include "stdlib/ndarray/base/bytes_per_element.h"
27+
#include "stdlib/ndarray/ctor.h"
28+
#include <stdbool.h>
29+
#include <stdint.h>
30+
#include <string.h>
31+
32+
/**
33+
* Macro containing the preamble for blocked nested loops which operate on elements of an eight-dimensional ndarray.
34+
*
35+
* ## Notes
36+
*
37+
* - Variable naming conventions:
38+
*
39+
* - `sx#`, `pbx#`, `px#`, `ox#`, `nbx#`, and `d@x#` where `#` corresponds to the ndarray argument number, starting at `1`.
40+
* - `s@`, `i@`, `j@`, `o@x#`, and `d@x#` where `@` corresponds to the loop number, with `0` being the innermost loop.
41+
*
42+
* @example
43+
* STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_PREAMBLE {
44+
* // Innermost loop body...
45+
* }
46+
* STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_EPILOGUE
47+
*/
48+
#define STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_PREAMBLE \
49+
const struct ndarray *x1 = arrays[ 0 ]; /* input ndarray; `arrays` must be in scope at the expansion site */ \
50+
const struct ndarray *x2 = arrays[ 1 ]; /* output ndarray */ \
51+
bool *px2 = stdlib_ndarray_data( x2 ); /* the result is written through this pointer as a single `bool` */ \
52+
int64_t shape[8]; /* dimension sizes permuted into loop order */ \
53+
int64_t sx1[8]; /* local copy of the input strides (sorted below) */ \
54+
int64_t idx[8]; /* loop interchange indices */ \
55+
int64_t tmp[8]; /* scratch buffer which receives the permuted shape */ \
56+
int64_t bsize; /* maximum number of elements per block along each dimension */ \
57+
uint8_t *pbx1; /* base pointer to the input data buffer */ \
58+
uint8_t *px1; /* pointer to the current input element */ \
59+
int64_t d0x1; /* d@x1: pointer increment applied after each iteration of element loop `i@` */ \
60+
int64_t d1x1; \
61+
int64_t d2x1; \
62+
int64_t d3x1; \
63+
int64_t d4x1; \
64+
int64_t d5x1; \
65+
int64_t d6x1; \
66+
int64_t d7x1; \
67+
int64_t o1x1; /* o@x1: offset to the current block accumulated over loops 7 down to @ */ \
68+
int64_t o2x1; \
69+
int64_t o3x1; \
70+
int64_t o4x1; \
71+
int64_t o5x1; \
72+
int64_t o6x1; \
73+
int64_t o7x1; \
74+
int64_t nbx1; /* number of bytes per input element */ \
75+
int64_t ox1; \
76+
int64_t s0; /* s@: number of elements in the current block along loop dimension @ */ \
77+
int64_t s1; \
78+
int64_t s2; \
79+
int64_t s3; \
80+
int64_t s4; \
81+
int64_t s5; \
82+
int64_t s6; \
83+
int64_t s7; \
84+
int64_t i0; /* i@: element counter within the current block */ \
85+
int64_t i1; \
86+
int64_t i2; \
87+
int64_t i3; \
88+
int64_t i4; \
89+
int64_t i5; \
90+
int64_t i6; \
91+
int64_t i7; \
92+
int64_t j0; /* j@: elements remaining along loop dimension @; after being decremented, also the index at which the current block starts */ \
93+
int64_t j1; \
94+
int64_t j2; \
95+
int64_t j3; \
96+
int64_t j4; \
97+
int64_t j5; \
98+
int64_t j6; \
99+
int64_t j7; \
100+
/* Copy strides to prevent mutation to the original ndarray: */ \
101+
memcpy( sx1, stdlib_ndarray_strides( x1 ), sizeof sx1 ); \
102+
/* Create a loop interchange index array for loop order permutation: */ \
103+
stdlib_ndarray_base_nullary_internal_range( 8, idx ); \
104+
/* Sort the input array strides in increasing order (of magnitude): */ \
105+
stdlib_ndarray_base_nullary_internal_sort2ins( 8, sx1, idx ); \
106+
/* Permute the shape (avoiding mutation) according to loop order: */ \
107+
stdlib_ndarray_base_nullary_internal_permute( 8, stdlib_ndarray_shape( x1 ), idx, tmp ); \
108+
memcpy( shape, tmp, sizeof shape ); \
109+
/* Determine the block size... */ \
110+
nbx1 = stdlib_ndarray_bytes_per_element( stdlib_ndarray_dtype( x1 ) ); \
111+
if ( nbx1 == 0 ) { \
112+
bsize = STDLIB_NDARRAY_EVERY_BLOCK_SIZE_IN_ELEMENTS; \
113+
} else { \
114+
bsize = STDLIB_NDARRAY_EVERY_BLOCK_SIZE_IN_BYTES / nbx1; \
115+
} \
116+
/* Cache a pointer to the ndarray buffer... */ \
117+
pbx1 = stdlib_ndarray_data( x1 ); \
118+
/* Cache a byte offset to the first indexed element... */ \
119+
ox1 = stdlib_ndarray_offset( x1 ); \
120+
/* Set a pointer to the first indexed element of the output ndarray... */ \
121+
px2 += stdlib_ndarray_offset( x2 ); \
122+
/* Cache the offset increment for the innermost loop... */ \
123+
d0x1 = sx1[0]; \
124+
/* Iterate over blocks... */ \
125+
for ( j7 = shape[7]; j7 > 0; ) { \
126+
if ( j7 < bsize ) { \
127+
s7 = j7; \
128+
j7 = 0; \
129+
} else { \
130+
s7 = bsize; \
131+
j7 -= bsize; \
132+
} \
133+
o7x1 = ox1 + ( j7*sx1[7] ); \
134+
for ( j6 = shape[6]; j6 > 0; ) { \
135+
if ( j6 < bsize ) { \
136+
s6 = j6; \
137+
j6 = 0; \
138+
} else { \
139+
s6 = bsize; \
140+
j6 -= bsize; \
141+
} \
142+
d7x1 = sx1[7] - ( s6*sx1[6] ); /* one step along dimension 7 minus the `s6` steps already taken along dimension 6; the remaining `d@x1` follow the same pattern */ \
143+
o6x1 = o7x1 + ( j6*sx1[6] ); \
144+
for ( j5 = shape[5]; j5 > 0; ) { \
145+
if ( j5 < bsize ) { \
146+
s5 = j5; \
147+
j5 = 0; \
148+
} else { \
149+
s5 = bsize; \
150+
j5 -= bsize; \
151+
} \
152+
d6x1 = sx1[6] - ( s5*sx1[5] ); \
153+
o5x1 = o6x1 + ( j5*sx1[5] ); \
154+
for ( j4 = shape[4]; j4 > 0; ) { \
155+
if ( j4 < bsize ) { \
156+
s4 = j4; \
157+
j4 = 0; \
158+
} else { \
159+
s4 = bsize; \
160+
j4 -= bsize; \
161+
} \
162+
d5x1 = sx1[5] - ( s4*sx1[4] ); \
163+
o4x1 = o5x1 + ( j4*sx1[4] ); \
164+
for ( j3 = shape[3]; j3 > 0; ) { \
165+
if ( j3 < bsize ) { \
166+
s3 = j3; \
167+
j3 = 0; \
168+
} else { \
169+
s3 = bsize; \
170+
j3 -= bsize; \
171+
} \
172+
d4x1 = sx1[4] - ( s3*sx1[3] ); \
173+
o3x1 = o4x1 + ( j3*sx1[3] ); \
174+
for ( j2 = shape[2]; j2 > 0; ) { \
175+
if ( j2 < bsize ) { \
176+
s2 = j2; \
177+
j2 = 0; \
178+
} else { \
179+
s2 = bsize; \
180+
j2 -= bsize; \
181+
} \
182+
d3x1 = sx1[3] - ( s2*sx1[2] ); \
183+
o2x1 = o3x1 + ( j2*sx1[2] ); \
184+
for ( j1 = shape[1]; j1 > 0; ) { \
185+
if ( j1 < bsize ) { \
186+
s1 = j1; \
187+
j1 = 0; \
188+
} else { \
189+
s1 = bsize; \
190+
j1 -= bsize; \
191+
} \
192+
d2x1 = sx1[2] - ( s1*sx1[1] ); \
193+
o1x1 = o2x1 + ( j1*sx1[1] ); \
194+
for ( j0 = shape[0]; j0 > 0; ) { \
195+
if ( j0 < bsize ) { \
196+
s0 = j0; \
197+
j0 = 0; \
198+
} else { \
199+
s0 = bsize; \
200+
j0 -= bsize; \
201+
} \
202+
/* Compute a pointer to the first ndarray element in the current block... */ \
203+
px1 = pbx1 + o1x1 + ( j0*sx1[0] ); \
204+
/* Compute the loop offset increment... */ \
205+
d1x1 = sx1[1] - ( s0*sx1[0] ); \
206+
/* Iterate over the ndarray dimensions... */ \
207+
for ( i7 = 0; i7 < s7; i7++, px1 += d7x1 ) { \
208+
for ( i6 = 0; i6 < s6; i6++, px1 += d6x1 ) { \
209+
for ( i5 = 0; i5 < s5; i5++, px1 += d5x1 ) { \
210+
for ( i4 = 0; i4 < s4; i4++, px1 += d4x1 ) { \
211+
for ( i3 = 0; i3 < s3; i3++, px1 += d3x1 ) { \
212+
for ( i2 = 0; i2 < s2; i2++, px1 += d2x1 ) { \
213+
for ( i1 = 0; i1 < s1; i1++, px1 += d1x1 ) { \
214+
for ( i0 = 0; i0 < s0; i0++, px1 += d0x1 )
215+
216+
/**
217+
* Macro containing the epilogue for blocked nested loops which operate on elements of an eight-dimensional ndarray.
218+
*
219+
* @example
220+
* STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_PREAMBLE {
221+
* // Innermost loop body...
222+
* }
223+
* STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_EPILOGUE
224+
*/
225+
#define STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_EPILOGUE \
226+
} /* closes element loop `i1` (the `i0` loop body is supplied at the expansion site) */ \
227+
} /* closes element loop `i2` */ \
228+
} /* closes element loop `i3` */ \
229+
} /* closes element loop `i4` */ \
230+
} /* closes element loop `i5` */ \
231+
} /* closes element loop `i6` */ \
232+
} /* closes element loop `i7` */ \
233+
} /* closes block loop `j0` */ \
234+
} /* closes block loop `j1` */ \
235+
} /* closes block loop `j2` */ \
236+
} /* closes block loop `j3` */ \
237+
} /* closes block loop `j4` */ \
238+
} /* closes block loop `j5` */ \
239+
} /* closes block loop `j6` */ \
240+
} /* closes block loop `j7` */ \
241+
*px2 = true; /* reached once all loops have finished; records a result of `true` in the output */
242+
243+
/**
244+
* Macro for a blocked eight-dimensional ndarray loop which inlines an expression.
245+
*
246+
* ## Notes
247+
*
248+
* - Retrieves each input ndarray element according to type `tin` via the pointer `px1` as `in1`.
249+
* - Expects a provided expression to operate on `tin in1`.
250+
* - Stores the final result in an output ndarray via the pointer `px2`.
* - Upon encountering the first element for which the expression evaluates to a falsy value, writes `false` to the output and returns from the enclosing function without visiting the remaining elements.
* - If the loops run to completion (including when no elements are visited), writes `true` to the output.
* - Expands the blocked loop preamble, which expects an `arrays` variable to be in scope, with the input ndarray at index `0` and the output ndarray at index `1`.
251+
*
252+
* @param tin input type
253+
* @param expr expression to inline
254+
*
255+
* @example
256+
* STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_INLINE( double, in1 )
257+
*/
258+
#define STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_INLINE( tin, expr ) \
259+
STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_PREAMBLE { \
260+
const tin in1 = *(tin *)px1; \
261+
if ( !( expr ) ) { \
262+
*px2 = false; \
263+
return; /* short-circuit: exits the enclosing function */ \
264+
} \
265+
} \
266+
STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_EPILOGUE
267+
268+
/**
269+
* Macro for a blocked eight-dimensional ndarray loop which invokes a callback.
270+
*
271+
* ## Notes
272+
*
273+
* - Retrieves each ndarray element according to type `tin` via the pointer `px1`.
274+
* - Stores the final result in an output ndarray via the pointer `px2`.
* - Expects a callback `f` to be in scope which can be invoked with a single argument of type `tin` and whose return value can be tested for truthiness.
* - Upon encountering the first element for which the callback returns a falsy value, writes `false` to the output and returns from the enclosing function without visiting the remaining elements.
* - If the loops run to completion (including when no elements are visited), writes `true` to the output.
* - Expands the blocked loop preamble, which expects an `arrays` variable to be in scope, with the input ndarray at index `0` and the output ndarray at index `1`.
275+
*
276+
* @param tin input type
277+
*
278+
* @example
279+
* // e.g., d_x
280+
* STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_CLBK( double )
281+
*/
282+
#define STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_CLBK( tin ) \
283+
STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_PREAMBLE { \
284+
const tin x = *(tin *)px1; \
285+
if ( !( f( x ) ) ) { \
286+
*px2 = false; \
287+
return; /* short-circuit: exits the enclosing function */ \
288+
} \
289+
} \
290+
STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_EPILOGUE
291+
292+
/**
293+
* Macro for a blocked eight-dimensional ndarray loop which invokes a callback requiring arguments be explicitly cast to a different type.
294+
*
295+
* ## Notes
296+
*
297+
* - Retrieves each ndarray element according to type `tin` via the pointer `px1`.
298+
* - Explicitly casts each function argument to `fin`.
299+
* - Stores the final result in an output ndarray via the pointer `px2`.
* - Expects a callback `f` to be in scope which can be invoked with a single argument of type `fin` and whose return value can be tested for truthiness.
* - Upon encountering the first element for which the callback returns a falsy value, writes `false` to the output and returns from the enclosing function without visiting the remaining elements.
* - If the loops run to completion (including when no elements are visited), writes `true` to the output.
* - Expands the blocked loop preamble, which expects an `arrays` variable to be in scope, with the input ndarray at index `0` and the output ndarray at index `1`.
300+
*
301+
* @param tin input type
302+
* @param fin callback argument type
303+
*
304+
* @example
305+
* // e.g., f_x_as_d_x
306+
* STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_CLBK_ARG_CAST( float, double )
307+
*/
308+
#define STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_CLBK_ARG_CAST( tin, fin ) \
309+
STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_PREAMBLE { \
310+
const tin x = *(tin *)px1; \
311+
if ( !( f( (fin)x ) ) ) { \
312+
*px2 = false; \
313+
return; /* short-circuit: exits the enclosing function */ \
314+
} \
315+
} \
316+
STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_EPILOGUE
317+
318+
/**
319+
* Macro for a blocked eight-dimensional ndarray loop which invokes a callback requiring arguments be cast to a different type via casting functions.
320+
*
321+
* ## Notes
322+
*
323+
* - Retrieves each ndarray element according to type `tin` via the pointer `px1`.
324+
* - Explicitly casts each function argument via `cin`.
325+
* - Stores the final result in an output ndarray via the pointer `px2`.
* - Expects a callback `f` to be in scope which can be invoked with the value returned by `cin` and whose return value can be tested for truthiness.
* - Upon encountering the first element for which the callback returns a falsy value, writes `false` to the output and returns from the enclosing function without visiting the remaining elements.
* - If the loops run to completion (including when no elements are visited), writes `true` to the output.
* - Expands the blocked loop preamble, which expects an `arrays` variable to be in scope, with the input ndarray at index `0` and the output ndarray at index `1`.
326+
*
327+
* @param tin input type
328+
* @param cin input casting function
329+
*
330+
* @example
331+
* #include "stdlib/complex/float64/ctor.h"
332+
*
333+
* // e.g., f_x_as_z_x
334+
* STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_CLBK_ARG_CAST_FCN( float, stdlib_complex128_from_float32 )
335+
*/
336+
#define STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_CLBK_ARG_CAST_FCN( tin, cin ) \
337+
STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_PREAMBLE { \
338+
const tin x = *(tin *)px1; \
339+
if ( !( f( cin( x ) ) ) ) { \
340+
*px2 = false; \
341+
return; /* short-circuit: exits the enclosing function */ \
342+
} \
343+
} \
344+
STDLIB_NDARRAY_EVERY_8D_BLOCKED_LOOP_EPILOGUE
345+
346+
#endif // !STDLIB_NDARRAY_BASE_EVERY_MACROS_8D_BLOCKED_H

0 commit comments

Comments
 (0)