Skip to content

Commit 907c8f4

Browse files
committed
feat: add 8d blocked kernel
---
type: pre_commit_static_analysis_report
description: Results of running static analysis checks when committing changes.
report:
  - task: lint_filenames
    status: passed
  - task: lint_editorconfig
    status: passed
  - task: lint_markdown
    status: na
  - task: lint_package_json
    status: na
  - task: lint_repl_help
    status: na
  - task: lint_javascript_src
    status: passed
  - task: lint_javascript_cli
    status: na
  - task: lint_javascript_examples
    status: na
  - task: lint_javascript_tests
    status: na
  - task: lint_javascript_benchmarks
    status: na
  - task: lint_python
    status: na
  - task: lint_r
    status: na
  - task: lint_c_src
    status: na
  - task: lint_c_examples
    status: na
  - task: lint_c_benchmarks
    status: na
  - task: lint_c_tests_fixtures
    status: na
  - task: lint_shell
    status: na
  - task: lint_typescript_declarations
    status: na
  - task: lint_typescript_tests
    status: na
  - task: lint_license_headers
    status: passed
---
1 parent 8500ee3 commit 907c8f4

File tree

1 file changed

+397
-0
lines changed

1 file changed

+397
-0
lines changed
Lines changed: 397 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,397 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/* eslint-disable max-depth, max-len, max-statements, max-lines-per-function */
20+
21+
'use strict';
22+
23+
// MODULES //
24+
25+
var loopOrder = require( '@stdlib/ndarray/base/binary-loop-interchange-order' );
26+
var blockSize = require( '@stdlib/ndarray/base/binary-tiling-block-size' );
27+
28+
29+
// MAIN //
30+
31+
/**
* Applies a binary callback to elements in eight-dimensional input ndarrays and assigns results to elements in an equivalently shaped output ndarray via loop blocking.
*
* ## Notes
*
* -   The iteration shape and the per-array strides are taken from the object returned by `loopOrder` (fields `sh`, `sx`, `sy`, and `sz`), and the block edge length is the value returned by `blockSize`.
* -   Along every dimension, blocks are visited starting from the highest indices and moving toward index zero; within a block, elements are visited in increasing index order.
* -   Results are written directly into `z.data`; nothing is returned.
*
* @private
* @param {Object} x - object containing input ndarray meta data
* @param {string} x.dtype - data type
* @param {Collection} x.data - data buffer
* @param {NonNegativeIntegerArray} x.shape - dimensions
* @param {IntegerArray} x.strides - stride lengths
* @param {NonNegativeInteger} x.offset - index offset
* @param {string} x.order - specifies whether `x` is row-major (C-style) or column-major (Fortran-style)
* @param {Object} y - object containing input ndarray meta data
* @param {string} y.dtype - data type
* @param {Collection} y.data - data buffer
* @param {NonNegativeIntegerArray} y.shape - dimensions
* @param {IntegerArray} y.strides - stride lengths
* @param {NonNegativeInteger} y.offset - index offset
* @param {string} y.order - specifies whether `y` is row-major (C-style) or column-major (Fortran-style)
* @param {Object} z - object containing output ndarray meta data
* @param {string} z.dtype - data type
* @param {Collection} z.data - data buffer
* @param {NonNegativeIntegerArray} z.shape - dimensions
* @param {IntegerArray} z.strides - stride lengths
* @param {NonNegativeInteger} z.offset - index offset
* @param {string} z.order - specifies whether `z` is row-major (C-style) or column-major (Fortran-style)
* @param {Callback} fcn - binary callback
* @returns {void}
*
* @example
* var Float64Array = require( '@stdlib/array/float64' );
*
* function fcn( x, y ) {
*     return x + y;
* }
*
* // Create data buffers:
* var xbuf = new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 ] );
* var ybuf = new Float64Array( [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] );
* var zbuf = new Float64Array( 12 );
*
* // Define the shape of the input and output arrays:
* var shape = [ 1, 1, 1, 1, 1, 3, 2, 2 ];
*
* // Define the array strides:
* var sx = [ 12, 12, 12, 12, 12, 4, 2, 1 ];
* var sy = [ 12, 12, 12, 12, 12, 4, 2, 1 ];
* var sz = [ 12, 12, 12, 12, 12, 4, 2, 1 ];
*
* // Define the index offsets:
* var ox = 0;
* var oy = 0;
* var oz = 0;
*
* // Create the input and output ndarray-like objects:
* var x = {
*     'dtype': 'float64',
*     'data': xbuf,
*     'shape': shape,
*     'strides': sx,
*     'offset': ox,
*     'order': 'row-major'
* };
* var y = {
*     'dtype': 'float64',
*     'data': ybuf,
*     'shape': shape,
*     'strides': sy,
*     'offset': oy,
*     'order': 'row-major'
* };
* var z = {
*     'dtype': 'float64',
*     'data': zbuf,
*     'shape': shape,
*     'strides': sz,
*     'offset': oz,
*     'order': 'row-major'
* };
*
* // Apply the binary function:
* blockedbinary8d( x, y, z, fcn );
*
* console.log( z.data );
* // => <Float64Array>[ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 ]
*/
function blockedbinary8d( x, y, z, fcn ) {
	var bsize;
	var xbuf;
	var ybuf;
	var zbuf;
	var dx0;
	var dx1;
	var dx2;
	var dx3;
	var dx4;
	var dx5;
	var dx6;
	var dx7;
	var dy0;
	var dy1;
	var dy2;
	var dy3;
	var dy4;
	var dy5;
	var dy6;
	var dy7;
	var dz0;
	var dz1;
	var dz2;
	var dz3;
	var dz4;
	var dz5;
	var dz6;
	var dz7;
	var ox1;
	var ox2;
	var ox3;
	var ox4;
	var ox5;
	var ox6;
	var ox7;
	var oy1;
	var oy2;
	var oy3;
	var oy4;
	var oy5;
	var oy6;
	var oy7;
	var oz1;
	var oz2;
	var oz3;
	var oz4;
	var oz5;
	var oz6;
	var oz7;
	var sh;
	var s0;
	var s1;
	var s2;
	var s3;
	var s4;
	var s5;
	var s6;
	var s7;
	var sx;
	var sy;
	var sz;
	var ox;
	var oy;
	var oz;
	var ix;
	var iy;
	var iz;
	var i0;
	var i1;
	var i2;
	var i3;
	var i4;
	var i5;
	var i6;
	var i7;
	var j0;
	var j1;
	var j2;
	var j3;
	var j4;
	var j5;
	var j6;
	var j7;
	var o;

	// Note on variable naming convention: s#, dx#, dy#, dz#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop...

	// Resolve the loop interchange order:
	o = loopOrder( x.shape, x.strides, y.strides, z.strides );
	sh = o.sh;
	sx = o.sx;
	sy = o.sy;
	sz = o.sz;

	// Determine the block size:
	bsize = blockSize( x.dtype, y.dtype, z.dtype );

	// Cache the indices of the first indexed elements in the respective ndarrays...
	ox = x.offset;
	oy = y.offset;
	oz = z.offset;

	// Cache references to the input and output ndarray buffers...
	xbuf = x.data;
	ybuf = y.data;
	zbuf = z.data;

	// Cache offset increments for the innermost loop...
	dx0 = sx[0];
	dy0 = sy[0];
	dz0 = sz[0];

	// Iterate over blocks...
	//
	// For each loop level `k`, `j#` counts the elements along that dimension which have not yet been assigned to a block. Each pass carves off a block of `s#` elements (at most `bsize`) from the high-index end, after which `j#` is the index at which that block starts. `o?#` is the buffer index of the block's first element accumulated over loop levels `k` and above.
	//
	// `d?#` for level `k >= 1` is the pointer correction applied after the level `k-1` element loop finishes: advance one step along dimension `k` and rewind the `s(k-1)` steps just taken along dimension `k-1`.
	for ( j7 = sh[7]; j7 > 0; ) {
		if ( j7 < bsize ) {
			s7 = j7;
			j7 = 0;
		} else {
			s7 = bsize;
			j7 -= bsize;
		}
		ox7 = ox + ( j7*sx[7] );
		oy7 = oy + ( j7*sy[7] );
		oz7 = oz + ( j7*sz[7] );
		for ( j6 = sh[6]; j6 > 0; ) {
			if ( j6 < bsize ) {
				s6 = j6;
				j6 = 0;
			} else {
				s6 = bsize;
				j6 -= bsize;
			}
			dx7 = sx[7] - ( s6*sx[6] );
			dy7 = sy[7] - ( s6*sy[6] );
			dz7 = sz[7] - ( s6*sz[6] );
			ox6 = ox7 + ( j6*sx[6] );
			oy6 = oy7 + ( j6*sy[6] );
			oz6 = oz7 + ( j6*sz[6] );
			for ( j5 = sh[5]; j5 > 0; ) {
				if ( j5 < bsize ) {
					s5 = j5;
					j5 = 0;
				} else {
					s5 = bsize;
					j5 -= bsize;
				}
				dx6 = sx[6] - ( s5*sx[5] );
				dy6 = sy[6] - ( s5*sy[5] );
				dz6 = sz[6] - ( s5*sz[5] );
				ox5 = ox6 + ( j5*sx[5] );
				oy5 = oy6 + ( j5*sy[5] );
				oz5 = oz6 + ( j5*sz[5] );
				for ( j4 = sh[4]; j4 > 0; ) {
					if ( j4 < bsize ) {
						s4 = j4;
						j4 = 0;
					} else {
						s4 = bsize;
						j4 -= bsize;
					}
					dx5 = sx[5] - ( s4*sx[4] );
					dy5 = sy[5] - ( s4*sy[4] );
					dz5 = sz[5] - ( s4*sz[4] );
					ox4 = ox5 + ( j4*sx[4] );
					oy4 = oy5 + ( j4*sy[4] );
					oz4 = oz5 + ( j4*sz[4] );
					for ( j3 = sh[3]; j3 > 0; ) {
						if ( j3 < bsize ) {
							s3 = j3;
							j3 = 0;
						} else {
							s3 = bsize;
							j3 -= bsize;
						}
						dx4 = sx[4] - ( s3*sx[3] );
						dy4 = sy[4] - ( s3*sy[3] );
						dz4 = sz[4] - ( s3*sz[3] );
						ox3 = ox4 + ( j3*sx[3] );
						oy3 = oy4 + ( j3*sy[3] );
						oz3 = oz4 + ( j3*sz[3] );
						for ( j2 = sh[2]; j2 > 0; ) {
							if ( j2 < bsize ) {
								s2 = j2;
								j2 = 0;
							} else {
								s2 = bsize;
								j2 -= bsize;
							}
							dx3 = sx[3] - ( s2*sx[2] );
							dy3 = sy[3] - ( s2*sy[2] );
							dz3 = sz[3] - ( s2*sz[2] );
							ox2 = ox3 + ( j2*sx[2] );
							oy2 = oy3 + ( j2*sy[2] );
							oz2 = oz3 + ( j2*sz[2] );
							for ( j1 = sh[1]; j1 > 0; ) {
								if ( j1 < bsize ) {
									s1 = j1;
									j1 = 0;
								} else {
									s1 = bsize;
									j1 -= bsize;
								}
								dx2 = sx[2] - ( s1*sx[1] );
								dy2 = sy[2] - ( s1*sy[1] );
								dz2 = sz[2] - ( s1*sz[1] );
								ox1 = ox2 + ( j1*sx[1] );
								oy1 = oy2 + ( j1*sy[1] );
								oz1 = oz2 + ( j1*sz[1] );
								for ( j0 = sh[0]; j0 > 0; ) {
									if ( j0 < bsize ) {
										s0 = j0;
										j0 = 0;
									} else {
										s0 = bsize;
										j0 -= bsize;
									}
									// Compute index offsets for the first input and output ndarray elements in the current block...
									ix = ox1 + ( j0*sx[0] );
									iy = oy1 + ( j0*sy[0] );
									iz = oz1 + ( j0*sz[0] );

									// Compute loop offset increments...
									dx1 = sx[1] - ( s0*sx[0] );
									dy1 = sy[1] - ( s0*sy[0] );
									dz1 = sz[1] - ( s0*sz[0] );

									// Iterate over the ndarray dimensions...
									for ( i7 = 0; i7 < s7; i7++ ) {
										for ( i6 = 0; i6 < s6; i6++ ) {
											for ( i5 = 0; i5 < s5; i5++ ) {
												for ( i4 = 0; i4 < s4; i4++ ) {
													for ( i3 = 0; i3 < s3; i3++ ) {
														for ( i2 = 0; i2 < s2; i2++ ) {
															for ( i1 = 0; i1 < s1; i1++ ) {
																for ( i0 = 0; i0 < s0; i0++ ) {
																	zbuf[ iz ] = fcn( xbuf[ ix ], ybuf[ iy ] );
																	ix += dx0;
																	iy += dy0;
																	iz += dz0;
																}
																ix += dx1;
																iy += dy1;
																iz += dz1;
															}
															ix += dx2;
															iy += dy2;
															iz += dz2;
														}
														ix += dx3;
														iy += dy3;
														iz += dz3;
													}
													ix += dx4;
													iy += dy4;
													iz += dz4;
												}
												ix += dx5;
												iy += dy5;
												iz += dz5;
											}
											ix += dx6;
											iy += dy6;
											iz += dz6;
										}
										ix += dx7;
										iy += dy7;
										iz += dz7;
									}
								}
							}
						}
					}
				}
			}
		}
	}
}
393+
394+
395+
// EXPORTS //
396+
397+
module.exports = blockedbinary8d;

0 commit comments

Comments
 (0)