Skip to content

Commit 10f7656

Browse files
committed
feat: add 2d blocked accessors kernel
---
type: pre_commit_static_analysis_report
description: Results of running static analysis checks when committing changes.
report:
  - task: lint_filenames
    status: passed
  - task: lint_editorconfig
    status: passed
  - task: lint_markdown
    status: na
  - task: lint_package_json
    status: na
  - task: lint_repl_help
    status: na
  - task: lint_javascript_src
    status: passed
  - task: lint_javascript_cli
    status: na
  - task: lint_javascript_examples
    status: na
  - task: lint_javascript_tests
    status: na
  - task: lint_javascript_benchmarks
    status: na
  - task: lint_python
    status: na
  - task: lint_r
    status: na
  - task: lint_c_src
    status: na
  - task: lint_c_examples
    status: na
  - task: lint_c_benchmarks
    status: na
  - task: lint_c_tests_fixtures
    status: na
  - task: lint_shell
    status: na
  - task: lint_typescript_declarations
    status: na
  - task: lint_typescript_tests
    status: na
  - task: lint_license_headers
    status: passed
---
1 parent ab4f417 commit 10f7656

File tree

1 file changed

+237
-0
lines changed

1 file changed

+237
-0
lines changed
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var loopOrder = require( '@stdlib/ndarray/base/binary-loop-interchange-order' );
24+
var blockSize = require( '@stdlib/ndarray/base/binary-tiling-block-size' );
25+
26+
27+
// MAIN //
28+
/**
* Applies a binary callback to elements in two-dimensional input ndarrays and assigns results to elements in an equivalently shaped output ndarray via loop blocking.
*
* ## Notes
*
* -   Elements are read and written exclusively through the provided accessors: `x.accessors[0]` and `y.accessors[0]` are invoked as `get( buf, idx )`, and `z.accessors[1]` is invoked as `set( buf, idx, value )`.
* -   The iteration space is split into square tiles whose edge length is the value returned by the block size helper. Tiles on the boundary of the iteration space may be smaller.
* -   The output ndarray's data buffer is mutated in-place.
*
* @private
* @param {Object} x - object containing input ndarray meta data
* @param {string} x.dtype - data type
* @param {Collection} x.data - data buffer
* @param {NonNegativeIntegerArray} x.shape - dimensions
* @param {IntegerArray} x.strides - stride lengths
* @param {NonNegativeInteger} x.offset - index offset
* @param {string} x.order - specifies whether `x` is row-major (C-style) or column-major (Fortran-style)
* @param {Array<Function>} x.accessors - data buffer accessors
* @param {Object} y - object containing input ndarray meta data
* @param {string} y.dtype - data type
* @param {Collection} y.data - data buffer
* @param {NonNegativeIntegerArray} y.shape - dimensions
* @param {IntegerArray} y.strides - stride lengths
* @param {NonNegativeInteger} y.offset - index offset
* @param {string} y.order - specifies whether `y` is row-major (C-style) or column-major (Fortran-style)
* @param {Array<Function>} y.accessors - data buffer accessors
* @param {Object} z - object containing output ndarray meta data
* @param {string} z.dtype - data type
* @param {Collection} z.data - data buffer
* @param {NonNegativeIntegerArray} z.shape - dimensions
* @param {IntegerArray} z.strides - stride lengths
* @param {NonNegativeInteger} z.offset - index offset
* @param {string} z.order - specifies whether `z` is row-major (C-style) or column-major (Fortran-style)
* @param {Array<Function>} z.accessors - data buffer accessors
* @param {Callback} fcn - binary callback
* @returns {void}
*
* @example
* var toAccessorArray = require( '@stdlib/array/base/to-accessor-array' );
* var accessors = require( '@stdlib/array/base/accessors' );
* var copy = require( '@stdlib/array/base/copy' );
*
* function fcn( x, y ) {
*     return x + y;
* }
*
* // Create data buffers:
* var xbuf = toAccessorArray( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 ] );
* var ybuf = toAccessorArray( [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] );
* var zbuf = toAccessorArray( [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] );
*
* // Define the shape of the input and output arrays:
* var shape = [ 6, 2 ];
*
* // Define the array strides:
* var sx = [ 2, 1 ];
* var sy = [ 2, 1 ];
* var sz = [ 2, 1 ];
*
* // Define the index offsets:
* var ox = 0;
* var oy = 0;
* var oz = 0;
*
* // Create the input and output ndarray-like objects:
* var x = {
*     'dtype': 'generic',
*     'data': xbuf,
*     'shape': shape,
*     'strides': sx,
*     'offset': ox,
*     'order': 'row-major',
*     'accessors': accessors( xbuf ).accessors
* };
* var y = {
*     'dtype': 'generic',
*     'data': ybuf,
*     'shape': shape,
*     'strides': sy,
*     'offset': oy,
*     'order': 'row-major',
*     'accessors': accessors( ybuf ).accessors
* };
* var z = {
*     'dtype': 'generic',
*     'data': zbuf,
*     'shape': shape,
*     'strides': sz,
*     'offset': oz,
*     'order': 'row-major',
*     'accessors': accessors( zbuf ).accessors
* };
*
* // Apply the binary function:
* blockedbinary2d( x, y, z, fcn );
*
* console.log( copy( z.data ) );
* // => [ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0 ]
*/
function blockedbinary2d( x, y, z, fcn ) {
	var bsize;
	var xbuf;
	var ybuf;
	var zbuf;
	var xget;
	var yget;
	var zset;
	var dx0;
	var dx1;
	var dy0;
	var dy1;
	var dz0;
	var dz1;
	var ox1;
	var oy1;
	var oz1;
	var sh;
	var s0;
	var s1;
	var sx;
	var sy;
	var sz;
	var ox;
	var oy;
	var oz;
	var ix;
	var iy;
	var iz;
	var i0;
	var i1;
	var j0;
	var j1;
	var o;

	// Note on variable naming convention: s#, dx#, dy#, dz#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop...

	// Resolve the loop interchange order (the returned shape and strides are indexed by loop number rather than by original dimension):
	o = loopOrder( x.shape, x.strides, y.strides, z.strides );
	sh = o.sh;
	sx = o.sx;
	sy = o.sy;
	sz = o.sz;

	// Determine the block size:
	bsize = blockSize( x.dtype, y.dtype, z.dtype );

	// Cache the indices of the first indexed elements in the respective ndarrays...
	ox = x.offset;
	oy = y.offset;
	oz = z.offset;

	// Cache references to the input and output ndarray buffers...
	xbuf = x.data;
	ybuf = y.data;
	zbuf = z.data;

	// Cache offset increments for the innermost loop...
	dx0 = sx[0];
	dy0 = sy[0];
	dz0 = sz[0];

	// Cache accessors (index `0` is the getter, index `1` is the setter):
	xget = x.accessors[0];
	yget = y.accessors[0];
	zset = z.accessors[1];

	// Iterate over blocks, walking from the trailing edge of each loop dimension back toward index zero. On entry `j#` is the number of elements not yet covered; after the update it is also the starting index of the current block along loop `#`...
	for ( j1 = sh[1]; j1 > 0; ) {
		if ( j1 < bsize ) {
			// Fewer than `bsize` elements remain, so this final block is truncated:
			s1 = j1;
			j1 = 0;
		} else {
			s1 = bsize;
			j1 -= bsize;
		}
		// Compute index offsets for the start of the current outer block...
		ox1 = ox + ( j1*sx[1] );
		oy1 = oy + ( j1*sy[1] );
		oz1 = oz + ( j1*sz[1] );
		for ( j0 = sh[0]; j0 > 0; ) {
			if ( j0 < bsize ) {
				s0 = j0;
				j0 = 0;
			} else {
				s0 = bsize;
				j0 -= bsize;
			}
			// Compute index offsets for the first input and output ndarray elements in the current block...
			ix = ox1 + ( j0*sx[0] );
			iy = oy1 + ( j0*sy[0] );
			iz = oz1 + ( j0*sz[0] );

			// Compute loop offset increments (one outer-loop step minus the `s0` inner-loop steps already applied, which rewinds to the start of the next line of the block)...
			dx1 = sx[1] - ( s0*sx[0] );
			dy1 = sy[1] - ( s0*sy[0] );
			dz1 = sz[1] - ( s0*sz[0] );

			// Iterate over the ndarray dimensions...
			for ( i1 = 0; i1 < s1; i1++ ) {
				for ( i0 = 0; i0 < s0; i0++ ) {
					zset( zbuf, iz, fcn( xget( xbuf, ix ), yget( ybuf, iy ) ) );
					ix += dx0;
					iy += dy0;
					iz += dz0;
				}
				ix += dx1;
				iy += dy1;
				iz += dz1;
			}
		}
	}
}
233+
234+
235+
// EXPORTS //
236+
237+
module.exports = blockedbinary2d;

0 commit comments

Comments
 (0)