diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/README.md b/lib/node_modules/@stdlib/blas/base/dsdot/README.md index 0ff4c3e0131a..cc11f49ee0ef 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/README.md +++ b/lib/node_modules/@stdlib/blas/base/dsdot/README.md @@ -227,6 +227,32 @@ The function accepts the following arguments: double c_dsdot( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ); ``` +#### c_dsdot_ndarray( N, \*X, strideX, offsetX, \*Y, strideY, offsetY ) + +Computes the dot product of two single-precision floating-point vectors with extended accumulation and result and using alternative indexing semantics. + +```c +const float x[] = { 4.0f, 2.0f, -3.0f, 5.0f, -1.0f }; +const float y[] = { 2.0f, 6.0f, -1.0f, -4.0f, 8.0f }; + +double v = c_dsdot_ndarray( 5, x, 1, 0, y, 1, 0 ); +// returns -5.0 +``` + +The function accepts the following arguments: + +- **N**: `[in] CBLAS_INT` number of indexed elements. +- **X**: `[in] float*` first input array. +- **strideX**: `[in] CBLAS_INT` index increment for `X`. +- **offsetX**: `[in] CBLAS_INT` starting index for `X`. +- **Y**: `[in] float*` second input array. +- **strideY**: `[in] CBLAS_INT` index increment for `Y`. +- **offsetY**: `[in] CBLAS_INT` starting index for `Y`. 
+ +```c +double c_dsdot_ndarray( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); +``` + @@ -266,6 +292,12 @@ int main( void ) { // Print the result: printf( "dot product: %lf\n", d ); + + // Compute the dot product: + d = c_dsdot_ndarray( N, x, strideX, 0, y, strideY, N-1 ); + + // Print the result: + printf( "dot product: %lf\n", d ); } ``` diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/benchmark/c/benchmark.length.c b/lib/node_modules/@stdlib/blas/base/dsdot/benchmark/c/benchmark.length.c index 0b4914f6fac2..bbe74f7e00f6 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/benchmark/c/benchmark.length.c +++ b/lib/node_modules/@stdlib/blas/base/dsdot/benchmark/c/benchmark.length.c @@ -94,7 +94,7 @@ static float rand_float( void ) { * @param len array length * @return elapsed time in seconds */ -static double benchmark( int iterations, int len ) { +static double benchmark1( int iterations, int len ) { double elapsed; float x[ len ]; float y[ len ]; @@ -122,6 +122,41 @@ static double benchmark( int iterations, int len ) { return elapsed; } +/** +* Runs a benchmark for `c_dsdot_ndarray`, timing repeated calls on randomly filled unit-stride arrays. +* +* @param iterations number of iterations +* @param len array length +* @return elapsed time in seconds +*/ +static double benchmark2( int iterations, int len ) { + double elapsed; + float x[ len ]; + float y[ len ]; + double z; + double t; + int i; + + for ( i = 0; i < len; i++ ) { + x[ i ] = ( rand_float()*20000.0f ) - 10000.0f; + y[ i ] = ( rand_float()*20000.0f ) - 10000.0f; + } + z = 0.0; + t = tic(); + for ( i = 0; i < iterations; i++ ) { + z = c_dsdot_ndarray( len, x, 1, 0, y, 1, 0 ); + if ( z != z ) { // `z != z` holds only when `z` is NaN + printf( "should not return NaN\n" ); + break; + } + } + elapsed = tic() - t; + if ( z != z ) { + printf( "should not return NaN\n" ); + } + return elapsed; +} + /** * Main execution sequence. 
*/ @@ -144,7 +179,14 @@ int main( void ) { for ( j = 0; j < REPEATS; j++ ) { count += 1; printf( "# c::%s:len=%d\n", NAME, len ); - elapsed = benchmark( iter, len ); + elapsed = benchmark1( iter, len ); + print_results( iter, elapsed ); + printf( "ok %d benchmark finished\n", count ); + } + for ( j = 0; j < REPEATS; j++ ) { + count += 1; + printf( "# c::%s:ndarray:len=%d\n", NAME, len ); + elapsed = benchmark2( iter, len ); print_results( iter, elapsed ); printf( "ok %d benchmark finished\n", count ); } diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/examples/c/example.c b/lib/node_modules/@stdlib/blas/base/dsdot/examples/c/example.c index 7c3054f271f2..7164804d9d13 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/examples/c/example.c +++ b/lib/node_modules/@stdlib/blas/base/dsdot/examples/c/example.c @@ -36,4 +36,10 @@ int main( void ) { // Print the result: printf( "dot product: %lf\n", d ); + + // Compute the dot product: + d = c_dsdot_ndarray( N, x, strideX, 0, y, strideY, N-1 ); + + // Print the result: + printf( "dot product: %lf\n", d ); } diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/include/stdlib/blas/base/dsdot.h b/lib/node_modules/@stdlib/blas/base/dsdot/include/stdlib/blas/base/dsdot.h index deeead972be0..424068fb3b20 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/include/stdlib/blas/base/dsdot.h +++ b/lib/node_modules/@stdlib/blas/base/dsdot/include/stdlib/blas/base/dsdot.h @@ -36,6 +36,11 @@ extern "C" { */ double API_SUFFIX(c_dsdot)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ); +/** +* Computes the dot product of two single-precision floating-point vectors with extended accumulation and result and using alternative indexing semantics. 
+*/ +double API_SUFFIX(c_dsdot_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ); + #ifdef __cplusplus } #endif diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/lib/ndarray.native.js b/lib/node_modules/@stdlib/blas/base/dsdot/lib/ndarray.native.js index 9174e07bb5b3..0f131ea1ca20 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/lib/ndarray.native.js +++ b/lib/node_modules/@stdlib/blas/base/dsdot/lib/ndarray.native.js @@ -20,9 +20,7 @@ // MODULES // -var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' ); -var offsetView = require( '@stdlib/strided/base/offset-view' ); -var addon = require( './dsdot.native.js' ); +var addon = require( './../src/addon.node' ); // MAIN // @@ -49,16 +47,7 @@ var addon = require( './dsdot.native.js' ); * // returns -5.0 */ function dsdot( N, x, strideX, offsetX, y, strideY, offsetY ) { - var viewX; - var viewY; - - offsetX = minViewBufferIndex( N, strideX, offsetX ); - offsetY = minViewBufferIndex( N, strideY, offsetY ); - - viewX = offsetView( x, offsetX ); - viewY = offsetView( y, offsetY ); - - return addon( N, viewX, strideX, viewY, strideY ); + return addon.ndarray( N, x, strideX, offsetX, y, strideY, offsetY ); // delegate to the native `ndarray` method, which receives the offsets directly } diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/manifest.json b/lib/node_modules/@stdlib/blas/base/dsdot/manifest.json index 934a31c1b36d..1ab42dde326b 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/manifest.json +++ b/lib/node_modules/@stdlib/blas/base/dsdot/manifest.json @@ -45,6 +45,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index", "@stdlib/napi/export", "@stdlib/napi/argv", "@stdlib/napi/argv-int64", @@ -58,7 +59,8 @@ "blas": "", "wasm": false, "src": [ - "./src/dsdot.c" + "./src/dsdot.c", + "./src/dsdot_ndarray.c" ], "include": [ "./include" @@ -66,7 +68,8 @@ "libraries": [], "libpath": [], 
"dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] }, { @@ -75,7 +78,8 @@ "blas": "", "wasm": false, "src": [ - "./src/dsdot.c" + "./src/dsdot.c", + "./src/dsdot_ndarray.c" ], "include": [ "./include" @@ -83,7 +87,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] }, @@ -105,6 +110,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index", "@stdlib/napi/export", "@stdlib/napi/argv", "@stdlib/napi/argv-int64", @@ -129,7 +135,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index" ] }, { @@ -149,7 +156,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index" ] }, @@ -170,6 +178,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index", "@stdlib/napi/export", "@stdlib/napi/argv", "@stdlib/napi/argv-int64", @@ -183,7 +192,8 @@ "blas": "", "wasm": false, "src": [ - "./src/dsdot.c" + "./src/dsdot.c", + "./src/dsdot_ndarray.c" ], "include": [ "./include" @@ -191,7 +201,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] }, { @@ -200,7 +211,8 @@ "blas": "", "wasm": false, "src": [ - "./src/dsdot.c" + "./src/dsdot.c", + "./src/dsdot_ndarray.c" ], "include": [ "./include" @@ -208,7 +220,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] }, @@ -229,6 +242,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index", "@stdlib/napi/export", "@stdlib/napi/argv", 
"@stdlib/napi/argv-int64", @@ -252,7 +266,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index" ] }, { @@ -271,7 +286,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index" ] }, @@ -293,6 +309,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index", "@stdlib/napi/export", "@stdlib/napi/argv", "@stdlib/napi/argv-int64", @@ -317,7 +334,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index" ] }, { @@ -337,7 +355,8 @@ ], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/min-view-buffer-index" ] }, @@ -347,7 +366,8 @@ "blas": "", "wasm": false, "src": [ - "./src/dsdot.c" + "./src/dsdot.c", + "./src/dsdot_ndarray.c" ], "include": [ "./include" @@ -356,6 +376,7 @@ "libpath": [], "dependencies": [ "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset", "@stdlib/napi/export", "@stdlib/napi/argv", "@stdlib/napi/argv-int64", @@ -369,7 +390,8 @@ "blas": "", "wasm": false, "src": [ - "./src/dsdot.c" + "./src/dsdot.c", + "./src/dsdot_ndarray.c" ], "include": [ "./include" @@ -377,7 +399,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] }, { @@ -386,7 +409,8 @@ "blas": "", "wasm": false, "src": [ - "./src/dsdot.c" + "./src/dsdot.c", + "./src/dsdot_ndarray.c" ], "include": [ "./include" @@ -394,7 +418,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] }, @@ -404,7 +429,8 @@ "blas": "", "wasm": true, "src": [ - "./src/dsdot.c" + "./src/dsdot.c", + "./src/dsdot_ndarray.c" 
], "include": [ "./include" @@ -412,7 +438,8 @@ "libraries": [], "libpath": [], "dependencies": [ - "@stdlib/blas/base/shared" + "@stdlib/blas/base/shared", + "@stdlib/strided/base/stride2offset" ] } ] diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/src/addon.c b/lib/node_modules/@stdlib/blas/base/dsdot/src/addon.c index 5563de86e7e6..fbaa0fe11346 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/src/addon.c +++ b/lib/node_modules/@stdlib/blas/base/dsdot/src/addon.c @@ -43,4 +43,24 @@ static napi_value addon( napi_env env, napi_callback_info info ) { return v; } -STDLIB_NAPI_MODULE_EXPORT_FCN( addon ) +/** +* Receives JavaScript callback invocation data for the `ndarray` method and forwards the seven arguments `( N, X, strideX, offsetX, Y, strideY, offsetY )` to `c_dsdot_ndarray`. +* +* @param env environment under which the function is invoked +* @param info callback data +* @return Node-API value +*/ +static napi_value addon_method( napi_env env, napi_callback_info info ) { + STDLIB_NAPI_ARGV( env, info, argv, argc, 7 ); + STDLIB_NAPI_ARGV_INT64( env, N, argv, 0 ); + STDLIB_NAPI_ARGV_INT64( env, strideX, argv, 2 ); + STDLIB_NAPI_ARGV_INT64( env, offsetX, argv, 3 ); + STDLIB_NAPI_ARGV_INT64( env, strideY, argv, 5 ); + STDLIB_NAPI_ARGV_INT64( env, offsetY, argv, 6 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, X, N, strideX, argv, 1 ); + STDLIB_NAPI_ARGV_STRIDED_FLOAT32ARRAY( env, Y, N, strideY, argv, 4 ); + STDLIB_NAPI_CREATE_DOUBLE( env, API_SUFFIX(c_dsdot_ndarray)( N, X, strideX, offsetX, Y, strideY, offsetY ), v ); + return v; +} + +STDLIB_NAPI_MODULE_EXPORT_FCN_WITH_METHOD( addon, "ndarray", addon_method ) diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot.c b/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot.c index 686d97f00803..394a80b42558 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot.c +++ b/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot.c @@ -18,6 +18,7 @@ #include "stdlib/blas/base/dsdot.h" #include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/stride2offset.h" /** * Computes the dot product of two single-precision 
floating-point vectors with extended accumulation and result. @@ -30,49 +31,8 @@ * @return the dot product */ double API_SUFFIX(c_dsdot)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ) { - double dot; - CBLAS_INT ix; - CBLAS_INT iy; - CBLAS_INT m; - CBLAS_INT i; - - dot = 0.0; - if ( N <= 0 ) { - return dot; - } - // If both strides are equal to `1`, use unrolled loops... - if ( strideX == 1 && strideY == 1 ) { - m = N % 5; - - // If we have a remainder, do a clean-up loop... - if ( m > 0 ) { - for ( i = 0; i < m; i++ ) { - dot += (double)X[ i ] * (double)Y[ i ]; - } - } - if ( N < 5 ) { - return dot; - } - for ( i = m; i < N; i += 5 ) { - dot += ( (double)X[i]*(double)Y[i] ) + ( (double)X[i+1]*(double)Y[i+1]) + ( (double)X[i+2]*(double)Y[i+2] ) + ( (double)X[i+3]*(double)Y[i+3] ) + ( (double)X[i+4]*(double)Y[i+4] ); - } - return dot; - } - if ( strideX < 0 ) { - ix = (1-N) * strideX; - } else { - ix = 0; - } - if ( strideY < 0 ) { - iy = (1-N) * strideY; - } else { - iy = 0; - } - for ( i = 0; i < N; i++ ) { - dot += (double)X[ ix ] * (double)Y[ iy ]; - ix += strideX; - iy += strideY; - } - return dot; + CBLAS_INT ox = stdlib_strided_stride2offset( N, strideX ); // starting index for X; NOTE(review): presumably `(1-N)*strideX` for a negative stride and `0` otherwise, as in the logic this replaces; confirm against the helper + CBLAS_INT oy = stdlib_strided_stride2offset( N, strideY ); // starting index for Y (same assumption as for X) + return API_SUFFIX(c_dsdot_ndarray)( N, X, strideX, ox, Y, strideY, oy ); } diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_cblas.c b/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_cblas.c index d31e98bfa6e8..5b42e30ee0fe 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_cblas.c +++ b/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_cblas.c @@ -19,6 +19,7 @@ #include "stdlib/blas/base/dsdot.h" #include "stdlib/blas/base/dsdot_cblas.h" #include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/min_view_buffer_index.h" /** * Computes the dot product of two single-precision floating-point vectors with extended accumulation and result. 
@@ -28,8 +29,26 @@ * @param strideX X stride length * @param Y second array * @param strideY Y stride length -* @return the dot product +* @return dot product */ double API_SUFFIX(c_dsdot)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ) { return API_SUFFIX(cblas_dsdot)( N, X, strideX, Y, strideY ); } + +/** +* Computes the dot product of two single-precision floating-point vectors with extended accumulation and result and using alternative indexing semantics. +* +* @param N number of indexed elements +* @param X first array +* @param strideX X stride length +* @param offsetX starting index for X +* @param Y second array +* @param strideY Y stride length +* @param offsetY starting index for Y +* @return dot product +*/ +double API_SUFFIX(c_dsdot_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + X += stdlib_strided_min_view_buffer_index( N, strideX, offsetX ); // adjust array pointer, as `cblas_dsdot` is called without an offset argument; NOTE(review): assumes the helper returns the lowest buffer index accessed by the view; confirm + Y += stdlib_strided_min_view_buffer_index( N, strideY, offsetY ); // adjust array pointer (same as for X) + return API_SUFFIX(cblas_dsdot)( N, X, strideX, Y, strideY ); +} diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_f.c b/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_f.c index 4b3955e82fc0..d94a3352041c 100644 --- a/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_f.c +++ b/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_f.c @@ -19,6 +19,7 @@ #include "stdlib/blas/base/dsdot.h" #include "stdlib/blas/base/dsdot_fortran.h" #include "stdlib/blas/base/shared.h" +#include "stdlib/strided/base/min_view_buffer_index.h" /** * Computes the dot product of two single-precision floating-point vectors with extended accumulation and result. 
@@ -28,10 +29,31 @@ * @param strideX X stride length * @param Y second array * @param strideY Y stride length -* @return the dot product +* @return dot product */ double API_SUFFIX(c_dsdot)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const float *Y, const CBLAS_INT strideY ) { double dot; dsdotsub( &N, X, &strideX, Y, &strideY, &dot ); return dot; } + +/** +* Computes the dot product of two single-precision floating-point vectors with extended accumulation and result and using alternative indexing semantics. +* +* @param N number of indexed elements +* @param X first array +* @param strideX X stride length +* @param offsetX starting index for X +* @param Y second array +* @param strideY Y stride length +* @param offsetY starting index for Y +* @return dot product +*/ +double API_SUFFIX(c_dsdot_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + double dot; + + X += stdlib_strided_min_view_buffer_index( N, strideX, offsetX ); // adjust array pointer, as `dsdotsub` is called without an offset argument; NOTE(review): assumes the helper returns the lowest buffer index accessed by the view; confirm + Y += stdlib_strided_min_view_buffer_index( N, strideY, offsetY ); // adjust array pointer (same as for X) + dsdotsub( &N, X, &strideX, Y, &strideY, &dot ); + return dot; +} diff --git a/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_ndarray.c b/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_ndarray.c new file mode 100644 index 000000000000..672f3a59035e --- /dev/null +++ b/lib/node_modules/@stdlib/blas/base/dsdot/src/dsdot_ndarray.c @@ -0,0 +1,79 @@ +/** +* @license Apache-2.0 +* +* Copyright (c) 2024 The Stdlib Authors. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +#include "stdlib/blas/base/dsdot.h" +#include "stdlib/blas/base/shared.h" + +static const CBLAS_INT M = 5; // loop unrolling factor; must match the five-term sum in the unrolled loop below + +/** +* Computes the dot product of two single-precision floating-point vectors with extended accumulation and result and using alternative indexing semantics. +* +* @param N number of indexed elements +* @param X first array +* @param strideX X stride length +* @param offsetX starting index for X +* @param Y second array +* @param strideY Y stride length +* @param offsetY starting index for Y +* @return dot product (`0.0` when `N <= 0`) +*/ +double API_SUFFIX(c_dsdot_ndarray)( const CBLAS_INT N, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { + double dot; + CBLAS_INT ix; + CBLAS_INT iy; + CBLAS_INT m; + CBLAS_INT i; + + dot = 0.0; + if ( N <= 0 ) { + return dot; + } + ix = offsetX; + iy = offsetY; + + // If both strides are equal to `1`, use loops unrolled by a factor of `M`... + if ( strideX == 1 && strideY == 1 ) { + m = N % M; + + // If `N` is not a multiple of `M`, process the first `m` elements in a clean-up loop... 
+ if ( m > 0 ) { + for ( i = 0; i < m; i++ ) { + dot += (double)X[ ix ] * (double)Y[ iy ]; + ix += strideX; + iy += strideY; + } + } + if ( N < M ) { + return dot; + } + for ( i = m; i < N; i += M ) { + dot += ( (double)X[ ix ]*(double)Y[ iy ] ) + ( (double)X[ ix+1 ]*(double)Y[ iy+1 ]) + ( (double)X[ ix+2 ]*(double)Y[ iy+2 ] ) + ( (double)X[ ix+3 ]*(double)Y[ iy+3 ] ) + ( (double)X[ ix+4 ]*(double)Y[ iy+4 ] ); + ix += M; + iy += M; + } + return dot; + } + for ( i = 0; i < N; i++ ) { + dot += (double)X[ ix ] * (double)Y[ iy ]; + ix += strideX; + iy += strideY; + } + return dot; +} +