refactor: simplify implementation

kgryte · kgryte · commit b6754fa04cce · 2025-05-21T18:21:57.000-07:00
---
type: pre_commit_static_analysis_report
description: Results of running static analysis checks when committing changes.
report:
  - task: lint_filenames
    status: passed
  - task: lint_editorconfig
    status: passed
  - task: lint_markdown
    status: na
  - task: lint_package_json
    status: na
  - task: lint_repl_help
    status: na
  - task: lint_javascript_src
    status: passed
  - task: lint_javascript_cli
    status: na
  - task: lint_javascript_examples
    status: na
  - task: lint_javascript_tests
    status: na
  - task: lint_javascript_benchmarks
    status: na
  - task: lint_python
    status: na
  - task: lint_r
    status: na
  - task: lint_c_src
    status: passed
  - task: lint_c_examples
    status: na
  - task: lint_c_benchmarks
    status: na
  - task: lint_c_tests_fixtures
    status: na
  - task: lint_shell
    status: na
  - task: lint_typescript_declarations
    status: na
  - task: lint_typescript_tests
    status: na
  - task: lint_license_headers
    status: passed
---
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsapxsumpw/lib/ndarray.js b/lib/node_modules/@stdlib/blas/ext/base/sdsapxsumpw/lib/ndarray.js
@@ -20,14 +20,8 @@
 
 // MODULES //
 
-var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
-var floor = require( '@stdlib/math/base/special/floor' );
-
-
-// VARIABLES //
-
-// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
-var BLOCKSIZE = 128;
+var f32 = require( '@stdlib/number/float64/base/to-float32' );
+var sdssumpw = require( '@stdlib/blas/ext/base/sdssumpw' ).ndarray;
 
 
 // MAIN //
@@ -59,74 +53,11 @@ var BLOCKSIZE = 128;
 * // returns 25.0
 */
 function sdsapxsumpw( N, alpha, x, strideX, offsetX ) {
-	var ix;
-	var s0;
-	var s1;
-	var s2;
-	var s3;
-	var s4;
-	var s5;
-	var s6;
-	var s7;
-	var M;
-	var s;
-	var n;
-	var i;
-
-	if ( N <= 0 ) {
-		return 0.0;
-	}
-	ix = offsetX;
-	if ( strideX === 0 ) {
-		return float64ToFloat32( N * float64ToFloat32( alpha + x[ ix ] ) );
-	}
-	if ( N < 8 ) {
-		// Use simple summation...
-		s = 0.0;
-		for ( i = 0; i < N; i++ ) {
-			s += alpha + x[ ix ];
-			ix += strideX;
-		}
-		return float64ToFloat32( s );
-	}
-	if ( N <= BLOCKSIZE ) {
-		// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
-		s0 = alpha + x[ ix ];
-		s1 = alpha + x[ ix+strideX ];
-		s2 = alpha + x[ ix+(2*strideX) ];
-		s3 = alpha + x[ ix+(3*strideX) ];
-		s4 = alpha + x[ ix+(4*strideX) ];
-		s5 = alpha + x[ ix+(5*strideX) ];
-		s6 = alpha + x[ ix+(6*strideX) ];
-		s7 = alpha + x[ ix+(7*strideX) ];
-		ix += 8 * strideX;
-
-		M = N % 8;
-		for ( i = 8; i < N-M; i += 8 ) {
-			s0 += alpha + x[ ix ];
-			s1 += alpha + x[ ix+strideX ];
-			s2 += alpha + x[ ix+(2*strideX) ];
-			s3 += alpha + x[ ix+(3*strideX) ];
-			s4 += alpha + x[ ix+(4*strideX) ];
-			s5 += alpha + x[ ix+(5*strideX) ];
-			s6 += alpha + x[ ix+(6*strideX) ];
-			s7 += alpha + x[ ix+(7*strideX) ];
-			ix += 8 * strideX;
-		}
-		// Pairwise sum the accumulators:
-		s = ( (s0+s1) + (s2+s3) ) + ( (s4+s5) + (s6+s7) );
-
-		// Clean-up loop...
-		for ( i; i < N; i++ ) {
-			s += alpha + x[ ix ];
-			ix += strideX;
-		}
-		return float64ToFloat32( s );
-	}
-	// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
-	n = floor( N/2 );
-	n -= n % 8;
-	return float64ToFloat32( sdsapxsumpw( n, alpha, x, strideX, ix ) + sdsapxsumpw( N-n, alpha, x, strideX, ix+(n*strideX) ) ); // eslint-disable-line max-len
+	// Nothing to sum; return early so that the `N*alpha` term below does not contribute when `N <= 0`...
+	if ( N <= 0 ) {
+		return 0.0;
+	}
+	return f32( ( N * alpha ) + sdssumpw( N, x, strideX, offsetX ) );
 }
 
 
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsapxsumpw/manifest.json b/lib/node_modules/@stdlib/blas/ext/base/sdsapxsumpw/manifest.json
@@ -43,7 +43,8 @@
         "@stdlib/napi/argv-strided-float32array",
         "@stdlib/napi/create-double",
         "@stdlib/strided/base/stride2offset",
-        "@stdlib/blas/base/shared"
+        "@stdlib/blas/base/shared",
+        "@stdlib/blas/ext/base/sdssumpw"
       ]
     },
     {
@@ -58,7 +59,8 @@
       "libpath": [],
       "dependencies": [
         "@stdlib/strided/base/stride2offset",
-        "@stdlib/blas/base/shared"
+        "@stdlib/blas/base/shared",
+        "@stdlib/blas/ext/base/sdssumpw"
       ]
     },
     {
@@ -73,7 +75,8 @@
       "libpath": [],
       "dependencies": [
         "@stdlib/strided/base/stride2offset",
-        "@stdlib/blas/base/shared"
+        "@stdlib/blas/base/shared",
+        "@stdlib/blas/ext/base/sdssumpw"
       ]
     }
   ]
diff --git a/lib/node_modules/@stdlib/blas/ext/base/sdsapxsumpw/src/main.c b/lib/node_modules/@stdlib/blas/ext/base/sdsapxsumpw/src/main.c
@@ -19,6 +19,7 @@
 #include "stdlib/blas/ext/base/sdsapxsumpw.h"
 #include "stdlib/strided/base/stride2offset.h"
 #include "stdlib/blas/base/shared.h"
+#include "stdlib/blas/ext/base/sdssumpw.h"
 
 /**
 * Adds a scalar constant to each single-precision floating-point strided array element and computes the sum using pairwise summation with extended accumulation.
@@ -53,73 +54,9 @@ float API_SUFFIX(stdlib_strided_sdsapxsumpw)( const CBLAS_INT N, const float alp
 * @return         output value
 */
 float API_SUFFIX(stdlib_strided_sdsapxsumpw_ndarray)( const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX ) {
-	CBLAS_INT ix;
-	CBLAS_INT M;
-	CBLAS_INT n;
-	CBLAS_INT i;
-	double sum;
-	double s0;
-	double s1;
-	double s2;
-	double s3;
-	double s4;
-	double s5;
-	double s6;
-	double s7;
-
-	if ( N <= 0 ) {
-		return 0.0;
-	}
-	ix = offsetX;
-	if ( strideX == 0 ) {
-		return  N * ( alpha + X[ ix ] );
-	}
-	if ( N < 8 ) {
-		// Use simple summation...
-		sum = 0.0;
-		for ( i = 0; i < N; i++ ) {
-			sum += alpha + X[ ix ];
-			ix += strideX;
-		}
-		return sum;
-	}
-	// Blocksize for pairwise summation: 128 (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.)
-	if ( N <= 128 ) {
-		// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
-		s0 = alpha + X[ ix ];
-		s1 = alpha + X[ ix+strideX ];
-		s2 = alpha + X[ ix+(2*strideX) ];
-		s3 = alpha + X[ ix+(3*strideX) ];
-		s4 = alpha + X[ ix+(4*strideX) ];
-		s5 = alpha + X[ ix+(5*strideX) ];
-		s6 = alpha + X[ ix+(6*strideX) ];
-		s7 = alpha + X[ ix+(7*strideX) ];
-		ix += 8 * strideX;
-
-		M = N % 8;
-		for ( i = 8; i < N-M; i += 8 ) {
-			s0 += alpha + X[ ix ];
-			s1 += alpha + X[ ix+strideX ];
-			s2 += alpha + X[ ix+(2*strideX) ];
-			s3 += alpha + X[ ix+(3*strideX) ];
-			s4 += alpha + X[ ix+(4*strideX) ];
-			s5 += alpha + X[ ix+(5*strideX) ];
-			s6 += alpha + X[ ix+(6*strideX) ];
-			s7 += alpha + X[ ix+(7*strideX) ];
-			ix += 8 * strideX;
-		}
-		// Pairwise sum the accumulators:
-		sum = ( (s0+s1) + (s2+s3) ) + ( (s4+s5) + (s6+s7) );
-
-		// Clean-up loop...
-		for (; i < N; i++ ) {
-			sum += alpha + X[ ix ];
-			ix += strideX;
-		}
-		return sum;
-	}
-	// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
-	n = N / 2;
-	n -= n % 8;
-	return API_SUFFIX(stdlib_strided_sdsapxsumpw_ndarray)( n, alpha, X, strideX, ix ) + API_SUFFIX(stdlib_strided_sdsapxsumpw_ndarray)( N-n, alpha, X, strideX, ix+(n*strideX) );
+	// Nothing to sum; return early so that the `N*alpha` term below does not contribute when `N <= 0`...
+	if ( N <= 0 ) {
+		return 0.0f;
+	}
+	return ( N * (double)alpha ) + (double)API_SUFFIX(stdlib_strided_sdssumpw_ndarray)( N, X, strideX, offsetX );
 }

Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,8 @@`
`43`	`43`	`"@stdlib/napi/argv-strided-float32array",`
`44`	`44`	`"@stdlib/napi/create-double",`
`45`	`45`	`"@stdlib/strided/base/stride2offset",`
`46`		`- "@stdlib/blas/base/shared"`
	`46`	`+ "@stdlib/blas/base/shared",`
	`47`	`+ "@stdlib/blas/ext/base/sdssumpw"`
`47`	`48`	`]`
`48`	`49`	`},`
`49`	`50`	`{`
`@@ -58,7 +59,8 @@`
`58`	`59`	`"libpath": [],`
`59`	`60`	`"dependencies": [`
`60`	`61`	`"@stdlib/strided/base/stride2offset",`
`61`		`- "@stdlib/blas/base/shared"`
	`62`	`+ "@stdlib/blas/base/shared",`
	`63`	`+ "@stdlib/blas/ext/base/sdssumpw"`
`62`	`64`	`]`
`63`	`65`	`},`
`64`	`66`	`{`
`@@ -73,7 +75,8 @@`
`73`	`75`	`"libpath": [],`
`74`	`76`	`"dependencies": [`
`75`	`77`	`"@stdlib/strided/base/stride2offset",`
`76`		`- "@stdlib/blas/base/shared"`
	`78`	`+ "@stdlib/blas/base/shared",`
	`79`	`+ "@stdlib/blas/ext/base/sdssumpw"`
`77`	`80`	`]`
`78`	`81`	`}`
`79`	`82`	`]`