Skip to content

Commit d9cab85

Browse files
committed
feat: add c implementation for blas/bae/sspmv
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: na - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: missing_dependencies - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent 776c7e4 commit d9cab85

File tree

2 files changed

+175
-0
lines changed

2 files changed

+175
-0
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#/
2+
# @license Apache-2.0
3+
#
4+
# Copyright (c) 2020 The Stdlib Authors.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#/
18+
19+
# VARIABLES #
20+
21+
ifndef VERBOSE
22+
QUIET := @
23+
else
24+
QUIET :=
25+
endif
26+
27+
# Determine the OS ([1][1], [2][2]).
28+
#
29+
# [1]: https://en.wikipedia.org/wiki/Uname#Examples
30+
# [2]: http://stackoverflow.com/a/27776822/2225624
31+
OS ?= $(shell uname)
32+
ifneq (, $(findstring MINGW,$(OS)))
33+
OS := WINNT
34+
else
35+
ifneq (, $(findstring MSYS,$(OS)))
36+
OS := WINNT
37+
else
38+
ifneq (, $(findstring CYGWIN,$(OS)))
39+
OS := WINNT
40+
else
41+
ifneq (, $(findstring Windows_NT,$(OS)))
42+
OS := WINNT
43+
endif
44+
endif
45+
endif
46+
endif
47+
48+
49+
# RULES #
50+
51+
#/
52+
# Removes generated files for building an add-on.
53+
#
54+
# @example
55+
# make clean-addon
56+
#/
57+
clean-addon:
58+
$(QUIET) -rm -f *.o *.node
59+
60+
.PHONY: clean-addon
61+
62+
#/
63+
# Removes generated files.
64+
#
65+
# @example
66+
# make clean
67+
#/
68+
clean: clean-addon
69+
70+
.PHONY: clean
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2024 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
#include "stdlib/blas/base/sspmv.h"
20+
#include "stdlib/blas/base/shared.h"
21+
#include "stdlib/blas/ext/base/sfill.h"
22+
#include "stdlib/blas/base/sscal.h"
23+
24+
/**
25+
* Performs the matrix-vector operation `Y = α*A*X + β*Y` where `α` and `β` are scalars, `X` and `Y` are `N` element vectors, and `A` is an `N` by `N` symmetric matrix supplied in packed form.
26+
*
27+
* @param order storage layout
28+
* @param uplo specifies whether the upper or lower triangular part of the symmetric matrix `A` is supplied
29+
* @param N number of elements along each dimension of `A`
30+
* @param alpha scalar constant
31+
* @param AP packed form of a symmetric matrix `A`
32+
* @param strideAP `AP` stride length
33+
* @param offsetAP starting index for `AP`
34+
* @param X first input array
35+
* @param strideX `X` stride length
36+
* @param offsetX starting `X` index
37+
* @param beta scalar constant
38+
* @param Y second input array
39+
* @param strideY `Y` stride length
40+
* @param offsetY starting `Y` index
41+
*/
42+
void API_SUFFIX(c_sspmv_ndarray)( const CBLAS_LAYOUT order, const CBLAS_UPLO uplo, const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float beta, float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) {
43+
CBLAS_INT ix;
44+
CBLAS_INT i;
45+
CBLAS_INT m;
46+
47+
if ( beta != 1.0f ) {
48+
if ( beta == 0.0f ) {
49+
stdlib_strided_sfill_ndarray( N, beta, Y, strideY, offsetY );
50+
} else {
51+
c_sscal_ndarray( N, beta, Y, strideY, offsetY );
52+
}
53+
}
54+
if ( alpha === 0.0f ) {
55+
return;
56+
}
57+
kk = offsetAP;
58+
ox = offsetX;
59+
oy = offsetY;
60+
if ( ( CblasColMajor && CblasUpper ) || ( CblasRowMajor && uplo === CblasLower ) ) {
61+
ix1 = ox;
62+
iy1 = oy;
63+
for ( i1 = 0; i1 < N; i1++ ) {
64+
tmp1 = alpha * X[ ix1 ];
65+
tmp2 = 0.0f;
66+
ix0 = ox;
67+
iy0 = oy;
68+
iap = kk;
69+
for ( i0 = 0; i0 < i1; i0++ ) {
70+
Y[ iy0 ] += tmp1 * AP[ iap ];
71+
tmp2 += AP[ iap ] * X[ ix0 ];
72+
ix0 += strideX;
73+
iy0 += strideY;
74+
iap += strideAP;
75+
}
76+
Y[ iy1 ] += ( tmp1 * AP[ iap ] ) + ( alpha * tmp2 );
77+
ix1 += strideX;
78+
iy1 += strideY;
79+
kk += ( i1 + 1 ) * strideAP;
80+
}
81+
return;
82+
}
83+
ix1 = ox;
84+
iy1 = oy;
85+
for ( i1 = 0; i1 < N; i1++ ) {
86+
tmp1 = alpha * X[ ix1 ];
87+
tmp2 = 0.0f;
88+
Y[ iy1 ] += tmp1 * AP[ kk ];
89+
iap = kk;
90+
ix0 = ix1;
91+
iy0 = iy1;
92+
for ( i0 = i1+1; i0 < N; i0++ ) {
93+
ix0 += strideX;
94+
iy0 += strideY;
95+
iap += strideAP;
96+
Y[ iy0 ] += tmp1 * AP[ iap ];
97+
tmp2 += AP[ iap ] * X[ ix0 ];
98+
}
99+
Y[ iy1 ] += alpha * tmp2;
100+
ix1 += strideX;
101+
iy1 += strideY;
102+
kk += ( N - i1 ) * strideAP;
103+
}
104+
return;
105+
}

0 commit comments

Comments
 (0)