|
| 1 | +/** |
| 2 | +* @license Apache-2.0 |
| 3 | +* |
| 4 | +* Copyright (c) 2024 The Stdlib Authors. |
| 5 | +* |
| 6 | +* Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | +* you may not use this file except in compliance with the License. |
| 8 | +* You may obtain a copy of the License at |
| 9 | +* |
| 10 | +* http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +* |
| 12 | +* Unless required by applicable law or agreed to in writing, software |
| 13 | +* distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +* See the License for the specific language governing permissions and |
| 16 | +* limitations under the License. |
| 17 | +*/ |
| 18 | + |
| 19 | +#include "stdlib/blas/base/sspmv.h" |
| 20 | +#include "stdlib/blas/base/shared.h" |
| 21 | +#include "stdlib/blas/ext/base/sfill.h" |
| 22 | +#include "stdlib/blas/base/sscal.h" |
| 23 | + |
| 24 | +/** |
| 25 | +* Performs the matrix-vector operation `Y = α*A*X + β*Y` where `α` and `β` are scalars, `X` and `Y` are `N` element vectors, and `A` is an `N` by `N` symmetric matrix supplied in packed form. |
| 26 | +* |
| 27 | +* @param order storage layout |
| 28 | +* @param uplo specifies whether the upper or lower triangular part of the symmetric matrix `A` is supplied |
| 29 | +* @param N number of elements along each dimension of `A` |
| 30 | +* @param alpha scalar constant |
| 31 | +* @param AP packed form of a symmetric matrix `A` |
| 32 | +* @param strideAP `AP` stride length |
| 33 | +* @param offsetAP starting index for `AP` |
| 34 | +* @param X first input array |
| 35 | +* @param strideX `X` stride length |
| 36 | +* @param offsetX starting `X` index |
| 37 | +* @param beta scalar constant |
| 38 | +* @param Y second input array |
| 39 | +* @param strideY `Y` stride length |
| 40 | +* @param offsetY starting `Y` index |
| 41 | +*/ |
| 42 | +void API_SUFFIX(c_sspmv_ndarray)( const CBLAS_LAYOUT order, const CBLAS_UPLO uplo, const CBLAS_INT N, const float alpha, const float *X, const CBLAS_INT strideX, const CBLAS_INT offsetX, const float beta, float *Y, const CBLAS_INT strideY, const CBLAS_INT offsetY ) { |
| 43 | + CBLAS_INT ix; |
| 44 | + CBLAS_INT i; |
| 45 | + CBLAS_INT m; |
| 46 | + |
| 47 | + if ( beta != 1.0f ) { |
| 48 | + if ( beta == 0.0f ) { |
| 49 | + stdlib_strided_sfill_ndarray( N, beta, Y, strideY, offsetY ); |
| 50 | + } else { |
| 51 | + c_sscal_ndarray( N, beta, Y, strideY, offsetY ); |
| 52 | + } |
| 53 | + } |
| 54 | + if ( alpha === 0.0f ) { |
| 55 | + return; |
| 56 | + } |
| 57 | + kk = offsetAP; |
| 58 | + ox = offsetX; |
| 59 | + oy = offsetY; |
| 60 | + if ( ( CblasColMajor && CblasUpper ) || ( CblasRowMajor && uplo === CblasLower ) ) { |
| 61 | + ix1 = ox; |
| 62 | + iy1 = oy; |
| 63 | + for ( i1 = 0; i1 < N; i1++ ) { |
| 64 | + tmp1 = alpha * X[ ix1 ]; |
| 65 | + tmp2 = 0.0f; |
| 66 | + ix0 = ox; |
| 67 | + iy0 = oy; |
| 68 | + iap = kk; |
| 69 | + for ( i0 = 0; i0 < i1; i0++ ) { |
| 70 | + Y[ iy0 ] += tmp1 * AP[ iap ]; |
| 71 | + tmp2 += AP[ iap ] * X[ ix0 ]; |
| 72 | + ix0 += strideX; |
| 73 | + iy0 += strideY; |
| 74 | + iap += strideAP; |
| 75 | + } |
| 76 | + Y[ iy1 ] += ( tmp1 * AP[ iap ] ) + ( alpha * tmp2 ); |
| 77 | + ix1 += strideX; |
| 78 | + iy1 += strideY; |
| 79 | + kk += ( i1 + 1 ) * strideAP; |
| 80 | + } |
| 81 | + return; |
| 82 | + } |
| 83 | + ix1 = ox; |
| 84 | + iy1 = oy; |
| 85 | + for ( i1 = 0; i1 < N; i1++ ) { |
| 86 | + tmp1 = alpha * X[ ix1 ]; |
| 87 | + tmp2 = 0.0f; |
| 88 | + Y[ iy1 ] += tmp1 * AP[ kk ]; |
| 89 | + iap = kk; |
| 90 | + ix0 = ix1; |
| 91 | + iy0 = iy1; |
| 92 | + for ( i0 = i1+1; i0 < N; i0++ ) { |
| 93 | + ix0 += strideX; |
| 94 | + iy0 += strideY; |
| 95 | + iap += strideAP; |
| 96 | + Y[ iy0 ] += tmp1 * AP[ iap ]; |
| 97 | + tmp2 += AP[ iap ] * X[ ix0 ]; |
| 98 | + } |
| 99 | + Y[ iy1 ] += alpha * tmp2; |
| 100 | + ix1 += strideX; |
| 101 | + iy1 += strideY; |
| 102 | + kk += ( N - i1 ) * strideAP; |
| 103 | + } |
| 104 | + return; |
| 105 | +} |
0 commit comments