Skip to content

Commit f91057c

Browse files
committed
s390x: move common vector definitions and utils into header
... to facilitate reuse beyond gemm_vec.c and avoid code duplication. Signed-off-by: Marius Hillenbrand <[email protected]>
1 parent 992d7ca commit f91057c

File tree

2 files changed

+66
-32
lines changed

2 files changed

+66
-32
lines changed

kernel/zarch/gemm_vec.c

Lines changed: 2 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,13 @@
3030
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131
*/
3232
#include "common.h"
33-
#include <vecintrin.h>
33+
#include "vector-common.h"
3434

3535
#include <stdbool.h>
3636
#include <stdio.h>
3737
#include <stdlib.h>
3838

39+
3940
#ifdef COMPLEX
4041
#error "Handling for complex numbers is not supported in this kernel"
4142
#endif
@@ -153,37 +154,6 @@ static const bool backwards = false;
153154
* 3, May 2008.
154155
*/
155156

156-
#define VLEN_BYTES 16
157-
#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
158-
159-
typedef FLOAT vector_float __attribute__ ((vector_size (16)));
160-
161-
/**
162-
* Load a vector into register, and hint on 8-byte alignment to improve
163-
* performance. gcc-9 and newer will create these hints by itself. For older
164-
* compiler versions, use inline assembly to explicitly express the hint.
165-
* Provide explicit hex encoding to cater for binutils versions that do not know
166-
* about vector-load with alignment hints yet.
167-
*
168-
* Note that, for block sizes where we apply vectorization, vectors in A will
169-
* always be 8-byte aligned.
170-
*/
171-
static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
172-
vector_float const *restrict addr = (vector_float const *restrict)a;
173-
vector_float y;
174-
175-
#if __GNUC__ < 9 && !defined(__clang__)
176-
// hex-encode vl %[out],%[addr],3
177-
asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
178-
: [ out ] "=v"(y)
179-
: [ addr ] "R"(*addr));
180-
#else
181-
y = *addr;
182-
#endif
183-
184-
return y;
185-
}
186-
187157
/**
188158
* Calculate for a row-block in C_i of size ROWSxCOLS using vector intrinsics.
189159
*

kernel/zarch/vector-common.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright (c) IBM Corporation 2020.
3+
* All rights reserved.
4+
*
5+
* Redistribution and use in source and binary forms, with or without
6+
* modification, are permitted provided that the following conditions are
7+
* met:
8+
*
9+
* 1. Redistributions of source code must retain the above copyright
10+
* notice, this list of conditions and the following disclaimer.
11+
*
12+
* 2. Redistributions in binary form must reproduce the above copyright
13+
* notice, this list of conditions and the following disclaimer in
14+
* the documentation and/or other materials provided with the
15+
* distribution.
16+
* 3. Neither the name of the OpenBLAS project nor the names of
17+
* its contributors may be used to endorse or promote products
18+
* derived from this software without specific prior written
19+
* permission.
20+
*
21+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30+
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
*/
32+
33+
#include <vecintrin.h>
34+
35+
#define VLEN_BYTES 16 /* size in bytes of one vector_float (used as its vector_size below) */
36+
#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT)) /* number of FLOAT elements that fit in one vector_float */
37+
38+
typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES))); /* vector of FLOAT elements; FLOAT is not defined in this header -- NOTE(review): presumably provided by common.h, which gemm_vec.c includes first; confirm */
39+
40+
/**
41+
* Load one vector_float from memory into a register, hinting 8-byte alignment
42+
* to improve performance. gcc-9 and newer create these hints on their own. For
43+
* older gcc versions, inline assembly is used to express the hint explicitly.
44+
* The instruction is given in explicit hex encoding to cater for binutils
45+
* versions that do not know about vector-load with alignment hints yet.
46+
*
47+
* Note that, for block sizes where we apply vectorization, vectors in A will
48+
* always be 8-byte aligned.
49+
*
* @param a pointer to the first FLOAT of the vector to load; VLEN_BYTES bytes
*          are read starting at this address (one full vector_float).
* @return the vector_float read from a.
*/
50+
static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
51+
vector_float const *restrict addr = (vector_float const *restrict)a; /* view the FLOAT data at a as one whole vector */
52+
vector_float y;
53+
54+
#if __GNUC__ < 9 && !defined(__clang__) /* gcc older than 9 only; clang is excluded explicitly */
55+
// hex-encode vl %[out],%[addr],3 (the trailing 3 is presumably the alignment-hint field -- confirm against the ISA reference)
56+
asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
57+
: [ out ] "=v"(y) /* output: the loaded vector is written to y */
58+
: [ addr ] "R"(*addr)); /* input: the vector in memory that addr points to */
59+
#else
60+
y = *addr; /* plain vector load; per the note above, gcc >= 9 emits the hint itself */
61+
#endif
62+
63+
return y;
64+
}

0 commit comments

Comments
 (0)