|
| 1 | +/* |
| 2 | + * Copyright (c) IBM Corporation 2020. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * Redistribution and use in source and binary forms, with or without |
| 6 | + * modification, are permitted provided that the following conditions are |
| 7 | + * met: |
| 8 | + * |
| 9 | + * 1. Redistributions of source code must retain the above copyright |
| 10 | + * notice, this list of conditions and the following disclaimer. |
| 11 | + * |
| 12 | + * 2. Redistributions in binary form must reproduce the above copyright |
| 13 | + * notice, this list of conditions and the following disclaimer in |
| 14 | + * the documentation and/or other materials provided with the |
| 15 | + * distribution. |
| 16 | + * 3. Neither the name of the OpenBLAS project nor the names of |
| 17 | + * its contributors may be used to endorse or promote products |
| 18 | + * derived from this software without specific prior written |
| 19 | + * permission. |
| 20 | + * |
| 21 | + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 22 | + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 23 | + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 24 | + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 25 | + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 26 | + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 27 | + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 28 | + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 29 | + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE |
| 30 | + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | + */ |
| 32 | + |
| 33 | +#include <vecintrin.h> |
| 34 | + |
| 35 | +#define VLEN_BYTES 16 |
| 36 | +#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT)) |
| 37 | + |
| 38 | +typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES))); |
| 39 | + |
| 40 | +/** |
| 41 | + * Load a vector into register, and hint on 8-byte alignment to improve |
| 42 | + * performance. gcc-9 and newer will create these hints by itself. For older |
| 43 | + * compiler versions, use inline assembly to explicitly express the hint. |
| 44 | + * Provide explicit hex encoding to cater for binutils versions that do not know |
| 45 | + * about vector-load with alignment hints yet. |
| 46 | + * |
| 47 | + * Note that, for block sizes where we apply vectorization, vectors in A will |
| 48 | + * always be 8-byte aligned. |
| 49 | + */ |
| 50 | +static inline vector_float vec_load_hinted(FLOAT const *restrict a) { |
| 51 | + vector_float const *restrict addr = (vector_float const *restrict)a; |
| 52 | + vector_float y; |
| 53 | + |
| 54 | +#if __GNUC__ < 9 && !defined(__clang__) |
| 55 | + // hex-encode vl %[out],%[addr],3 |
| 56 | + asm(".insn vrx,0xe70000003006,%[out],%[addr],3" |
| 57 | + : [ out ] "=v"(y) |
| 58 | + : [ addr ] "R"(*addr)); |
| 59 | +#else |
| 60 | + y = *addr; |
| 61 | +#endif |
| 62 | + |
| 63 | + return y; |
| 64 | +} |
0 commit comments