Skip to content

Commit 8189a98

Browse files
authored
Merge pull request #12 from xianyi/develop
rebase
2 parents 3dbb32c + 3e1e74f commit 8189a98

File tree

11 files changed

+267
-43
lines changed

11 files changed

+267
-43
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,4 @@ benchmark/*.goto
9191
benchmark/smallscaling
9292
CMakeCache.txt
9393
CMakeFiles/*
94+
.vscode

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,8 @@ Please note that it is not possible to combine support for different architectur
212212
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
213213
- **AIX**: Supported on PPC up to POWER8
214214
- **Haiku**: Supported by the community. We don't actively test the library on this OS.
215-
- **SunOS**: Supported by the community. We don't actively test the library on this OS:
215+
- **SunOS**: Supported by the community. We don't actively test the library on this OS.
216+
- **Cortex-M**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-on-Cortex-M>.
216217

217218
## Usage
218219

appveyor.yml

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,11 @@ environment:
3030
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
3131
matrix:
3232
- COMPILER: clang-cl
33-
WITH_FORTRAN: yes
33+
WITH_FORTRAN: ON
34+
USE_OPENMP: ON
3435
- COMPILER: clang-cl
3536
DYNAMIC_ARCH: ON
36-
WITH_FORTRAN: no
37+
WITH_FORTRAN: OFF
3738
- COMPILER: cl
3839
- COMPILER: MinGW64-gcc-7.2.0-mingw
3940
DYNAMIC_ARCH: OFF
@@ -47,12 +48,7 @@ environment:
4748
install:
4849
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
4950
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
50-
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake
51-
52-
- if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja
53-
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja
54-
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang
55-
51+
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1
5652
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
5753
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
5854
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
@@ -68,8 +64,9 @@ before_build:
6864
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
6965
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
7066
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
71-
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
72-
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
67+
- if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
68+
- if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
69+
- if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON ..
7370
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
7471

7572
build_script:

cmake/system.cmake

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,16 +148,20 @@ endif ()
148148
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
149149
if (DEFINED TARGET)
150150
if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
151-
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
151+
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
152152
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
153-
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
153+
# GCC gained -march=cooperlake in 10.1; older releases fall back to the
# closest supported AVX512 target. The test is written as
# "NOT ... VERSION_LESS 10.1" (i.e. >= 10.1) because CMake compares versions
# component-wise as integers: a threshold such as "10.09" is read as 10.9 and
# would wrongly reject every GCC 10.x release. This form also works on CMake
# versions that predate VERSION_GREATER_EQUAL.
if (NOT ${CMAKE_C_COMPILER_VERSION} VERSION_LESS 10.1)
154154
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
155155
else()
156156
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
157157
endif()
158-
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
159-
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
160-
# endif()
158+
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
159+
# Select -march=cooperlake when the Clang/AppleClang version compares greater
# than 8.99, otherwise fall back to -march=skylake-avx512.
# CMake compares version components as integers, so "8.99" means major 8,
# minor 99: any 9.x or later release passes, which makes this act as ">= 9.0".
# NOTE(review): AppleClang uses its own version numbering — confirm that the
# same threshold is appropriate for it.
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99)
160+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
161+
else()
162+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
163+
endif()
164+
endif()
161165
endif()
162166
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
163167
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
@@ -233,6 +237,11 @@ if (BINARY64)
233237
endif ()
234238
endif ()
235239

240+
# Embedded build: when EMBEDDED is set, append to the common C flags.
if(EMBEDDED)
241+
# Defines OS_EMBEDDED for the C sources.
set(CCOMMON_OPT "${CCOMMON_OPT} -DOS_EMBEDDED")
242+
# The CPU/FPU selection below is hard-coded to a Cortex-M4 with the hard-float
# ABI and the fpv4-sp-d16 FPU; it is not derived from TARGET or any option.
# NOTE(review): other Cortex-M parts would presumably need different flags.
set(CCOMMON_OPT "${CCOMMON_OPT} -mthumb -mcpu=cortex-m4 -mfloat-abi=hard -mfpu=fpv4-sp-d16")
243+
endif()
244+
236245
if (NEED_PIC)
237246
if (${CMAKE_C_COMPILER} STREQUAL "IBM")
238247
set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")

common.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ extern "C" {
122122
#define ATOM GOTO_ATOM
123123
#undef GOTO_ATOM
124124
#endif
125-
#else
125+
#elif !defined(OS_EMBEDDED)
126126
#include <sys/mman.h>
127127
#ifndef NO_SYSV_IPC
128128
#include <sys/shm.h>
@@ -134,6 +134,9 @@ extern "C" {
134134
#if defined(SMP) || defined(USE_LOCKING)
135135
#include <pthread.h>
136136
#endif
137+
#else
138+
#include <time.h>
139+
#include <math.h>
137140
#endif
138141

139142
#if defined(OS_SUNOS)
@@ -488,10 +491,12 @@ static inline unsigned long long rpcc(void){
488491
struct timespec ts;
489492
clock_gettime(CLOCK_MONOTONIC, &ts);
490493
return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
491-
#else
494+
#elif !defined(OS_EMBEDDED)
492495
struct timeval tv;
493496
gettimeofday(&tv,NULL);
494497
return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000;
498+
#else
499+
return 0;
495500
#endif
496501
}
497502
#define RPCC_DEFINED
@@ -521,6 +526,10 @@ static void __inline blas_lock(volatile BLASULONG *address){
521526
#include "common_linux.h"
522527
#endif
523528

529+
#ifdef OS_EMBEDDED
530+
#define DTB_DEFAULT_ENTRIES 64
531+
#endif
532+
524533
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
525534

526535
#ifdef __NetBSD__

driver/others/memory.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,16 +1668,23 @@ void gotoblas_dummy_for_PGI(void) {
16681668
#ifndef MEM_LARGE_PAGES
16691669
#define MEM_LARGE_PAGES 0x20000000
16701670
#endif
1671-
#else
1671+
#elif !defined(OS_EMBEDDED)
16721672
#define ALLOC_MMAP
16731673
#define ALLOC_MALLOC
1674+
#else
1675+
#define ALLOC_MALLOC
1676+
1677+
inline int puts(const char *str) { return 0; }
1678+
inline int printf(const char *format, ...) { return 0; }
1679+
inline char *getenv(const char *name) { return ""; }
1680+
inline int atoi(const char *str) { return 0; }
16741681
#endif
16751682

16761683
#include <stdlib.h>
16771684
#include <stdio.h>
16781685
#include <fcntl.h>
16791686

1680-
#if !defined(OS_WINDOWS) || defined(OS_CYGWIN_NT)
1687+
#if (!defined(OS_WINDOWS) || defined(OS_CYGWIN_NT)) && !defined(OS_EMBEDDED)
16811688
#include <sys/mman.h>
16821689
#ifndef NO_SYSV_IPC
16831690
#include <sys/shm.h>

interface/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1634,10 +1634,10 @@ cblas_srotg.$(SUFFIX) cblas_srotg.$(PSUFFIX): rotg.c
16341634
cblas_drotg.$(SUFFIX) cblas_drotg.$(PSUFFIX): rotg.c
16351635
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
16361636

1637-
cblas_crotg.$(SUFFIX) crotg.$(PSUFFIX): zrotg.c
1637+
cblas_crotg.$(SUFFIX) cblas_crotg.$(PSUFFIX): zrotg.c
16381638
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
16391639

1640-
cblas_zrotg.$(SUFFIX) zrotg.$(PSUFFIX): zrotg.c
1640+
cblas_zrotg.$(SUFFIX) cblas_zrotg.$(PSUFFIX): zrotg.c
16411641
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
16421642

16431643
cblas_srotm.$(SUFFIX) cblas_srotm.$(PSUFFIX): rotm.c

kernel/power/cscal_microk_power10.c

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
/***************************************************************************
2+
Copyright (c) 2021, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#define HAVE_KERNEL_8 1
29+
30+
/*
 * In-place scaling of a single-precision complex vector by the complex
 * scalar (alpha_r, alpha_i), implemented with paired vector loads/stores
 * (lxvp/stxvp).
 *
 * The function keeps the name zscal_kernel_8 even though it operates on
 * float data.
 *
 *   n        number of complex elements to process. Each pass of the asm
 *            handles 128 bytes (four 32-byte lxvp loads), i.e. 16 complex
 *            float elements, and decrements n by 16. The first 128 bytes are
 *            loaded before n is tested and the tail section always computes
 *            and stores one full group of 16, so n must be a positive
 *            multiple of 16.
 *   x        interleaved (real, imag) float pairs; overwritten with the result.
 *   alpha_r  real part of the scalar; splatted across vs32 inside the asm.
 *   alpha_i  imaginary part of the scalar; enters through t0 with alternating
 *            signs (-alpha_i, alpha_i, ...).
 *
 * Per element the asm forms (x_r*alpha_r, x_i*alpha_r), permutes the input
 * with `mask` so the real and imaginary lanes trade places, multiplies that
 * by t0 to get (x_i*-alpha_i, x_r*alpha_i), and adds the two products.
 *
 * NOTE(review): the only memory operand is "+m" (*x), which names a single
 * float, and there is no "memory" clobber, while the asm reads and writes the
 * whole array through %2. Confirm the compiler cannot cache or reorder other
 * accesses to x around this statement.
 */
static void zscal_kernel_8 (long n, float *x, float alpha_r, float alpha_i)
31+
{
32+
__vector float t0 = {-alpha_i, alpha_i, -alpha_i, alpha_i};
33+
__vector unsigned char mask = { 11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4};
34+
__asm__
35+
(
36+
"dcbt 0, %2 \n\t"
37+
"xscvdpspn 32, %x3 \n\t"
38+
"xxspltw 32, 32, 0 \n\t"
39+
40+
"lxvp 40, 0(%2) \n\t"
41+
"lxvp 42, 32(%2) \n\t"
42+
"lxvp 44, 64(%2) \n\t"
43+
"lxvp 46, 96(%2) \n\t"
44+
45+
"addic. %1, %1, -16 \n\t"
46+
"ble two%= \n\t"
47+
48+
".align 5 \n"
49+
"one%=: \n\t"
50+
51+
"xvmulsp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r
52+
"xvmulsp 49, 41, 32 \n\t"
53+
"xvmulsp 50, 42, 32 \n\t"
54+
"xvmulsp 51, 43, 32 \n\t"
55+
"xvmulsp 52, 44, 32 \n\t"
56+
"xvmulsp 53, 45, 32 \n\t"
57+
"xvmulsp 54, 46, 32 \n\t"
58+
"xvmulsp 55, 47, 32 \n\t"
59+
60+
"xxperm 34, 40, %x5 \n\t"
61+
"xxperm 35, 41, %x5 \n\t"
62+
"xxperm 36, 42, %x5 \n\t"
63+
"xxperm 37, 43, %x5 \n\t"
64+
"xxperm 38, 44, %x5 \n\t"
65+
"xxperm 39, 45, %x5 \n\t"
66+
"xxperm 56, 46, %x5 \n\t"
67+
"xxperm 57, 47, %x5 \n\t"
68+
69+
"xvmulsp 34, 34, %x4 \n\t" // x0_i * -alpha_i, x0_r * alpha_i
70+
"xvmulsp 35, 35, %x4 \n\t"
71+
72+
"lxvp 40, 128(%2) \n\t"
73+
74+
"xvmulsp 36, 36, %x4 \n\t"
75+
"xvmulsp 37, 37, %x4 \n\t"
76+
77+
"lxvp 42, 160(%2) \n\t"
78+
79+
"xvmulsp 38, 38, %x4 \n\t"
80+
"xvmulsp 39, 39, %x4 \n\t"
81+
82+
"lxvp 44, 192(%2) \n\t"
83+
84+
"xvmulsp 56, 56, %x4 \n\t"
85+
"xvmulsp 57, 57, %x4 \n\t"
86+
87+
"lxvp 46, 224(%2) \n\t"
88+
89+
"xvaddsp 48, 48, 34 \n\t"
90+
"xvaddsp 49, 49, 35 \n\t"
91+
"xvaddsp 50, 50, 36 \n\t"
92+
"xvaddsp 51, 51, 37 \n\t"
93+
94+
"stxvp 48, 0(%2) \n\t"
95+
96+
"xvaddsp 52, 52, 38 \n\t"
97+
"xvaddsp 53, 53, 39 \n\t"
98+
99+
"stxvp 50, 32(%2) \n\t"
100+
101+
"xvaddsp 54, 54, 56 \n\t"
102+
"xvaddsp 55, 55, 57 \n\t"
103+
104+
"stxvp 52, 64(%2) \n\t"
105+
"stxvp 54, 96(%2) \n\t"
106+
107+
"addi %2, %2, 128 \n\t"
108+
109+
"addic. %1, %1, -16 \n\t"
110+
"bgt one%= \n"
111+
112+
"two%=: \n\t"
113+
114+
"xvmulsp 48, 40, 32 \n\t" // x0_r * alpha_r, x0_i * alpha_r
115+
"xvmulsp 49, 41, 32 \n\t"
116+
"xvmulsp 50, 42, 32 \n\t"
117+
"xvmulsp 51, 43, 32 \n\t"
118+
"xvmulsp 52, 44, 32 \n\t"
119+
"xvmulsp 53, 45, 32 \n\t"
120+
"xvmulsp 54, 46, 32 \n\t"
121+
"xvmulsp 55, 47, 32 \n\t"
122+
123+
"xxperm 34, 40, %x5 \n\t"
124+
"xxperm 35, 41, %x5 \n\t"
125+
"xxperm 36, 42, %x5 \n\t"
126+
"xxperm 37, 43, %x5 \n\t"
127+
"xxperm 38, 44, %x5 \n\t"
128+
"xxperm 39, 45, %x5 \n\t"
129+
"xxperm 56, 46, %x5 \n\t"
130+
"xxperm 57, 47, %x5 \n\t"
131+
132+
133+
"xvmulsp 34, 34, %x4 \n\t" // x0_i * -alpha_i, x0_r * alpha_i
134+
"xvmulsp 35, 35, %x4 \n\t"
135+
"xvmulsp 36, 36, %x4 \n\t"
136+
"xvmulsp 37, 37, %x4 \n\t"
137+
"xvmulsp 38, 38, %x4 \n\t"
138+
"xvmulsp 39, 39, %x4 \n\t"
139+
"xvmulsp 56, 56, %x4 \n\t"
140+
"xvmulsp 57, 57, %x4 \n\t"
141+
142+
"xvaddsp 48, 48, 34 \n\t"
143+
"xvaddsp 49, 49, 35 \n\t"
144+
"xvaddsp 50, 50, 36 \n\t"
145+
"xvaddsp 51, 51, 37 \n\t"
146+
147+
"stxvp 48, 0(%2) \n\t"
148+
149+
"xvaddsp 52, 52, 38 \n\t"
150+
"xvaddsp 53, 53, 39 \n\t"
151+
152+
"stxvp 50, 32(%2) \n\t"
153+
154+
"xvaddsp 54, 54, 56 \n\t"
155+
"xvaddsp 55, 55, 57 \n\t"
156+
157+
"stxvp 52, 64(%2) \n\t"
158+
"stxvp 54, 96(%2) \n\t"
159+
160+
"#n=%1 x=%0=%2 alpha=(%3,%4)\n"
161+
:
162+
"+m" (*x),
163+
"+r" (n), // 1
164+
"+b" (x) // 2
165+
:
166+
"f" (alpha_r), // 3
167+
"wa" (t0), // 4
168+
"wa" (mask) // 5
169+
:
170+
"cr0",
171+
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
172+
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
173+
"vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55",
174+
"vs56","vs57"
175+
);
176+
}

kernel/power/zscal.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3838

3939
#pragma GCC optimize "O1"
4040

41-
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
4241
#if defined(__VEC__) || defined(__ALTIVEC__)
42+
#if defined(POWER8) || defined(POWER9)
4343
#if defined(DOUBLE)
4444
#include "zscal_microk_power8.c"
4545
#endif
46+
#elif defined(POWER10)
47+
#if defined(DOUBLE)
48+
#include "zscal_microk_power8.c"
49+
#else
50+
#include "cscal_microk_power10.c"
51+
#endif
4652
#endif
4753
#endif
4854

@@ -145,7 +151,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F
145151
{
146152

147153

154+
#if defined(DOUBLE)
148155
n1 = n & -8;
156+
#else
157+
n1 = n & -16;
158+
#endif
149159
if ( n1 > 0 )
150160
{
151161
zscal_kernel_8(n1, x, da_r, da_i);

0 commit comments

Comments
 (0)