Skip to content

Commit 9d1ea75

Browse files
authored
Merge pull request #80 from xianyi/develop
rebase
2 parents b87a77d + 776d005 commit 9d1ea75

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

113 files changed

+453
-336
lines changed

Makefile.zarch

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,9 @@ ifeq ($(CORE), Z14)
88
CCOMMON_OPT += -march=z14 -mzvector -O3
99
FCOMMON_OPT += -march=z14 -mzvector
1010
endif
11+
12+
# Enable floating-point expression contraction for clang, since it is the
13+
# default for gcc
14+
ifeq ($(C_COMPILER), CLANG)
15+
CCOMMON_OPT += -ffp-contract=fast
16+
endif

c_check

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
88
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
99
$hostarch = `uname -p` if ($hostos eq "AIX");
1010
$hostarch = "x86_64" if ($hostarch eq "amd64");
11-
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
11+
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
1212
$hostarch = "arm64" if ($hostarch eq "aarch64");
1313
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
1414
$hostarch = "zarch" if ($hostarch eq "s390x");

cmake/system.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ if (NO_AVX2)
110110
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
111111
endif ()
112112

113+
if (NO_AVX512)
114+
message(STATUS "Disabling Advanced Vector Extensions 512 (AVX512).")
115+
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX512")
116+
endif ()
117+
113118
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
114119
set(GETARCH_FLAGS "${GETARCH_FLAGS} ${CMAKE_C_FLAGS_DEBUG}")
115120
endif ()

cmake/system_check.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,15 @@ else()
109109
endif()
110110

111111
if (X86_64 OR X86)
112+
if (NOT NO_AVX512)
112113
file(WRITE ${PROJECT_BINARY_DIR}/avx512.c "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
113114
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -c -v -o ${PROJECT_BINARY_DIR}/avx512.o ${PROJECT_BINARY_DIR}/avx512.c OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
114115
if (NO_AVX512 EQUAL 1)
115116
set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
116117
endif()
117118
# Clean up the AVX512 probe files. They were written to PROJECT_BINARY_DIR,
# and file(REMOVE) resolves relative paths against the current *source*
# directory, so the full binary-dir paths must be given here.
file(REMOVE "${PROJECT_BINARY_DIR}/avx512.c" "${PROJECT_BINARY_DIR}/avx512.o")
118119
endif()
120+
endif()
119121

120122
include(CheckIncludeFile)
121123
CHECK_INCLUDE_FILE("stdatomic.h" HAVE_C11)

kernel/zarch/camax.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ static FLOAT camax_kernel_32(BLASLONG n, FLOAT *x) {
136136
"wfmaxsb %%v0,%%v0,%%v16,0\n\t"
137137
"ler %[amax],%%f0"
138138
: [amax] "=f"(amax),[n] "+&r"(n)
139-
: "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x)
139+
: "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x)
140140
: "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20",
141141
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
142142
"v31");

kernel/zarch/camin.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x) {
136136
"wfminsb %%v0,%%v0,%%v16,0\n\t"
137137
"ler %[amin],%%f0"
138138
: [amin] "=f"(amin),[n] "+&r"(n)
139-
: "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x)
139+
: "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x)
140140
: "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20",
141141
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
142142
"v31");

kernel/zarch/casum.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ static FLOAT casum_kernel_32(BLASLONG n, FLOAT *x) {
108108
"vfasb %%v24,%%v24,%%v25\n\t"
109109
"vstef %%v24,%[asum],0"
110110
: [asum] "=Q"(asum),[n] "+&r"(n)
111-
: "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x)
111+
: "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x)
112112
: "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
113113
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
114114

kernel/zarch/caxpy.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) {
9999
"vst %%v19,112(%%r1,%[y])\n\t"
100100
"agfi %%r1,128\n\t"
101101
"brctg %[n],0b"
102-
: "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n)
103-
: [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x),
104-
"m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha)
102+
: "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n)
103+
: [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x),
104+
"m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha)
105105
: "cc", "r1", "v0", "v1", "v8", "v9", "v10", "v11", "v12", "v13",
106106
"v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
107107
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");

kernel/zarch/ccopy.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ static void ccopy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) {
3636
"la %[x],256(%[x])\n\t"
3737
"la %[y],256(%[y])\n\t"
3838
"brctg %[n],0b"
39-
: "=m"(*(struct { FLOAT x[n * 2]; } *) y),[x] "+&a"(x),[y] "+&a"(y),
39+
: "=m"(*(FLOAT (*)[n * 2]) y),[x] "+&a"(x),[y] "+&a"(y),
4040
[n] "+&r"(n)
41-
: "m"(*(const struct { FLOAT x[n * 2]; } *) x)
41+
: "m"(*(const FLOAT (*)[n * 2]) x)
4242
: "cc");
4343
}
4444

kernel/zarch/cdot.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,9 @@ static void cdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) {
9797
"vstef %%v24,4(%[d]),1\n\t"
9898
"vstef %%v25,8(%[d]),1\n\t"
9999
"vstef %%v25,12(%[d]),0"
100-
: "=m"(*(struct { FLOAT x[4]; } *) d),[n] "+&r"(n)
101-
: [d] "a"(d), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x),
102-
"m"(*(const struct { FLOAT x[n * 2]; } *) y),[y] "a"(y)
100+
: "=m"(*(FLOAT (*)[4]) d),[n] "+&r"(n)
101+
: [d] "a"(d), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x),
102+
"m"(*(const FLOAT (*)[n * 2]) y),[y] "a"(y)
103103
: "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19", "v20",
104104
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
105105
"v31");

0 commit comments

Comments
 (0)