Skip to content

Commit 17609f8

Browse files
authored
Merge pull request #11 from xianyi/develop
sync with upstream
2 parents c0d570a + 3a2df19 commit 17609f8

File tree

7 files changed

+54
-41
lines changed

7 files changed

+54
-41
lines changed

.travis.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -162,16 +162,16 @@ matrix:
162162
before_script:
163163
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
164164
- brew update
165-
- brew install gcc # for gfortran
165+
- brew install gcc@8 # for gfortran
166166
script:
167167
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
168168
env:
169-
- BTYPE="BINARY=64 INTERFACE64=1"
169+
- BTYPE="BINARY=64 INTERFACE64=1 FC=gfortran-8"
170170

171171
- <<: *test-macos
172172
osx_image: xcode8.3
173173
env:
174-
- BTYPE="BINARY=32"
174+
- BTYPE="BINARY=32 FC=gfortran-8"
175175

176176
# whitelist
177177
branches:

common_arm64.h

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -103,12 +103,14 @@ static inline int blas_quickdivide(blasint x, blasint y){
103103

104104
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
105105

106-
#define PROLOGUE \
107-
.text ;\
108-
.align 4 ;\
109-
.global REALNAME ;\
110-
.type REALNAME, %function ;\
106+
.macro PROLOGUE
107+
.text ;
108+
.p2align 2 ;
109+
.global REALNAME ;
110+
.type REALNAME, %function ;
111111
REALNAME:
112+
.endm
113+
112114

113115
#define EPILOGUE
114116

kernel/arm64/nrm2.S

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -54,37 +54,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5454
#if !defined(DOUBLE)
5555
ldr s4, [X], #4
5656
fcmp s4, REGZERO
57-
beq KERNEL_F1_NEXT_\@
57+
beq 2f /* KERNEL_F1_NEXT_\@ */
5858
fabs s4, s4
5959
fcmp SCALE, s4
60-
bge KERNEL_F1_SCALE_GE_X_\@
60+
bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
6161
fdiv s2, SCALE, s4
6262
fmul s2, s2, s2
6363
fmul s3, SSQ, s2
6464
fadd SSQ, REGONE, s3
6565
fmov SCALE, s4
66-
b KERNEL_F1_NEXT_\@
67-
KERNEL_F1_SCALE_GE_X_\@:
66+
b 2f /* KERNEL_F1_NEXT_\@ */
67+
1: /* KERNEL_F1_SCALE_GE_X_\@: */
6868
fdiv s2, s4, SCALE
6969
fmla SSQ, s2, v2.s[0]
7070
#else
7171
ldr d4, [X], #8
7272
fcmp d4, REGZERO
73-
beq KERNEL_F1_NEXT_\@
73+
beq 2f /* KERNEL_F1_NEXT_\@ */
7474
fabs d4, d4
7575
fcmp SCALE, d4
76-
bge KERNEL_F1_SCALE_GE_X_\@
76+
bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
7777
fdiv d2, SCALE, d4
7878
fmul d2, d2, d2
7979
fmul d3, SSQ, d2
8080
fadd SSQ, REGONE, d3
8181
fmov SCALE, d4
82-
b KERNEL_F1_NEXT_\@
83-
KERNEL_F1_SCALE_GE_X_\@:
82+
b 2f /* KERNEL_F1_NEXT_\@ */
83+
1: /* KERNEL_F1_SCALE_GE_X_\@: */
8484
fdiv d2, d4, SCALE
8585
fmla SSQ, d2, v2.d[0]
8686
#endif
87-
KERNEL_F1_NEXT_\@:
87+
2: /* KERNEL_F1_NEXT_\@: */
8888
.endm
8989

9090
.macro KERNEL_S1

kernel/arm64/znrm2.S

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -54,69 +54,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5454
#if !defined(DOUBLE)
5555
ldr s4, [X], #4
5656
fcmp s4, REGZERO
57-
beq KERNEL_F1_NEXT_\@
57+
beq 2f /* KERNEL_F1_NEXT_\@ */
5858
fabs s4, s4
5959
fcmp SCALE, s4
60-
bge KERNEL_F1_SCALE_GE_XR_\@
60+
bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
6161
fdiv s2, SCALE, s4
6262
fmul s2, s2, s2
6363
fmul s3, SSQ, s2
6464
fadd SSQ, REGONE, s3
6565
fmov SCALE, s4
66-
b KERNEL_F1_NEXT_\@
67-
KERNEL_F1_SCALE_GE_XR_\@:
66+
b 2f /* KERNEL_F1_NEXT_\@ */
67+
1: /* KERNEL_F1_SCALE_GE_XR_\@: */
6868
fdiv s2, s4, SCALE
6969
fmla SSQ, s2, v2.s[0]
70-
KERNEL_F1_NEXT_\@:
70+
2: /* KERNEL_F1_NEXT_\@: */
7171
ldr s5, [X], #4
7272
fcmp s5, REGZERO
73-
beq KERNEL_F1_END_\@
73+
beq 4f /* KERNEL_F1_END_\@ */
7474
fabs s5, s5
7575
fcmp SCALE, s5
76-
bge KERNEL_F1_SCALE_GE_XI_\@
76+
bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
7777
fdiv s2, SCALE, s5
7878
fmul s2, s2, s2
7979
fmul s3, SSQ, s2
8080
fadd SSQ, REGONE, s3
8181
fmov SCALE, s5
82-
b KERNEL_F1_END_\@
83-
KERNEL_F1_SCALE_GE_XI_\@:
82+
b 4f /* KERNEL_F1_END_\@ */
83+
3: /* KERNEL_F1_SCALE_GE_XI_\@: */
8484
fdiv s2, s5, SCALE
8585
fmla SSQ, s2, v2.s[0]
8686
#else
8787
ldr d4, [X], #8
8888
fcmp d4, REGZERO
89-
beq KERNEL_F1_NEXT_\@
89+
beq 2f /* KERNEL_F1_NEXT_\@ */
9090
fabs d4, d4
9191
fcmp SCALE, d4
92-
bge KERNEL_F1_SCALE_GE_XR_\@
92+
bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
9393
fdiv d2, SCALE, d4
9494
fmul d2, d2, d2
9595
fmul d3, SSQ, d2
9696
fadd SSQ, REGONE, d3
9797
fmov SCALE, d4
98-
b KERNEL_F1_NEXT_\@
99-
KERNEL_F1_SCALE_GE_XR_\@:
98+
b 2f /* KERNEL_F1_NEXT_\@ */
99+
1: /* KERNEL_F1_SCALE_GE_XR_\@: */
100100
fdiv d2, d4, SCALE
101101
fmla SSQ, d2, v2.d[0]
102-
KERNEL_F1_NEXT_\@:
102+
2: /* KERNEL_F1_NEXT_\@: */
103103
ldr d5, [X], #8
104104
fcmp d5, REGZERO
105-
beq KERNEL_F1_END_\@
105+
beq 4f /* KERNEL_F1_END_\@ */
106106
fabs d5, d5
107107
fcmp SCALE, d5
108-
bge KERNEL_F1_SCALE_GE_XI_\@
108+
bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
109109
fdiv d2, SCALE, d5
110110
fmul d2, d2, d2
111111
fmul d3, SSQ, d2
112112
fadd SSQ, REGONE, d3
113113
fmov SCALE, d5
114-
b KERNEL_F1_END_\@
115-
KERNEL_F1_SCALE_GE_XI_\@:
114+
b 4f /* KERNEL_F1_END_\@ */
115+
3: /* KERNEL_F1_SCALE_GE_XI_\@: */
116116
fdiv d2, d5, SCALE
117117
fmla SSQ, d2, v2.d[0]
118118
#endif
119-
KERNEL_F1_END_\@:
119+
4: /* KERNEL_F1_END_\@: */
120120
.endm
121121

122122
.macro KERNEL_S1

kernel/power/caxpy_power8.S

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,9 +34,9 @@ caxpy_k:
3434
lfs 0,4(10)
3535
fmuls 10,2,10
3636
#ifdef CONJ
37-
fmsubs 11,11,1,10
38-
#else
3937
fmadds 11,11,1,10
38+
#else
39+
fmsubs 11,11,1,10
4040
#endif
4141
fadds 12,12,11
4242
stfs 12,0(10)
@@ -241,8 +241,13 @@ caxpy_k:
241241
lfsx 12,8,5
242242
lfsx 0,10,5
243243
fmuls 11,2,11
244+
#ifdef CONJ
244245
fmsubs 12,1,12,11
245246
fsubs 0,0,12
247+
#else
248+
fmadds 12,1,12,11
249+
fadds 0,0,12
250+
#endif
246251
stfsx 0,10,5
247252
ble 7,.L39
248253
sldi 6,6,2

kernel/power/cdot_power9.S

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,16 @@
1-
.file "cdot.c"
1+
#define ASSEMBLER
2+
#include "common.h"
3+
/*
4+
.file "cdot.c"
25
.abiversion 2
36
.section ".text"
47
.align 2
58
.p2align 4,,15
69
.globl cdot_k
710
.type cdot_k, @function
11+
*/
12+
PROLOGUE
13+
814
cdot_k:
915
.LCF0:
1016
0: addis 2,12,.TOC.-.LCF0@ha

kernel/power/sgemm_logic_power9.S

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -136,8 +136,8 @@ LSGEMM_L8x16_BEGIN:
136136
#endif
137137

138138
ZERO8x16
139-
mtctr L
140139
ble LSGEMM_L8x16_SUB0
140+
mtctr L
141141
bl LSGEMM_L8x16_LMAIN_SUB
142142
andi. L, T12, 127
143143
ble LSGEMM_L8x16_SAVE
@@ -146,7 +146,7 @@ LSGEMM_L8x16_BEGIN:
146146
LSGEMM_L8x16_SUB0:
147147
#if defined(TRMMKERNEL)
148148
andi. L, T11, 255
149-
cmpwi T11,128
149+
cmpwi T11,129
150150
#else
151151
andi. L, K, 255
152152
cmpwi K,129

0 commit comments

Comments
 (0)