Skip to content

Commit b073d75

Browse files
committed
x86_64: clobber all xmm registers after vzeroupper
As observed with GCC 10 when compiling with -march=native -ftree-vectorize on Knights Landing, the compiler is now smart enough to determine which registers are clobbered inside non-inlined static functions. In particular, sgemv relied on a kernel to preserve the whole %ymm2 register (since it was not in the kernel's clobber list), but the upper part of that register was destroyed by vzeroupper. This caused many tests to fail. This patch makes sure that all xmm registers (and, by extension, the corresponding ymm/zmm registers) are listed as clobbered so that this cannot happen; most kernels in fact already did this correctly.
1 parent 8e20ab2 commit b073d75

22 files changed

+63
-44
lines changed

kernel/x86_64/caxpy_microk_bulldozer-2.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
122122
"r" (alpha), // 4
123123
"r" (mvec) // 5
124124
: "cc",
125-
"%xmm0", "%xmm1",
125+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
126126
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
127127
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
128128
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
@@ -189,9 +189,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
189189
"r" (alpha), // 4
190190
"r" (mvec) // 5
191191
: "cc",
192-
"%xmm0", "%xmm1",
192+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
193193
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
194194
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
195+
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
195196
"memory"
196197
);
197198

kernel/x86_64/caxpy_microk_haswell-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
120120
"r" (alpha), // 4
121121
"r" (mvec) // 5
122122
: "cc",
123-
"%xmm0", "%xmm1",
123+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
124124
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
125125
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
126126
"%xmm12", "%xmm13", "%xmm14", "%xmm15",

kernel/x86_64/caxpy_microk_sandy-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
104104
"r" (alpha), // 4
105105
"r" (mvec) // 5
106106
: "cc",
107-
"%xmm0", "%xmm1",
107+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
108108
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
109109
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
110110
"%xmm12", "%xmm13", "%xmm14", "%xmm15",

kernel/x86_64/caxpy_microk_steamroller-2.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
122122
"r" (alpha), // 4
123123
"r" (mvec) // 5
124124
: "cc",
125-
"%xmm0", "%xmm1",
125+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
126126
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
127127
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
128128
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
@@ -189,9 +189,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
189189
"r" (alpha), // 4
190190
"r" (mvec) // 5
191191
: "cc",
192-
"%xmm0", "%xmm1",
192+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
193193
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
194194
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
195+
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
195196
"memory"
196197
);
197198

kernel/x86_64/daxpy_microk_haswell-2.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
6767
"r" (y), // 3
6868
"r" (alpha) // 4
6969
: "cc",
70-
"%xmm0",
71-
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
70+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
71+
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
72+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
7273
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
7374
"memory"
7475
);

kernel/x86_64/ddot_microk_haswell-2.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
8484
"r" (y), // 3
8585
"r" (dot) // 4
8686
: "cc",
87-
"%xmm4", "%xmm5",
88-
"%xmm6", "%xmm7",
87+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
88+
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
89+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
8990
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
9091
"memory"
9192
);

kernel/x86_64/ddot_microk_piledriver-2.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
9191
: "cc",
9292
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
9393
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
94+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
9495
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
9596
"memory"
9697
);
@@ -155,6 +156,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
155156
: "cc",
156157
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
157158
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
159+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
158160
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
159161
"memory"
160162
);

kernel/x86_64/ddot_microk_sandy-2.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
8989
"r" (y), // 3
9090
"r" (dot) // 4
9191
: "cc",
92-
"%xmm4", "%xmm5",
93-
"%xmm6", "%xmm7",
92+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
93+
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
94+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
9495
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
9596
"memory"
9697
);

kernel/x86_64/ddot_microk_steamroller-2.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
8888
: "cc",
8989
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
9090
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
91+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
9192
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
9293
"memory"
9394
);

kernel/x86_64/dgemv_n_microk_haswell-4.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,8 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
105105
"r" (alpha) // 8
106106
: "cc",
107107
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
108-
"%xmm4", "%xmm5",
109-
"%xmm6", "%xmm7",
110-
"%xmm8", "%xmm9",
108+
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
109+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
111110
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
112111
"memory"
113112
);
@@ -182,11 +181,10 @@ static void dgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
182181
"r" (ap[1]), // 5
183182
"r" (alpha) // 6
184183
: "cc",
185-
"%xmm0", "%xmm1",
186-
"%xmm4", "%xmm5",
187-
"%xmm6",
188-
"%xmm8",
189-
"%xmm12", "%xmm13",
184+
"%xmm0", "%xmm1", "%xmm2", "%xmm3",
185+
"%xmm4", "%xmm5", "%xmm6", "%xmm7",
186+
"%xmm8", "%xmm9", "%xmm10", "%xmm11",
187+
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
190188
"memory"
191189
);
192190
}

0 commit comments

Comments
 (0)