We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2359c7c, commit 7c1925a. Copy full SHA for 7c1925a
kernel/x86_64/caxpy_microk_sandy-2.c
@@ -50,11 +50,11 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
50
"vmulps (%5), %%ymm0 , %%ymm0 \n\t"
51
#endif
52
53
- ".align 16 \n\t"
+ ".p2align 4 \n\t"
54
"1: \n\t"
55
56
"vmovups (%2,%0,4), %%ymm5 \n\t" // 4 complex values from x
57
- ".align 2 \n\t"
+ ".p2align 1 \n\t"
58
"vmovups 32(%2,%0,4), %%ymm7 \n\t" // 4 complex values from x
59
"vmovups 64(%2,%0,4), %%ymm9 \n\t" // 4 complex values from x
60
"vmovups 96(%2,%0,4), %%ymm11 \n\t" // 4 complex values from x
@@ -85,7 +85,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
85
"vaddps %%ymm10, %%ymm11, %%ymm11 \n\t"
86
87
"vmovups %%ymm5 , (%3,%0,4) \n\t"
88
89
"vmovups %%ymm7 , 32(%3,%0,4) \n\t"
90
"vmovups %%ymm9 , 64(%3,%0,4) \n\t"
91
"vmovups %%ymm11, 96(%3,%0,4) \n\t"
kernel/x86_64/cdot_microk_sandy-2.c
@@ -46,7 +46,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
46
"vxorps %%ymm6, %%ymm6, %%ymm6 \n\t"
47
"vxorps %%ymm7, %%ymm7, %%ymm7 \n\t"
48
49
"vmovups (%2,%0,4), %%ymm8 \n\t" // 2 * x
"vmovups 32(%2,%0,4), %%ymm9 \n\t" // 2 * x
kernel/x86_64/daxpy_microk_sandy-2.c
@@ -50,7 +50,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"subq $16, %1 \n\t"
"jz 2f \n\t"
"vmulpd %%ymm4, %%ymm0, %%ymm4 \n\t"
kernel/x86_64/ddot_microk_sandy-2.c
@@ -41,7 +41,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
41
"vxorpd %%ymm6, %%ymm6, %%ymm6 \n\t"
42
"vxorpd %%ymm7, %%ymm7, %%ymm7 \n\t"
43
44
45
"vmovups (%2,%0,8), %%ymm12 \n\t" // 2 * x
"vmovups 32(%2,%0,8), %%ymm13 \n\t" // 2 * x
kernel/x86_64/dger_microk_sandy-2.c
@@ -53,7 +53,7 @@ static void dger_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"subq $8, %1 \n\t"
- ".align 8 \n\t"
+ ".p2align 3 \n\t"
"vmulpd %%xmm4, %%xmm0, %%xmm4 \n\t"
kernel/x86_64/dscal_microk_sandy-2.c
@@ -58,7 +58,7 @@ static void dscal_kernel_8( BLASLONG n, FLOAT *alpha, FLOAT *x)
"subq $1 , %0 \n\t"
61
62
63
"prefetcht0 640(%1) \n\t"
64
@@ -156,7 +156,7 @@ static void dscal_kernel_8_zero( BLASLONG n, FLOAT *alpha, FLOAT *x)
156
"cmpq $0, %0 \n\t"
157
"je 2f \n\t"
158
159
160
161
162
"vmovups %%xmm0 ,-128(%1) \n\t"
kernel/x86_64/dsymv_L_microk_sandy-2.c
@@ -44,7 +44,7 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"vbroadcastsd 16(%8), %%ymm6 \n\t" // temp1[1]
"vbroadcastsd 24(%8), %%ymm7 \n\t" // temp1[1]
"vmovups (%3,%0,8), %%ymm9 \n\t" // 2 * y
kernel/x86_64/dsymv_U_microk_sandy-2.c
@@ -46,7 +46,7 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT
"xorq %0,%0 \n\t"
kernel/x86_64/saxpy_microk_sandy-2.c
@@ -50,7 +50,7 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"subq $32, %1 \n\t"
"vmulps %%ymm4, %%ymm0, %%ymm4 \n\t"
kernel/x86_64/sdot_microk_sandy-2.c
@@ -41,7 +41,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups (%2,%0,4), %%ymm12 \n\t" // 2 * x
"vmovups 32(%2,%0,4), %%ymm13 \n\t" // 2 * x
0 commit comments