Skip to content

Commit e6a0a3d

Browse files
authored
Merge pull request #1471 from martin-frbg/p2align
Use .p2align instead of .align for portability on Haswell and Sandybridge
2 parents e3a80e6 + 7c1925a commit e6a0a3d

35 files changed

+63
-63
lines changed

kernel/x86_64/caxpy_microk_haswell-2.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -50,11 +50,11 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
5050
"vmulps (%5), %%ymm0 , %%ymm0 \n\t"
5151
#endif
5252

53-
".align 16 \n\t"
53+
".p2align 4 \n\t"
5454
"1: \n\t"
5555

5656
"vmovups (%2,%0,4), %%ymm5 \n\t" // 4 complex values from x
57-
".align 2 \n\t"
57+
".p2align 1 \n\t"
5858
"vmovups 32(%2,%0,4), %%ymm7 \n\t" // 4 complex values from x
5959
"vmovups 64(%2,%0,4), %%ymm9 \n\t" // 4 complex values from x
6060
"vmovups 96(%2,%0,4), %%ymm11 \n\t" // 4 complex values from x
@@ -70,7 +70,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
7070
"vpermilps $0xb1 , %%ymm11, %%ymm10 \n\t" // exchange real and imag part
7171

7272
"vfmadd213ps (%3,%0,4), %%ymm0 , %%ymm5 \n\t"
73-
".align 2 \n\t"
73+
".p2align 1 \n\t"
7474
"vfmadd213ps 32(%3,%0,4), %%ymm0 , %%ymm7 \n\t"
7575
"vfmadd213ps 64(%3,%0,4), %%ymm0 , %%ymm9 \n\t"
7676
"vfmadd213ps 96(%3,%0,4), %%ymm0 , %%ymm11 \n\t"
@@ -96,7 +96,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
9696
"vfmadd231ps %%ymm1 , %%ymm10, %%ymm15 \n\t"
9797

9898
"vmovups %%ymm5 , (%3,%0,4) \n\t"
99-
".align 2 \n\t"
99+
".p2align 1 \n\t"
100100
"vmovups %%ymm7 , 32(%3,%0,4) \n\t"
101101
"vmovups %%ymm9 , 64(%3,%0,4) \n\t"
102102
"vmovups %%ymm11, 96(%3,%0,4) \n\t"

kernel/x86_64/caxpy_microk_sandy-2.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
5050
"vmulps (%5), %%ymm0 , %%ymm0 \n\t"
5151
#endif
5252

53-
".align 16 \n\t"
53+
".p2align 4 \n\t"
5454
"1: \n\t"
5555

5656
"vmovups (%2,%0,4), %%ymm5 \n\t" // 4 complex values from x
57-
".align 2 \n\t"
57+
".p2align 1 \n\t"
5858
"vmovups 32(%2,%0,4), %%ymm7 \n\t" // 4 complex values from x
5959
"vmovups 64(%2,%0,4), %%ymm9 \n\t" // 4 complex values from x
6060
"vmovups 96(%2,%0,4), %%ymm11 \n\t" // 4 complex values from x
@@ -85,7 +85,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
8585
"vaddps %%ymm10, %%ymm11, %%ymm11 \n\t"
8686

8787
"vmovups %%ymm5 , (%3,%0,4) \n\t"
88-
".align 2 \n\t"
88+
".p2align 1 \n\t"
8989
"vmovups %%ymm7 , 32(%3,%0,4) \n\t"
9090
"vmovups %%ymm9 , 64(%3,%0,4) \n\t"
9191
"vmovups %%ymm11, 96(%3,%0,4) \n\t"

kernel/x86_64/cdot_microk_haswell-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
4646
"vxorps %%ymm6, %%ymm6, %%ymm6 \n\t"
4747
"vxorps %%ymm7, %%ymm7, %%ymm7 \n\t"
4848

49-
".align 16 \n\t"
49+
".p2align 4 \n\t"
5050
"1: \n\t"
5151
"vmovups (%2,%0,4), %%ymm8 \n\t" // 2 * x
5252
"vmovups 32(%2,%0,4), %%ymm9 \n\t" // 2 * x

kernel/x86_64/cdot_microk_sandy-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
4646
"vxorps %%ymm6, %%ymm6, %%ymm6 \n\t"
4747
"vxorps %%ymm7, %%ymm7, %%ymm7 \n\t"
4848

49-
".align 16 \n\t"
49+
".p2align 4 \n\t"
5050
"1: \n\t"
5151
"vmovups (%2,%0,4), %%ymm8 \n\t" // 2 * x
5252
"vmovups 32(%2,%0,4), %%ymm9 \n\t" // 2 * x

kernel/x86_64/cscal_microk_haswell-2.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ static void cscal_kernel_16( BLASLONG n, FLOAT *alpha, FLOAT *x)
5454
"subq $16, %0 \n\t"
5555
"jz 2f \n\t"
5656

57-
".align 16 \n\t"
57+
".p2align 4 \n\t"
5858
"1: \n\t"
5959

6060
//"prefetcht0 128(%1) \n\t"
@@ -156,7 +156,7 @@ static void cscal_kernel_16_zero_r( BLASLONG n, FLOAT *alpha, FLOAT *x)
156156
"subq $16, %0 \n\t"
157157
"jz 2f \n\t"
158158

159-
".align 16 \n\t"
159+
".p2align 4 \n\t"
160160
"1: \n\t"
161161

162162
//"prefetcht0 128(%1) \n\t"
@@ -245,7 +245,7 @@ static void cscal_kernel_16_zero_i( BLASLONG n, FLOAT *alpha, FLOAT *x)
245245
"subq $16, %0 \n\t"
246246
"jz 2f \n\t"
247247

248-
".align 16 \n\t"
248+
".p2align 4 \n\t"
249249
"1: \n\t"
250250

251251
//"prefetcht0 128(%1) \n\t"
@@ -312,7 +312,7 @@ static void cscal_kernel_16_zero( BLASLONG n, FLOAT *alpha, FLOAT *x)
312312

313313
"addq $128, %1 \n\t"
314314

315-
".align 16 \n\t"
315+
".p2align 4 \n\t"
316316
"1: \n\t"
317317

318318
//"prefetcht0 128(%1) \n\t"

kernel/x86_64/daxpy_microk_haswell-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
3838
(
3939
"vbroadcastsd (%4), %%ymm0 \n\t" // alpha
4040

41-
".align 16 \n\t"
41+
".p2align 4 \n\t"
4242
"1: \n\t"
4343

4444
"vmovups (%3,%0,8), %%ymm12 \n\t" // 4 * y

kernel/x86_64/daxpy_microk_sandy-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
5050
"subq $16, %1 \n\t"
5151
"jz 2f \n\t"
5252

53-
".align 16 \n\t"
53+
".p2align 4 \n\t"
5454
"1: \n\t"
5555

5656
"vmulpd %%ymm4, %%ymm0, %%ymm4 \n\t"

kernel/x86_64/ddot_microk_haswell-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
4141
"vxorpd %%ymm6, %%ymm6, %%ymm6 \n\t"
4242
"vxorpd %%ymm7, %%ymm7, %%ymm7 \n\t"
4343

44-
".align 16 \n\t"
44+
".p2align 4 \n\t"
4545
"1: \n\t"
4646
"vmovups (%2,%0,8), %%ymm12 \n\t" // 2 * x
4747
"vmovups 32(%2,%0,8), %%ymm13 \n\t" // 2 * x

kernel/x86_64/ddot_microk_sandy-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
4141
"vxorpd %%ymm6, %%ymm6, %%ymm6 \n\t"
4242
"vxorpd %%ymm7, %%ymm7, %%ymm7 \n\t"
4343

44-
".align 16 \n\t"
44+
".p2align 4 \n\t"
4545
"1: \n\t"
4646
"vmovups (%2,%0,8), %%ymm12 \n\t" // 2 * x
4747
"vmovups 32(%2,%0,8), %%ymm13 \n\t" // 2 * x

kernel/x86_64/dger_microk_sandy-2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ static void dger_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
5353
"subq $8, %1 \n\t"
5454
"jz 2f \n\t"
5555

56-
".align 8 \n\t"
56+
".p2align 3 \n\t"
5757
"1: \n\t"
5858

5959
"vmulpd %%xmm4, %%xmm0, %%xmm4 \n\t"

0 commit comments

Comments
 (0)