Skip to content

Commit f6f2929

Browse files
committed
[X86] Fix HSW/BDW masked store schedules
Vector masked stores don't use Port 5 or Port 7. Confirmed by Agner Fog's instruction tables and uops.info.
1 parent 0cb5846 commit f6f2929

File tree

6 files changed

+36
-36
lines changed

6 files changed

+36
-36
lines changed

llvm/lib/Target/X86/X86SchedBroadwell.td

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -247,10 +247,10 @@ defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
247247
defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
248248
defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
249249

250-
defm : X86WriteRes<WriteFMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
251-
defm : X86WriteRes<WriteFMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
252-
defm : X86WriteRes<WriteFMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
253-
defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
250+
defm : X86WriteRes<WriteFMaskedStore32, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
251+
defm : X86WriteRes<WriteFMaskedStore32Y, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
252+
defm : X86WriteRes<WriteFMaskedStore64, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
253+
defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
254254

255255
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
256256
defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>;
@@ -420,10 +420,10 @@ defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
420420
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
421421
defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
422422
defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
423-
defm : X86WriteRes<WriteVecMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
424-
defm : X86WriteRes<WriteVecMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
425-
defm : X86WriteRes<WriteVecMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
426-
defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
423+
defm : X86WriteRes<WriteVecMaskedStore32, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
424+
defm : X86WriteRes<WriteVecMaskedStore32Y, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
425+
defm : X86WriteRes<WriteVecMaskedStore64, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
426+
defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
427427
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
428428
defm : X86WriteRes<WriteVecMoveX, [BWPort015], 1, [1], 1>;
429429
defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;

llvm/lib/Target/X86/X86SchedHaswell.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,10 +249,10 @@ defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
249249
defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
250250
defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
251251

252-
defm : X86WriteRes<WriteFMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
253-
defm : X86WriteRes<WriteFMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
254-
defm : X86WriteRes<WriteFMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
255-
defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
252+
defm : X86WriteRes<WriteFMaskedStore32, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
253+
defm : X86WriteRes<WriteFMaskedStore32Y, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
254+
defm : X86WriteRes<WriteFMaskedStore64, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
255+
defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
256256

257257
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
258258
defm : X86WriteRes<WriteFMoveX, [HWPort5], 1, [1], 1>;
@@ -420,10 +420,10 @@ defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
420420
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
421421
defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
422422
defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
423-
defm : X86WriteRes<WriteVecMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
424-
defm : X86WriteRes<WriteVecMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
425-
defm : X86WriteRes<WriteVecMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
426-
defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
423+
defm : X86WriteRes<WriteVecMaskedStore32, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
424+
defm : X86WriteRes<WriteVecMaskedStore32Y, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
425+
defm : X86WriteRes<WriteVecMaskedStore64, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
426+
defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
427427
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
428428
defm : X86WriteRes<WriteVecMoveX, [HWPort015], 1, [1], 1>;
429429
defm : X86WriteRes<WriteVecMoveY, [HWPort015], 1, [1], 1>;

llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1736,7 +1736,7 @@ vzeroupper
17361736

17371737
# CHECK: Resource pressure per iteration:
17381738
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
1739-
# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 421.25 3.25 12.67
1739+
# CHECK-NEXT: - 257.00 216.25 249.25 173.83 173.83 38.00 419.25 3.25 11.33
17401740

17411741
# CHECK: Resource pressure by instruction:
17421742
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1932,12 +1932,12 @@ vzeroupper
19321932
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmaskmovdqu %xmm0, %xmm1
19331933
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovpd (%rax), %xmm0, %xmm2
19341934
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovpd (%rax), %ymm0, %ymm2
1935-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovpd %xmm0, %xmm1, (%rax)
1936-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovpd %ymm0, %ymm1, (%rax)
1935+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovpd %xmm0, %xmm1, (%rax)
1936+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovpd %ymm0, %ymm1, (%rax)
19371937
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovps (%rax), %xmm0, %xmm2
19381938
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovps (%rax), %ymm0, %ymm2
1939-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovps %xmm0, %xmm1, (%rax)
1940-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovps %ymm0, %ymm1, (%rax)
1939+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovps %xmm0, %xmm1, (%rax)
1940+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovps %ymm0, %ymm1, (%rax)
19411941
# CHECK-NEXT: - - - 1.00 - - - - - - vmaxpd %xmm0, %xmm1, %xmm2
19421942
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vmaxpd (%rax), %xmm1, %xmm2
19431943
# CHECK-NEXT: - - - 1.00 - - - - - - vmaxpd %ymm0, %ymm1, %ymm2

llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
776776

777777
# CHECK: Resource pressure per iteration:
778778
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
779-
# CHECK-NEXT: - - 96.67 60.67 99.67 99.67 21.00 266.67 4.00 1.67
779+
# CHECK-NEXT: - - 96.67 62.67 100.33 100.33 21.00 264.67 4.00 0.33
780780

781781
# CHECK: Resource pressure by instruction:
782782
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -914,12 +914,12 @@ vpxor (%rax), %ymm1, %ymm2
914914
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpmaddwd (%rax), %ymm1, %ymm2
915915
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovd (%rax), %xmm0, %xmm2
916916
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovd (%rax), %ymm0, %ymm2
917-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovd %xmm0, %xmm1, (%rax)
918-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovd %ymm0, %ymm1, (%rax)
917+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovd %xmm0, %xmm1, (%rax)
918+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovd %ymm0, %ymm1, (%rax)
919919
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovq (%rax), %xmm0, %xmm2
920920
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovq (%rax), %ymm0, %ymm2
921-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovq %xmm0, %xmm1, (%rax)
922-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovq %ymm0, %ymm1, (%rax)
921+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovq %xmm0, %xmm1, (%rax)
922+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovq %ymm0, %ymm1, (%rax)
923923
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpmaxsb %ymm0, %ymm1, %ymm2
924924
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - vpmaxsb (%rax), %ymm1, %ymm2
925925
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpmaxsd %ymm0, %ymm1, %ymm2

llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1736,7 +1736,7 @@ vzeroupper
17361736

17371737
# CHECK: Resource pressure per iteration:
17381738
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
1739-
# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 424.58 3.25 12.67
1739+
# CHECK-NEXT: - 336.00 215.58 250.58 173.83 173.83 38.00 422.58 3.25 11.33
17401740

17411741
# CHECK: Resource pressure by instruction:
17421742
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1932,12 +1932,12 @@ vzeroupper
19321932
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmaskmovdqu %xmm0, %xmm1
19331933
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovpd (%rax), %xmm0, %xmm2
19341934
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovpd (%rax), %ymm0, %ymm2
1935-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovpd %xmm0, %xmm1, (%rax)
1936-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovpd %ymm0, %ymm1, (%rax)
1935+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovpd %xmm0, %xmm1, (%rax)
1936+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovpd %ymm0, %ymm1, (%rax)
19371937
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovps (%rax), %xmm0, %xmm2
19381938
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovps (%rax), %ymm0, %ymm2
1939-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovps %xmm0, %xmm1, (%rax)
1940-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovps %ymm0, %ymm1, (%rax)
1939+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovps %xmm0, %xmm1, (%rax)
1940+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovps %ymm0, %ymm1, (%rax)
19411941
# CHECK-NEXT: - - - 1.00 - - - - - - vmaxpd %xmm0, %xmm1, %xmm2
19421942
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vmaxpd (%rax), %xmm1, %xmm2
19431943
# CHECK-NEXT: - - - 1.00 - - - - - - vmaxpd %ymm0, %ymm1, %ymm2

llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
776776

777777
# CHECK: Resource pressure per iteration:
778778
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
779-
# CHECK-NEXT: - - 206.67 90.67 99.67 99.67 5.00 284.67 30.00 1.67
779+
# CHECK-NEXT: - - 206.67 92.67 100.33 100.33 5.00 282.67 30.00 0.33
780780

781781
# CHECK: Resource pressure by instruction:
782782
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -914,12 +914,12 @@ vpxor (%rax), %ymm1, %ymm2
914914
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpmaddwd (%rax), %ymm1, %ymm2
915915
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovd (%rax), %xmm0, %xmm2
916916
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovd (%rax), %ymm0, %ymm2
917-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovd %xmm0, %xmm1, (%rax)
918-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovd %ymm0, %ymm1, (%rax)
917+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovd %xmm0, %xmm1, (%rax)
918+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovd %ymm0, %ymm1, (%rax)
919919
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovq (%rax), %xmm0, %xmm2
920920
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovq (%rax), %ymm0, %ymm2
921-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovq %xmm0, %xmm1, (%rax)
922-
# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovq %ymm0, %ymm1, (%rax)
921+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovq %xmm0, %xmm1, (%rax)
922+
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovq %ymm0, %ymm1, (%rax)
923923
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpmaxsb %ymm0, %ymm1, %ymm2
924924
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - vpmaxsb (%rax), %ymm1, %ymm2
925925
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpmaxsd %ymm0, %ymm1, %ymm2

0 commit comments

Comments
 (0)