Skip to content

Commit 8ae2a18

Browse files
authored
[X86] Use proxy scheduler models for bdver3/bdver4 cpus (#114873)
We don't have specific scheduler models for the bdver3/bdver4 CPUs, but we can use the bdver2/znver1 models as proxy stand-ins. These days the models are more useful for analysis than for perfect instruction scheduling, so these should be fine. While they don't accurately represent the bdver3/bdver4 architecture (specifically the different fp-pipe layout), they give more accurate latencies/throughputs (vs Agner) than the default SandyBridge model, and they enable PostRA scheduling, which all recent AMD models have benefitted from. I had to use the znver1 model for bdver4 so that we have AVX2 instruction coverage (none of the TBM/XOP/LWP/FMA4 instructions have explicit schedules, so this shouldn't be a problem); both double-pump 256-bit instructions, so this works pretty well. This patch is based on a discussion at the devmtg regarding how easily we can provide an actual scheduler model (or at least an approximation) for more of the X86 CPU targets; we can then add specific models if the (unlikely) need arises.
1 parent 1878b94 commit 8ae2a18

File tree

3 files changed

+39
-104
lines changed

3 files changed

+39
-104
lines changed

llvm/lib/Target/X86/X86.td

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,11 +1912,13 @@ def : ProcModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features,
19121912
def : ProcModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features,
19131913
ProcessorFeatures.BdVer2Tuning>;
19141914
// Steamroller
1915-
def : Proc<"bdver3", ProcessorFeatures.BdVer3Features,
1916-
ProcessorFeatures.BdVer3Tuning>;
1915+
// NOTE: BdVer2Model is only an approx model for Steamroller.
1916+
def : ProcModel<"bdver3", BdVer2Model, ProcessorFeatures.BdVer3Features,
1917+
ProcessorFeatures.BdVer3Tuning>;
19171918
// Excavator
1918-
def : Proc<"bdver4", ProcessorFeatures.BdVer4Features,
1919-
ProcessorFeatures.BdVer4Tuning>;
1919+
// NOTE: Znver1Model is only an approx model for Excavator (with AVX2).
1920+
def : ProcModel<"bdver4", Znver1Model, ProcessorFeatures.BdVer4Features,
1921+
ProcessorFeatures.BdVer4Tuning>;
19201922

19211923
def : ProcModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures,
19221924
ProcessorFeatures.ZNTuning>;

llvm/test/CodeGen/X86/lwp-intrinsics.ll

Lines changed: 19 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=i686-unknown -mattr=+lwp | FileCheck %s --check-prefixes=X86,X86_LWP
3-
; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver1 | FileCheck %s --check-prefixes=X86,X86_BDVER1
4-
; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver2 | FileCheck %s --check-prefixes=X86,X86_BDVER2
5-
; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver3 | FileCheck %s --check-prefixes=X86,X86_BDVER3
6-
; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=X86,X86_BDVER4
3+
; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver1 | FileCheck %s --check-prefixes=X86,X86_BDVER
4+
; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver2 | FileCheck %s --check-prefixes=X86,X86_BDVER
5+
; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver3 | FileCheck %s --check-prefixes=X86,X86_BDVER
6+
; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver4 | FileCheck %s --check-prefixes=X86,X86_BDVER
77
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+lwp | FileCheck %s --check-prefix=X64
88
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver1 | FileCheck %s --check-prefix=X64
99
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=X64
@@ -49,41 +49,14 @@ define i8 @test_lwpins32_rri(i32 %a0, i32 %a1) nounwind {
4949
; X86_LWP-NEXT: setb %al
5050
; X86_LWP-NEXT: retl
5151
;
52-
; X86_BDVER1-LABEL: test_lwpins32_rri:
53-
; X86_BDVER1: # %bb.0:
54-
; X86_BDVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx
55-
; X86_BDVER1-NEXT: movl {{[0-9]+}}(%esp), %eax
56-
; X86_BDVER1-NEXT: addl %ecx, %ecx
57-
; X86_BDVER1-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
58-
; X86_BDVER1-NEXT: setb %al
59-
; X86_BDVER1-NEXT: retl
60-
;
61-
; X86_BDVER2-LABEL: test_lwpins32_rri:
62-
; X86_BDVER2: # %bb.0:
63-
; X86_BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx
64-
; X86_BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax
65-
; X86_BDVER2-NEXT: addl %ecx, %ecx
66-
; X86_BDVER2-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
67-
; X86_BDVER2-NEXT: setb %al
68-
; X86_BDVER2-NEXT: retl
69-
;
70-
; X86_BDVER3-LABEL: test_lwpins32_rri:
71-
; X86_BDVER3: # %bb.0:
72-
; X86_BDVER3-NEXT: movl {{[0-9]+}}(%esp), %eax
73-
; X86_BDVER3-NEXT: movl {{[0-9]+}}(%esp), %ecx
74-
; X86_BDVER3-NEXT: addl %ecx, %ecx
75-
; X86_BDVER3-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
76-
; X86_BDVER3-NEXT: setb %al
77-
; X86_BDVER3-NEXT: retl
78-
;
79-
; X86_BDVER4-LABEL: test_lwpins32_rri:
80-
; X86_BDVER4: # %bb.0:
81-
; X86_BDVER4-NEXT: movl {{[0-9]+}}(%esp), %eax
82-
; X86_BDVER4-NEXT: movl {{[0-9]+}}(%esp), %ecx
83-
; X86_BDVER4-NEXT: addl %ecx, %ecx
84-
; X86_BDVER4-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
85-
; X86_BDVER4-NEXT: setb %al
86-
; X86_BDVER4-NEXT: retl
52+
; X86_BDVER-LABEL: test_lwpins32_rri:
53+
; X86_BDVER: # %bb.0:
54+
; X86_BDVER-NEXT: movl {{[0-9]+}}(%esp), %ecx
55+
; X86_BDVER-NEXT: movl {{[0-9]+}}(%esp), %eax
56+
; X86_BDVER-NEXT: addl %ecx, %ecx
57+
; X86_BDVER-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF
58+
; X86_BDVER-NEXT: setb %al
59+
; X86_BDVER-NEXT: retl
8760
;
8861
; X64-LABEL: test_lwpins32_rri:
8962
; X64: # %bb.0:
@@ -124,37 +97,13 @@ define void @test_lwpval32_rri(i32 %a0, i32 %a1) nounwind {
12497
; X86_LWP-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
12598
; X86_LWP-NEXT: retl
12699
;
127-
; X86_BDVER1-LABEL: test_lwpval32_rri:
128-
; X86_BDVER1: # %bb.0:
129-
; X86_BDVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx
130-
; X86_BDVER1-NEXT: movl {{[0-9]+}}(%esp), %eax
131-
; X86_BDVER1-NEXT: addl %ecx, %ecx
132-
; X86_BDVER1-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
133-
; X86_BDVER1-NEXT: retl
134-
;
135-
; X86_BDVER2-LABEL: test_lwpval32_rri:
136-
; X86_BDVER2: # %bb.0:
137-
; X86_BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx
138-
; X86_BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax
139-
; X86_BDVER2-NEXT: addl %ecx, %ecx
140-
; X86_BDVER2-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
141-
; X86_BDVER2-NEXT: retl
142-
;
143-
; X86_BDVER3-LABEL: test_lwpval32_rri:
144-
; X86_BDVER3: # %bb.0:
145-
; X86_BDVER3-NEXT: movl {{[0-9]+}}(%esp), %eax
146-
; X86_BDVER3-NEXT: movl {{[0-9]+}}(%esp), %ecx
147-
; X86_BDVER3-NEXT: addl %ecx, %ecx
148-
; X86_BDVER3-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
149-
; X86_BDVER3-NEXT: retl
150-
;
151-
; X86_BDVER4-LABEL: test_lwpval32_rri:
152-
; X86_BDVER4: # %bb.0:
153-
; X86_BDVER4-NEXT: movl {{[0-9]+}}(%esp), %eax
154-
; X86_BDVER4-NEXT: movl {{[0-9]+}}(%esp), %ecx
155-
; X86_BDVER4-NEXT: addl %ecx, %ecx
156-
; X86_BDVER4-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
157-
; X86_BDVER4-NEXT: retl
100+
; X86_BDVER-LABEL: test_lwpval32_rri:
101+
; X86_BDVER: # %bb.0:
102+
; X86_BDVER-NEXT: movl {{[0-9]+}}(%esp), %ecx
103+
; X86_BDVER-NEXT: movl {{[0-9]+}}(%esp), %eax
104+
; X86_BDVER-NEXT: addl %ecx, %ecx
105+
; X86_BDVER-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98
106+
; X86_BDVER-NEXT: retl
158107
;
159108
; X64-LABEL: test_lwpval32_rri:
160109
; X64: # %bb.0:

llvm/test/CodeGen/X86/rotate_vec.ll

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -162,21 +162,13 @@ define <4 x i32> @rot_v4i32_mask_ashr1(<4 x i32> %a0) {
162162
}
163163

164164
define <8 x i16> @or_fshl_v8i16(<8 x i16> %x, <8 x i16> %y) {
165-
; XOPAVX1-LABEL: or_fshl_v8i16:
166-
; XOPAVX1: # %bb.0:
167-
; XOPAVX1-NEXT: vpor %xmm0, %xmm1, %xmm1
168-
; XOPAVX1-NEXT: vpsrlw $11, %xmm0, %xmm0
169-
; XOPAVX1-NEXT: vpsllw $5, %xmm1, %xmm1
170-
; XOPAVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
171-
; XOPAVX1-NEXT: retq
172-
;
173-
; XOPAVX2-LABEL: or_fshl_v8i16:
174-
; XOPAVX2: # %bb.0:
175-
; XOPAVX2-NEXT: vpor %xmm0, %xmm1, %xmm1
176-
; XOPAVX2-NEXT: vpsllw $5, %xmm1, %xmm1
177-
; XOPAVX2-NEXT: vpsrlw $11, %xmm0, %xmm0
178-
; XOPAVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
179-
; XOPAVX2-NEXT: retq
165+
; XOP-LABEL: or_fshl_v8i16:
166+
; XOP: # %bb.0:
167+
; XOP-NEXT: vpor %xmm0, %xmm1, %xmm1
168+
; XOP-NEXT: vpsrlw $11, %xmm0, %xmm0
169+
; XOP-NEXT: vpsllw $5, %xmm1, %xmm1
170+
; XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
171+
; XOP-NEXT: retq
180172
;
181173
; AVX512-LABEL: or_fshl_v8i16:
182174
; AVX512: # %bb.0:
@@ -193,21 +185,13 @@ define <8 x i16> @or_fshl_v8i16(<8 x i16> %x, <8 x i16> %y) {
193185
}
194186

195187
define <4 x i32> @or_fshl_v4i32(<4 x i32> %x, <4 x i32> %y) {
196-
; XOPAVX1-LABEL: or_fshl_v4i32:
197-
; XOPAVX1: # %bb.0:
198-
; XOPAVX1-NEXT: vpor %xmm0, %xmm1, %xmm1
199-
; XOPAVX1-NEXT: vpsrld $11, %xmm0, %xmm0
200-
; XOPAVX1-NEXT: vpslld $21, %xmm1, %xmm1
201-
; XOPAVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
202-
; XOPAVX1-NEXT: retq
203-
;
204-
; XOPAVX2-LABEL: or_fshl_v4i32:
205-
; XOPAVX2: # %bb.0:
206-
; XOPAVX2-NEXT: vpor %xmm0, %xmm1, %xmm1
207-
; XOPAVX2-NEXT: vpslld $21, %xmm1, %xmm1
208-
; XOPAVX2-NEXT: vpsrld $11, %xmm0, %xmm0
209-
; XOPAVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
210-
; XOPAVX2-NEXT: retq
188+
; XOP-LABEL: or_fshl_v4i32:
189+
; XOP: # %bb.0:
190+
; XOP-NEXT: vpor %xmm0, %xmm1, %xmm1
191+
; XOP-NEXT: vpsrld $11, %xmm0, %xmm0
192+
; XOP-NEXT: vpslld $21, %xmm1, %xmm1
193+
; XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
194+
; XOP-NEXT: retq
211195
;
212196
; AVX512-LABEL: or_fshl_v4i32:
213197
; AVX512: # %bb.0:

0 commit comments

Comments
 (0)