Skip to content

Commit c6d3b51

Browse files
authored
[DAGCombiner] Remove most NoSignedZerosFPMath uses (#161180)
The two remaining uses are related to fneg and foldFPToIntToFP. Some AMDGPU tests are duplicated and regenerated.
1 parent aa42b64 commit c6d3b51

File tree

10 files changed

+2310
-1946
lines changed

10 files changed

+2310
-1946
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11849,9 +11849,7 @@ static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
1184911849
if (!VT.isFloatingPoint())
1185011850
return false;
1185111851

11852-
const TargetOptions &Options = DAG.getTarget().Options;
11853-
11854-
return (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) &&
11852+
return Flags.hasNoSignedZeros() &&
1185511853
TLI.isProfitableToCombineMinNumMaxNum(VT) &&
1185611854
(Flags.hasNoNaNs() ||
1185711855
(DAG.isKnownNeverNaN(RHS) && DAG.isKnownNeverNaN(LHS)));
@@ -17351,7 +17349,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
1735117349
// Always prefer FMAD to FMA for precision.
1735217350
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
1735317351
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
17354-
bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
17352+
bool NoSignedZero = Flags.hasNoSignedZeros();
1735517353

1735617354
// Is the node an FMUL and contractable either due to global flags or
1735717355
// SDNodeFlags.
@@ -18327,11 +18325,9 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
1832718325
return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
1832818326
}
1832918327

18330-
// FIXME: use fast math flags instead of Options.UnsafeFPMath
18331-
// TODO: Finally migrate away from global TargetOptions.
1833218328
if ((Options.NoNaNsFPMath && Options.NoInfsFPMath) ||
1833318329
(N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs())) {
18334-
if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros() ||
18330+
if (N->getFlags().hasNoSignedZeros() ||
1833518331
(N2CFP && !N2CFP->isExactlyValue(-0.0))) {
1833618332
if (N0CFP && N0CFP->isZero())
1833718333
return N2;
@@ -18636,8 +18632,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
1863618632
}
1863718633

1863818634
// Fold X/Sqrt(X) -> Sqrt(X)
18639-
if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
18640-
Flags.hasAllowReassociation())
18635+
if (Flags.hasNoSignedZeros() && Flags.hasAllowReassociation())
1864118636
if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
1864218637
return N1;
1864318638

llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll

Lines changed: 723 additions & 698 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/fmax_legacy.ll

Lines changed: 179 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI-SAFE,GCN,FUNC %s
2-
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN-NONAN,GCN,FUNC %s
1+
; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=SI,GCN,FUNC %s
32

4-
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN,FUNC %s
5-
; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN-NONAN,GCN,FUNC %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=VI,GCN,FUNC %s
64

75
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope --check-prefixes=EG,FUNC %s
86

@@ -12,12 +10,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
1210
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
1311
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
1412

15-
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
13+
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
1614

17-
; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]]
18-
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
19-
20-
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
15+
; VI: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]]
16+
; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
2117

2218
; EG: MAX
2319
define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
@@ -34,18 +30,38 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(ptr addrspace(1) %out, ptr a
3430
ret void
3531
}
3632

33+
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_fast:
34+
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
35+
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
36+
37+
; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
38+
39+
; EG: MAX
40+
define amdgpu_kernel void @test_fmax_legacy_uge_f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
41+
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
42+
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
43+
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
44+
45+
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
46+
%b = load volatile float, ptr addrspace(1) %gep.1, align 4
47+
48+
%cmp = fcmp uge float %a, %b
49+
%val = select nnan nsz i1 %cmp, float %a, float %b
50+
store float %val, ptr addrspace(1) %out, align 4
51+
ret void
52+
}
53+
3754
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src:
3855
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
3956
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
4057
; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]]
4158
; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]]
4259

43-
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
60+
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]]
4461

45-
; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
46-
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]]
62+
; VI: v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]]
63+
; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]]
4764

48-
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
4965

5066
; EG: MAX
5167
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
@@ -64,16 +80,40 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(ptr addrspace(1) %o
6480
ret void
6581
}
6682

83+
; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src_fast:
84+
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
85+
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
86+
; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]]
87+
; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]]
88+
89+
; GCN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]]
90+
91+
; EG: MAX
92+
define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
93+
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
94+
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
95+
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
96+
97+
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
98+
%b = load volatile float, ptr addrspace(1) %gep.1, align 4
99+
%a.nnan = fadd nnan float %a, 1.0
100+
%b.nnan = fadd nnan float %b, 2.0
101+
102+
%cmp = fcmp uge float %a.nnan, %b.nnan
103+
%val = select nnan nsz i1 %cmp, float %a.nnan, float %b.nnan
104+
store float %val, ptr addrspace(1) %out, align 4
105+
ret void
106+
}
107+
67108
; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32:
68109
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
69110
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
70111

71-
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
112+
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
72113

73-
; VI-SAFE: v_cmp_ge_f32_e32 vcc, [[A]], [[B]]
74-
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
114+
; VI: v_cmp_ge_f32_e32 vcc, [[A]], [[B]]
115+
; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
75116

76-
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
77117
; EG: MAX
78118
define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
79119
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -89,17 +129,35 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(ptr addrspace(1) %out, ptr a
89129
ret void
90130
}
91131

92-
; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32:
132+
; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32_fast:
93133
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
94134
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
95135

96-
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
136+
; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
137+
; EG: MAX
138+
define amdgpu_kernel void @test_fmax_legacy_oge_f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
139+
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
140+
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
141+
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
142+
143+
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
144+
%b = load volatile float, ptr addrspace(1) %gep.1, align 4
97145

98-
; VI-SAFE: v_cmp_nle_f32_e32 vcc, [[A]], [[B]]
99-
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
146+
%cmp = fcmp oge float %a, %b
147+
%val = select nnan nsz i1 %cmp, float %a, float %b
148+
store float %val, ptr addrspace(1) %out, align 4
149+
ret void
150+
}
100151

152+
; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32:
153+
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
154+
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
155+
156+
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
157+
158+
; VI: v_cmp_nle_f32_e32 vcc, [[A]], [[B]]
159+
; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
101160

102-
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
103161
; EG: MAX
104162
define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
105163
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -115,16 +173,35 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(ptr addrspace(1) %out, ptr a
115173
ret void
116174
}
117175

176+
; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32_fast:
177+
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
178+
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
179+
180+
; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
181+
; EG: MAX
182+
define amdgpu_kernel void @test_fmax_legacy_ugt_f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
183+
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
184+
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
185+
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
186+
187+
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
188+
%b = load volatile float, ptr addrspace(1) %gep.1, align 4
189+
190+
%cmp = fcmp ugt float %a, %b
191+
%val = select nnan nsz i1 %cmp, float %a, float %b
192+
store float %val, ptr addrspace(1) %out, align 4
193+
ret void
194+
}
195+
118196
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32:
119197
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
120198
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
121199

122-
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
200+
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
123201

124-
; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
125-
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
202+
; VI: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
203+
; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
126204

127-
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
128205
; EG: MAX
129206
define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
130207
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -140,17 +217,35 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(ptr addrspace(1) %out, ptr a
140217
ret void
141218
}
142219

143-
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
220+
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_fast:
144221
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
145222
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
146223

147-
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
224+
; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
225+
; EG: MAX
226+
define amdgpu_kernel void @test_fmax_legacy_ogt_f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
227+
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
228+
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
229+
%gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
230+
231+
%a = load volatile float, ptr addrspace(1) %gep.0, align 4
232+
%b = load volatile float, ptr addrspace(1) %gep.1, align 4
148233

149-
; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
150-
; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
234+
%cmp = fcmp ogt float %a, %b
235+
%val = select nnan nsz i1 %cmp, float %a, float %b
236+
store float %val, ptr addrspace(1) %out, align 4
237+
ret void
238+
}
151239

240+
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
241+
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
242+
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
243+
244+
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
245+
246+
; VI: v_cmp_gt_f32_e32 vcc, [[A]], [[B]]
247+
; VI: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]]
152248

153-
; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
154249
; EG: MAX
155250
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
156251
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -166,23 +261,39 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(ptr addrspace(1) %out, ptr
166261
ret void
167262
}
168263

264+
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32_fast:
265+
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
266+
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
267+
268+
; GCN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
269+
; EG: MAX
270+
define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
271+
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
272+
%gep.0 = getelementptr <1 x float>, ptr addrspace(1) %in, i32 %tid
273+
%gep.1 = getelementptr <1 x float>, ptr addrspace(1) %gep.0, i32 1
274+
275+
%a = load volatile <1 x float>, ptr addrspace(1) %gep.0
276+
%b = load volatile <1 x float>, ptr addrspace(1) %gep.1
277+
278+
%cmp = fcmp ogt <1 x float> %a, %b
279+
%val = select nnan nsz <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
280+
store <1 x float> %val, ptr addrspace(1) %out
281+
ret void
282+
}
283+
169284
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
170-
; SI-SAFE: v_max_legacy_f32_e32
171-
; SI-SAFE: v_max_legacy_f32_e32
172-
; SI-SAFE: v_max_legacy_f32_e32
173-
174-
; VI-SAFE: v_cmp_gt_f32_e32
175-
; VI-SAFE: v_cndmask_b32_e32
176-
; VI-SAFE: v_cmp_gt_f32_e32
177-
; VI-SAFE: v_cndmask_b32_e32
178-
; VI-SAFE: v_cmp_gt_f32_e32
179-
; VI-SAFE: v_cndmask_b32_e32
180-
; VI-SAFE-NOT: v_cmp
181-
; VI-SAFE-NOT: v_cndmask
182-
183-
; GCN-NONAN: v_max_f32_e32
184-
; GCN-NONAN: v_max_f32_e32
185-
; GCN-NONAN: v_max_f32_e32
285+
; SI: v_max_legacy_f32_e32
286+
; SI: v_max_legacy_f32_e32
287+
; SI: v_max_legacy_f32_e32
288+
289+
; VI: v_cmp_gt_f32_e32
290+
; VI: v_cndmask_b32_e32
291+
; VI: v_cmp_gt_f32_e32
292+
; VI: v_cndmask_b32_e32
293+
; VI: v_cmp_gt_f32_e32
294+
; VI: v_cndmask_b32_e32
295+
; VI-NOT: v_cmp
296+
; VI-NOT: v_cndmask
186297

187298
; GCN-NOT: v_max
188299
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
@@ -199,6 +310,27 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(ptr addrspace(1) %out, ptr
199310
ret void
200311
}
201312

313+
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32_fast:
314+
315+
; GCN: v_max_f32_e32
316+
; GCN: v_max_f32_e32
317+
; GCN: v_max_f32_e32
318+
319+
; GCN-NOT: v_max
320+
define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32_fast(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
321+
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
322+
%gep.0 = getelementptr <3 x float>, ptr addrspace(1) %in, i32 %tid
323+
%gep.1 = getelementptr <3 x float>, ptr addrspace(1) %gep.0, i32 1
324+
325+
%a = load <3 x float>, ptr addrspace(1) %gep.0
326+
%b = load <3 x float>, ptr addrspace(1) %gep.1
327+
328+
%cmp = fcmp ogt <3 x float> %a, %b
329+
%val = select nnan nsz <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
330+
store <3 x float> %val, ptr addrspace(1) %out
331+
ret void
332+
}
333+
202334
; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use:
203335
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
204336
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

0 commit comments

Comments
 (0)