Skip to content

Commit 2129ee6

Browse files
arsenm, tru
authored and committed
AMDGPU: Fix fast f32 exp2
Mirror of the previous log changes. OpenCL conformance doesn't like interpreting afn as ignoring denormal handling, but this was previously hidden by flag dropping. (cherry picked from commit 81b278e)
1 parent 216fdc8 commit 2129ee6

File tree

6 files changed

+220
-61
lines changed

6 files changed

+220
-61
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -994,8 +994,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
994994
half). Not implemented for double. Hardware provides
995995
1ULP accuracy for float, and 0.51ULP for half. Float
996996
instruction does not natively support denormal
997-
inputs. Backend will optimize out denormal scaling if
998-
marked with the :ref:`afn <fastmath_afn>` flag.
997+
inputs.
999998

1000999
llvm.amdgcn.wave.reduce.umin Performs an arithmetic unsigned min reduction on the unsigned values
10011000
provided by each lane in the wavefront.

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2751,7 +2751,7 @@ SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
27512751

27522752
assert(VT == MVT::f32);
27532753

2754-
if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags))
2754+
if (!needsDenormHandlingF32(DAG, Src, Flags))
27552755
return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);
27562756

27572757
// bool needs_scaling = x < -0x1.f80000p+6f;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3286,11 +3286,10 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
32863286

32873287
assert(Ty == F32);
32883288

3289-
if (allowApproxFunc(B.getMF(), Flags) ||
3290-
!needsDenormHandlingF32(B.getMF(), Src, Flags)) {
3289+
if (!needsDenormHandlingF32(B.getMF(), Src, Flags)) {
32913290
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
3292-
.addUse(Src)
3293-
.setMIFlags(Flags);
3291+
.addUse(Src)
3292+
.setMIFlags(Flags);
32943293
MI.eraseFromParent();
32953294
return true;
32963295
}

llvm/test/CodeGen/AMDGPU/input-mods.ll

Lines changed: 0 additions & 24 deletions
This file was deleted.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG %s
3+
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck --check-prefix=CM %s
4+
5+
define amdgpu_ps void @test(<4 x float> inreg %reg0) {
6+
; EG-LABEL: test:
7+
; EG: ; %bb.0:
8+
; EG-NEXT: ALU 8, @4, KC0[], KC1[]
9+
; EG-NEXT: EXPORT T0.X___
10+
; EG-NEXT: CF_END
11+
; EG-NEXT: PAD
12+
; EG-NEXT: ALU clause starting at 4:
13+
; EG-NEXT: SETGT * T0.W, literal.x, -|T0.X|,
14+
; EG-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
15+
; EG-NEXT: CNDE * T1.W, PV.W, 0.0, literal.x,
16+
; EG-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
17+
; EG-NEXT: ADD T1.W, -|T0.X|, PV.W,
18+
; EG-NEXT: CNDE * T0.W, T0.W, 1.0, literal.x,
19+
; EG-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
20+
; EG-NEXT: EXP_IEEE * T0.X, PV.W,
21+
; EG-NEXT: MUL_IEEE * T0.X, PS, T0.W,
22+
;
23+
; CM-LABEL: test:
24+
; CM: ; %bb.0:
25+
; CM-NEXT: ALU 11, @4, KC0[], KC1[]
26+
; CM-NEXT: EXPORT T0.X___
27+
; CM-NEXT: CF_END
28+
; CM-NEXT: PAD
29+
; CM-NEXT: ALU clause starting at 4:
30+
; CM-NEXT: SETGT * T0.W, literal.x, -|T0.X|,
31+
; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
32+
; CM-NEXT: CNDE * T1.W, PV.W, 0.0, literal.x,
33+
; CM-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
34+
; CM-NEXT: CNDE T0.Z, T0.W, 1.0, literal.x,
35+
; CM-NEXT: ADD * T0.W, -|T0.X|, PV.W,
36+
; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
37+
; CM-NEXT: EXP_IEEE T0.X, T0.W,
38+
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
39+
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
40+
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
41+
; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z,
42+
%r0 = extractelement <4 x float> %reg0, i32 0
43+
%r1 = call float @llvm.fabs.f32(float %r0)
44+
%r2 = fsub float -0.000000e+00, %r1
45+
%r3 = call afn float @llvm.exp2.f32(float %r2)
46+
%vec = insertelement <4 x float> undef, float %r3, i32 0
47+
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
48+
ret void
49+
}
50+
51+
declare float @llvm.exp2.f32(float) readnone
52+
declare float @llvm.fabs.f32(float) readnone
53+
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

llvm/test/CodeGen/AMDGPU/llvm.exp2.ll

Lines changed: 162 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,11 +1168,33 @@ define float @v_exp2_fneg_f32(float %in) {
11681168
}
11691169

11701170
define float @v_exp2_f32_fast(float %in) {
1171-
; GCN-LABEL: v_exp2_f32_fast:
1172-
; GCN: ; %bb.0:
1173-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1174-
; GCN-NEXT: v_exp_f32_e32 v0, v0
1175-
; GCN-NEXT: s_setpc_b64 s[30:31]
1171+
; GCN-SDAG-LABEL: v_exp2_f32_fast:
1172+
; GCN-SDAG: ; %bb.0:
1173+
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1174+
; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1175+
; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1176+
; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1177+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1178+
; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1179+
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1180+
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1181+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1182+
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1183+
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1184+
;
1185+
; GCN-GISEL-LABEL: v_exp2_f32_fast:
1186+
; GCN-GISEL: ; %bb.0:
1187+
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1188+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1189+
; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1190+
; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1191+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1192+
; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1193+
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1194+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1195+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1196+
; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1197+
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
11761198
;
11771199
; R600-LABEL: v_exp2_f32_fast:
11781200
; R600: ; %bb.0:
@@ -1188,11 +1210,33 @@ define float @v_exp2_f32_fast(float %in) {
11881210
}
11891211

11901212
define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
1191-
; GCN-LABEL: v_exp2_f32_unsafe_math_attr:
1192-
; GCN: ; %bb.0:
1193-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194-
; GCN-NEXT: v_exp_f32_e32 v0, v0
1195-
; GCN-NEXT: s_setpc_b64 s[30:31]
1213+
; GCN-SDAG-LABEL: v_exp2_f32_unsafe_math_attr:
1214+
; GCN-SDAG: ; %bb.0:
1215+
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1216+
; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1217+
; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1218+
; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1219+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1220+
; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1221+
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1222+
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1223+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1224+
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1225+
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1226+
;
1227+
; GCN-GISEL-LABEL: v_exp2_f32_unsafe_math_attr:
1228+
; GCN-GISEL: ; %bb.0:
1229+
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1231+
; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1232+
; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1233+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1234+
; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1235+
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1236+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1237+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1238+
; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1239+
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
11961240
;
11971241
; R600-LABEL: v_exp2_f32_unsafe_math_attr:
11981242
; R600: ; %bb.0:
@@ -1208,11 +1252,33 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
12081252
}
12091253

12101254
define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
1211-
; GCN-LABEL: v_exp2_f32_approx_fn_attr:
1212-
; GCN: ; %bb.0:
1213-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1214-
; GCN-NEXT: v_exp_f32_e32 v0, v0
1215-
; GCN-NEXT: s_setpc_b64 s[30:31]
1255+
; GCN-SDAG-LABEL: v_exp2_f32_approx_fn_attr:
1256+
; GCN-SDAG: ; %bb.0:
1257+
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1258+
; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1259+
; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1260+
; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1261+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1262+
; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1263+
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1264+
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1265+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1266+
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1267+
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1268+
;
1269+
; GCN-GISEL-LABEL: v_exp2_f32_approx_fn_attr:
1270+
; GCN-GISEL: ; %bb.0:
1271+
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1272+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1273+
; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1274+
; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1275+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1276+
; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1277+
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1278+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1279+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1280+
; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1281+
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
12161282
;
12171283
; R600-LABEL: v_exp2_f32_approx_fn_attr:
12181284
; R600: ; %bb.0:
@@ -1270,11 +1336,33 @@ define float @v_exp2_f32_ninf(float %in) {
12701336
}
12711337

12721338
define float @v_exp2_f32_afn(float %in) {
1273-
; GCN-LABEL: v_exp2_f32_afn:
1274-
; GCN: ; %bb.0:
1275-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1276-
; GCN-NEXT: v_exp_f32_e32 v0, v0
1277-
; GCN-NEXT: s_setpc_b64 s[30:31]
1339+
; GCN-SDAG-LABEL: v_exp2_f32_afn:
1340+
; GCN-SDAG: ; %bb.0:
1341+
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342+
; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1343+
; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1344+
; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1345+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1346+
; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1347+
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1348+
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1349+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1350+
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1351+
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1352+
;
1353+
; GCN-GISEL-LABEL: v_exp2_f32_afn:
1354+
; GCN-GISEL: ; %bb.0:
1355+
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1357+
; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1358+
; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1359+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1360+
; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1361+
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1362+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1363+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1364+
; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1365+
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
12781366
;
12791367
; R600-LABEL: v_exp2_f32_afn:
12801368
; R600: ; %bb.0:
@@ -1310,11 +1398,33 @@ define float @v_exp2_f32_afn_daz(float %in) #0 {
13101398
}
13111399

13121400
define float @v_exp2_f32_afn_dynamic(float %in) #1 {
1313-
; GCN-LABEL: v_exp2_f32_afn_dynamic:
1314-
; GCN: ; %bb.0:
1315-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1316-
; GCN-NEXT: v_exp_f32_e32 v0, v0
1317-
; GCN-NEXT: s_setpc_b64 s[30:31]
1401+
; GCN-SDAG-LABEL: v_exp2_f32_afn_dynamic:
1402+
; GCN-SDAG: ; %bb.0:
1403+
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404+
; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1405+
; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
1406+
; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1407+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1408+
; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2
1409+
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1410+
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1411+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1412+
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1413+
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1414+
;
1415+
; GCN-GISEL-LABEL: v_exp2_f32_afn_dynamic:
1416+
; GCN-GISEL: ; %bb.0:
1417+
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1418+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1419+
; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1420+
; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
1421+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1422+
; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
1423+
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1424+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1425+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1426+
; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1427+
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
13181428
;
13191429
; R600-LABEL: v_exp2_f32_afn_dynamic:
13201430
; R600: ; %bb.0:
@@ -1330,11 +1440,33 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 {
13301440
}
13311441

13321442
define float @v_fabs_exp2_f32_afn(float %in) {
1333-
; GCN-LABEL: v_fabs_exp2_f32_afn:
1334-
; GCN: ; %bb.0:
1335-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1336-
; GCN-NEXT: v_exp_f32_e64 v0, |v0|
1337-
; GCN-NEXT: s_setpc_b64 s[30:31]
1443+
; GCN-SDAG-LABEL: v_fabs_exp2_f32_afn:
1444+
; GCN-SDAG: ; %bb.0:
1445+
; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446+
; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000
1447+
; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
1448+
; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000
1449+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
1450+
; GCN-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2
1451+
; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0
1452+
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000
1453+
; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1454+
; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
1455+
; GCN-SDAG-NEXT: s_setpc_b64 s[30:31]
1456+
;
1457+
; GCN-GISEL-LABEL: v_fabs_exp2_f32_afn:
1458+
; GCN-GISEL: ; %bb.0:
1459+
; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
1461+
; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
1462+
; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1
1463+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
1464+
; GCN-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1
1465+
; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0
1466+
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000
1467+
; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
1468+
; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
1469+
; GCN-GISEL-NEXT: s_setpc_b64 s[30:31]
13381470
;
13391471
; R600-LABEL: v_fabs_exp2_f32_afn:
13401472
; R600: ; %bb.0:

0 commit comments

Comments
 (0)