Skip to content

Commit daee549

Browse files
committed
[FPEnv][SelectionDAG] Relax chain requirements
This patch implements the following changes: 1) SelectionDAGBuilder::visitConstrainedFPIntrinsic currently treats each constrained intrinsic like a global barrier (e.g. a function call) and fully serializes all pending chains. This is actually not required; it is allowed for constrained intrinsics to be reordered w.r.t. one another or (nonvolatile) memory accesses. The MI-level scheduler already allows for that flexibility, so it makes sense to allow it at the DAG level as well. This patch therefore changes the way chains for constrained intrinsics are created, and handles them basically like load operations are handled. This has the effect that constrained intrinsics are no longer serialized against one another or (nonvolatile) loads. They are still serialized against stores, but that seems hard to change with the current DAG chain setup, and it also doesn't seem to be a big problem preventing DAG optimizations. 2) The OPC_CheckFoldableChainNode check requires that each of the intermediate nodes in a multi-node pattern match only has a single use. This check tends to fail if those intermediate nodes are strict operations as those have a chain output that typically indeed has another use. However, we don't really need to consider chains here at all, since they will all be rewritten anyway by UpdateChains later. Other parts of the matcher therefore already ignore chains, but this hasOneUse check doesn't. This patch replaces hasOneUse by a custom test that verifies there is no more than one use of any non-chain output value. In theory, this change could affect code unrelated to strict FP nodes, but at least on SystemZ I could not find any single instance of that happening. 3) The SystemZ back-end currently does not allow matching multiply-and-extend operations (32x32 -> 64bit or 64x64 -> 128bit FP multiply) for strict FP operations. This was not possible in the past due to the problems described under 1) and 2) above. 
With those issues fixed, it is now possible to fully support those instructions in strict mode as well, and this patch does so. Differential Revision: https://reviews.llvm.org/D70913
1 parent f69f923 commit daee549

File tree

10 files changed

+769
-593
lines changed

10 files changed

+769
-593
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6888,7 +6888,10 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
68886888
ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
68896889
ValueVTs.push_back(MVT::Other); // Out chain
68906890

6891-
SDValue Chain = getRoot();
6891+
// We do not need to serialize constrained FP intrinsics against
6892+
// each other or against (nonvolatile) loads, so they can be
6893+
// chained like loads.
6894+
SDValue Chain = DAG.getRoot();
68926895
SmallVector<SDValue, 4> Opers;
68936896
Opers.push_back(Chain);
68946897
if (FPI.isUnaryOp()) {
@@ -6926,8 +6929,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
69266929
}
69276930

69286931
assert(Result.getNode()->getNumValues() == 2);
6932+
// See above -- chain is handled like for loads here.
69296933
SDValue OutChain = Result.getValue(1);
6930-
DAG.setRoot(OutChain);
6934+
PendingLoads.push_back(OutChain);
69316935
SDValue FPResult = Result.getValue(0);
69326936
setValue(&FPI, FPResult);
69336937
}

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3171,13 +3171,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
31713171
case OPC_CheckFoldableChainNode: {
31723172
assert(NodeStack.size() != 1 && "No parent node");
31733173
// Verify that all intermediate nodes between the root and this one have
3174-
// a single use.
3174+
// a single use (ignoring chains, which are handled in UpdateChains).
31753175
bool HasMultipleUses = false;
3176-
for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
3177-
if (!NodeStack[i].getNode()->hasOneUse()) {
3178-
HasMultipleUses = true;
3179-
break;
3180-
}
3176+
for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) {
3177+
unsigned NNonChainUses = 0;
3178+
SDNode *NS = NodeStack[i].getNode();
3179+
for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI)
3180+
if (UI.getUse().getValueType() != MVT::Other)
3181+
if (++NNonChainUses > 1) {
3182+
HasMultipleUses = true;
3183+
break;
3184+
}
3185+
if (HasMultipleUses) break;
3186+
}
31813187
if (HasMultipleUses) break;
31823188

31833189
// Check to see that the target thinks this is profitable to fold and that

llvm/lib/Target/SystemZ/SystemZInstrFP.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -467,34 +467,34 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
467467
// f64 multiplication of two FP32 registers.
468468
let Uses = [FPC], mayRaiseFPException = 1 in
469469
def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
470-
def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
471-
(f64 (fpextend FP32:$src2))),
470+
def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)),
471+
(f64 (any_fpextend FP32:$src2))),
472472
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
473473
FP32:$src1, subreg_h32), FP32:$src2)>;
474474

475475
// f64 multiplication of an FP32 register and an f32 memory.
476476
let Uses = [FPC], mayRaiseFPException = 1 in
477477
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
478-
def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
479-
(f64 (extloadf32 bdxaddr12only:$addr))),
478+
def : Pat<(any_fmul (f64 (any_fpextend FP32:$src1)),
479+
(f64 (any_extloadf32 bdxaddr12only:$addr))),
480480
(MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
481481
bdxaddr12only:$addr)>;
482482

483483
// f128 multiplication of two FP64 registers.
484484
let Uses = [FPC], mayRaiseFPException = 1 in
485485
def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
486486
let Predicates = [FeatureNoVectorEnhancements1] in
487-
def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
488-
(f128 (fpextend FP64:$src2))),
487+
def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)),
488+
(f128 (any_fpextend FP64:$src2))),
489489
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
490490
FP64:$src1, subreg_h64), FP64:$src2)>;
491491

492492
// f128 multiplication of an FP64 register and an f64 memory.
493493
let Uses = [FPC], mayRaiseFPException = 1 in
494494
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
495495
let Predicates = [FeatureNoVectorEnhancements1] in
496-
def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
497-
(f128 (extloadf64 bdxaddr12only:$addr))),
496+
def : Pat<(any_fmul (f128 (any_fpextend FP64:$src1)),
497+
(f128 (any_extloadf64 bdxaddr12only:$addr))),
498498
(MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
499499
bdxaddr12only:$addr)>;
500500

llvm/test/CodeGen/SystemZ/fp-strict-mul-02.ll

Lines changed: 97 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
11
; Test strict multiplication of two f32s, producing an f64 result.
2-
; FIXME: We should use llvm.experimental.constrained.fpext, but we currently
3-
; cannot match a combination of two strict operations in ISel.
42
;
53
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
64

75
declare float @foo()
86
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
97
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
108
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
9+
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
1110

1211
; Check register multiplication.
1312
define double @f1(float %f1, float %f2) #0 {
1413
; CHECK-LABEL: f1:
1514
; CHECK: mdebr %f0, %f2
1615
; CHECK: br %r14
17-
%f1x = fpext float %f1 to double
18-
%f2x = fpext float %f2 to double
16+
%f1x = call double @llvm.experimental.constrained.fpext.f64.f32(
17+
float %f1,
18+
metadata !"fpexcept.strict") #0
19+
%f2x = call double @llvm.experimental.constrained.fpext.f64.f32(
20+
float %f2,
21+
metadata !"fpexcept.strict") #0
1922
%res = call double @llvm.experimental.constrained.fmul.f64(
2023
double %f1x, double %f2x,
2124
metadata !"round.dynamic",
@@ -29,8 +32,12 @@ define double @f2(float %f1, float *%ptr) #0 {
2932
; CHECK: mdeb %f0, 0(%r2)
3033
; CHECK: br %r14
3134
%f2 = load float, float *%ptr
32-
%f1x = fpext float %f1 to double
33-
%f2x = fpext float %f2 to double
35+
%f1x = call double @llvm.experimental.constrained.fpext.f64.f32(
36+
float %f1,
37+
metadata !"fpexcept.strict") #0
38+
%f2x = call double @llvm.experimental.constrained.fpext.f64.f32(
39+
float %f2,
40+
metadata !"fpexcept.strict") #0
3441
%res = call double @llvm.experimental.constrained.fmul.f64(
3542
double %f1x, double %f2x,
3643
metadata !"round.dynamic",
@@ -45,8 +52,12 @@ define double @f3(float %f1, float *%base) #0 {
4552
; CHECK: br %r14
4653
%ptr = getelementptr float, float *%base, i64 1023
4754
%f2 = load float, float *%ptr
48-
%f1x = fpext float %f1 to double
49-
%f2x = fpext float %f2 to double
55+
%f1x = call double @llvm.experimental.constrained.fpext.f64.f32(
56+
float %f1,
57+
metadata !"fpexcept.strict") #0
58+
%f2x = call double @llvm.experimental.constrained.fpext.f64.f32(
59+
float %f2,
60+
metadata !"fpexcept.strict") #0
5061
%res = call double @llvm.experimental.constrained.fmul.f64(
5162
double %f1x, double %f2x,
5263
metadata !"round.dynamic",
@@ -63,8 +74,12 @@ define double @f4(float %f1, float *%base) #0 {
6374
; CHECK: br %r14
6475
%ptr = getelementptr float, float *%base, i64 1024
6576
%f2 = load float, float *%ptr
66-
%f1x = fpext float %f1 to double
67-
%f2x = fpext float %f2 to double
77+
%f1x = call double @llvm.experimental.constrained.fpext.f64.f32(
78+
float %f1,
79+
metadata !"fpexcept.strict") #0
80+
%f2x = call double @llvm.experimental.constrained.fpext.f64.f32(
81+
float %f2,
82+
metadata !"fpexcept.strict") #0
6883
%res = call double @llvm.experimental.constrained.fmul.f64(
6984
double %f1x, double %f2x,
7085
metadata !"round.dynamic",
@@ -80,8 +95,12 @@ define double @f5(float %f1, float *%base) #0 {
8095
; CHECK: br %r14
8196
%ptr = getelementptr float, float *%base, i64 -1
8297
%f2 = load float, float *%ptr
83-
%f1x = fpext float %f1 to double
84-
%f2x = fpext float %f2 to double
98+
%f1x = call double @llvm.experimental.constrained.fpext.f64.f32(
99+
float %f1,
100+
metadata !"fpexcept.strict") #0
101+
%f2x = call double @llvm.experimental.constrained.fpext.f64.f32(
102+
float %f2,
103+
metadata !"fpexcept.strict") #0
85104
%res = call double @llvm.experimental.constrained.fmul.f64(
86105
double %f1x, double %f2x,
87106
metadata !"round.dynamic",
@@ -98,8 +117,12 @@ define double @f6(float %f1, float *%base, i64 %index) #0 {
98117
%ptr1 = getelementptr float, float *%base, i64 %index
99118
%ptr2 = getelementptr float, float *%ptr1, i64 100
100119
%f2 = load float, float *%ptr2
101-
%f1x = fpext float %f1 to double
102-
%f2x = fpext float %f2 to double
120+
%f1x = call double @llvm.experimental.constrained.fpext.f64.f32(
121+
float %f1,
122+
metadata !"fpexcept.strict") #0
123+
%f2x = call double @llvm.experimental.constrained.fpext.f64.f32(
124+
float %f2,
125+
metadata !"fpexcept.strict") #0
103126
%res = call double @llvm.experimental.constrained.fmul.f64(
104127
double %f1x, double %f2x,
105128
metadata !"round.dynamic",
@@ -195,8 +218,12 @@ define float @f7(float *%ptr0) #0 {
195218

196219
%ret = call float @foo() #0
197220

198-
%accext0 = fpext float %ret to double
199-
%ext0 = fpext float %frob0 to double
221+
%accext0 = call double @llvm.experimental.constrained.fpext.f64.f32(
222+
float %ret,
223+
metadata !"fpexcept.strict") #0
224+
%ext0 = call double @llvm.experimental.constrained.fpext.f64.f32(
225+
float %frob0,
226+
metadata !"fpexcept.strict") #0
200227
%mul0 = call double @llvm.experimental.constrained.fmul.f64(
201228
double %accext0, double %ext0,
202229
metadata !"round.dynamic",
@@ -210,8 +237,12 @@ define float @f7(float *%ptr0) #0 {
210237
metadata !"round.dynamic",
211238
metadata !"fpexcept.strict") #0
212239

213-
%accext1 = fpext float %trunc0 to double
214-
%ext1 = fpext float %frob1 to double
240+
%accext1 = call double @llvm.experimental.constrained.fpext.f64.f32(
241+
float %trunc0,
242+
metadata !"fpexcept.strict") #0
243+
%ext1 = call double @llvm.experimental.constrained.fpext.f64.f32(
244+
float %frob1,
245+
metadata !"fpexcept.strict") #0
215246
%mul1 = call double @llvm.experimental.constrained.fmul.f64(
216247
double %accext1, double %ext1,
217248
metadata !"round.dynamic",
@@ -225,8 +256,12 @@ define float @f7(float *%ptr0) #0 {
225256
metadata !"round.dynamic",
226257
metadata !"fpexcept.strict") #0
227258

228-
%accext2 = fpext float %trunc1 to double
229-
%ext2 = fpext float %frob2 to double
259+
%accext2 = call double @llvm.experimental.constrained.fpext.f64.f32(
260+
float %trunc1,
261+
metadata !"fpexcept.strict") #0
262+
%ext2 = call double @llvm.experimental.constrained.fpext.f64.f32(
263+
float %frob2,
264+
metadata !"fpexcept.strict") #0
230265
%mul2 = call double @llvm.experimental.constrained.fmul.f64(
231266
double %accext2, double %ext2,
232267
metadata !"round.dynamic",
@@ -240,8 +275,12 @@ define float @f7(float *%ptr0) #0 {
240275
metadata !"round.dynamic",
241276
metadata !"fpexcept.strict") #0
242277

243-
%accext3 = fpext float %trunc2 to double
244-
%ext3 = fpext float %frob3 to double
278+
%accext3 = call double @llvm.experimental.constrained.fpext.f64.f32(
279+
float %trunc2,
280+
metadata !"fpexcept.strict") #0
281+
%ext3 = call double @llvm.experimental.constrained.fpext.f64.f32(
282+
float %frob3,
283+
metadata !"fpexcept.strict") #0
245284
%mul3 = call double @llvm.experimental.constrained.fmul.f64(
246285
double %accext3, double %ext3,
247286
metadata !"round.dynamic",
@@ -255,8 +294,12 @@ define float @f7(float *%ptr0) #0 {
255294
metadata !"round.dynamic",
256295
metadata !"fpexcept.strict") #0
257296

258-
%accext4 = fpext float %trunc3 to double
259-
%ext4 = fpext float %frob4 to double
297+
%accext4 = call double @llvm.experimental.constrained.fpext.f64.f32(
298+
float %trunc3,
299+
metadata !"fpexcept.strict") #0
300+
%ext4 = call double @llvm.experimental.constrained.fpext.f64.f32(
301+
float %frob4,
302+
metadata !"fpexcept.strict") #0
260303
%mul4 = call double @llvm.experimental.constrained.fmul.f64(
261304
double %accext4, double %ext4,
262305
metadata !"round.dynamic",
@@ -270,8 +313,12 @@ define float @f7(float *%ptr0) #0 {
270313
metadata !"round.dynamic",
271314
metadata !"fpexcept.strict") #0
272315

273-
%accext5 = fpext float %trunc4 to double
274-
%ext5 = fpext float %frob5 to double
316+
%accext5 = call double @llvm.experimental.constrained.fpext.f64.f32(
317+
float %trunc4,
318+
metadata !"fpexcept.strict") #0
319+
%ext5 = call double @llvm.experimental.constrained.fpext.f64.f32(
320+
float %frob5,
321+
metadata !"fpexcept.strict") #0
275322
%mul5 = call double @llvm.experimental.constrained.fmul.f64(
276323
double %accext5, double %ext5,
277324
metadata !"round.dynamic",
@@ -285,8 +332,12 @@ define float @f7(float *%ptr0) #0 {
285332
metadata !"round.dynamic",
286333
metadata !"fpexcept.strict") #0
287334

288-
%accext6 = fpext float %trunc5 to double
289-
%ext6 = fpext float %frob6 to double
335+
%accext6 = call double @llvm.experimental.constrained.fpext.f64.f32(
336+
float %trunc5,
337+
metadata !"fpexcept.strict") #0
338+
%ext6 = call double @llvm.experimental.constrained.fpext.f64.f32(
339+
float %frob6,
340+
metadata !"fpexcept.strict") #0
290341
%mul6 = call double @llvm.experimental.constrained.fmul.f64(
291342
double %accext6, double %ext6,
292343
metadata !"round.dynamic",
@@ -300,8 +351,12 @@ define float @f7(float *%ptr0) #0 {
300351
metadata !"round.dynamic",
301352
metadata !"fpexcept.strict") #0
302353

303-
%accext7 = fpext float %trunc6 to double
304-
%ext7 = fpext float %frob7 to double
354+
%accext7 = call double @llvm.experimental.constrained.fpext.f64.f32(
355+
float %trunc6,
356+
metadata !"fpexcept.strict") #0
357+
%ext7 = call double @llvm.experimental.constrained.fpext.f64.f32(
358+
float %frob7,
359+
metadata !"fpexcept.strict") #0
305360
%mul7 = call double @llvm.experimental.constrained.fmul.f64(
306361
double %accext7, double %ext7,
307362
metadata !"round.dynamic",
@@ -315,8 +370,12 @@ define float @f7(float *%ptr0) #0 {
315370
metadata !"round.dynamic",
316371
metadata !"fpexcept.strict") #0
317372

318-
%accext8 = fpext float %trunc7 to double
319-
%ext8 = fpext float %frob8 to double
373+
%accext8 = call double @llvm.experimental.constrained.fpext.f64.f32(
374+
float %trunc7,
375+
metadata !"fpexcept.strict") #0
376+
%ext8 = call double @llvm.experimental.constrained.fpext.f64.f32(
377+
float %frob8,
378+
metadata !"fpexcept.strict") #0
320379
%mul8 = call double @llvm.experimental.constrained.fmul.f64(
321380
double %accext8, double %ext8,
322381
metadata !"round.dynamic",
@@ -330,8 +389,12 @@ define float @f7(float *%ptr0) #0 {
330389
metadata !"round.dynamic",
331390
metadata !"fpexcept.strict") #0
332391

333-
%accext9 = fpext float %trunc8 to double
334-
%ext9 = fpext float %frob9 to double
392+
%accext9 = call double @llvm.experimental.constrained.fpext.f64.f32(
393+
float %trunc8,
394+
metadata !"fpexcept.strict") #0
395+
%ext9 = call double @llvm.experimental.constrained.fpext.f64.f32(
396+
float %frob9,
397+
metadata !"fpexcept.strict") #0
335398
%mul9 = call double @llvm.experimental.constrained.fmul.f64(
336399
double %accext9, double %ext9,
337400
metadata !"round.dynamic",

0 commit comments

Comments
 (0)