Skip to content

Commit 8d6a1de

Browse files
authored
[SelectionDAGISel] Don't merge input chains if it would put a token factor in the way of a glue. (llvm#167805)
In the new test, we're trying to fold a load into an X86ISD::CALL. The call has a CopyToReg glued to it. The load and the call have different input chains, so the chains need to be merged. This results in a TokenFactor that gets put between the CopyToReg and the final CALLm instruction, which the DAG scheduler can't handle. The load here was created by legalization of the extract_element using a stack temporary store and load. A normal IR load would be chained into the call sequence by SelectionDAGBuilder, which would usually have the load chained in before the CopyToReg. The store and load created by legalization don't get chained into the rest of the DAG. Fixes llvm#63790.
1 parent 9216e17 commit 8d6a1de

File tree

2 files changed

+38
-4
lines changed

2 files changed

+38
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2783,7 +2783,7 @@ void SelectionDAGISel::UpdateChains(
27832783
/// be used as the input node chain for the generated nodes.
27842784
static SDValue
27852785
HandleMergeInputChains(const SmallVectorImpl<SDNode *> &ChainNodesMatched,
2786-
SelectionDAG *CurDAG) {
2786+
SDValue InputGlue, SelectionDAG *CurDAG) {
27872787

27882788
SmallPtrSet<const SDNode *, 16> Visited;
27892789
SmallVector<const SDNode *, 8> Worklist;
@@ -2826,8 +2826,16 @@ HandleMergeInputChains(const SmallVectorImpl<SDNode *> &ChainNodesMatched,
28262826
// node that is both the predecessor and successor of the
28272827
// to-be-merged nodes. Fail.
28282828
Visited.clear();
2829-
for (SDValue V : InputChains)
2829+
for (SDValue V : InputChains) {
2830+
// If we need to create a TokenFactor, and any of the input chain nodes will
2831+
// also be glued to the output, we cannot merge the chains. The TokenFactor
2832+
// would prevent the glue from being honored.
2833+
if (InputChains.size() != 1 &&
2834+
V->getValueType(V->getNumValues() - 1) == MVT::Glue &&
2835+
InputGlue.getNode() == V.getNode())
2836+
return SDValue();
28302837
Worklist.push_back(V.getNode());
2838+
}
28312839

28322840
for (auto *N : ChainNodesMatched)
28332841
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
@@ -3989,7 +3997,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
39893997
}
39903998

39913999
// Merge the input chains if they are not intra-pattern references.
3992-
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
4000+
InputChain = HandleMergeInputChains(ChainNodesMatched, InputGlue, CurDAG);
39934001

39944002
if (!InputChain.getNode())
39954003
break; // Failed to merge.
@@ -4033,7 +4041,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
40334041
break;
40344042

40354043
// Merge the input chains if they are not intra-pattern references.
4036-
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
4044+
InputChain = HandleMergeInputChains(ChainNodesMatched, InputGlue, CurDAG);
40374045

40384046
if (!InputChain.getNode())
40394047
break; // Failed to merge.

llvm/test/CodeGen/X86/pr63790.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
3+
4+
define void @f(ptr %0, i64 %1) {
5+
; CHECK-LABEL: f:
6+
; CHECK: # %bb.0: # %BB
7+
; CHECK-NEXT: subq $40, %rsp
8+
; CHECK-NEXT: .cfi_def_cfa_offset 48
9+
; CHECK-NEXT: andl $1, %esi
10+
; CHECK-NEXT: movaps (%rdi), %xmm0
11+
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
12+
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
13+
; CHECK-NEXT: movl $42, %edi
14+
; CHECK-NEXT: callq *16(%rsp,%rsi,8)
15+
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
16+
; CHECK-NEXT: movaps %xmm0, (%rax)
17+
; CHECK-NEXT: addq $40, %rsp
18+
; CHECK-NEXT: .cfi_def_cfa_offset 8
19+
; CHECK-NEXT: retq
20+
BB:
21+
%fps = load <2 x ptr>, ptr %0
22+
%fp = extractelement <2 x ptr> %fps, i64 %1
23+
%p = call ptr %fp(i32 42)
24+
store <2 x ptr> %fps, ptr %p
25+
ret void
26+
}

0 commit comments

Comments
 (0)