Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57616,10 +57616,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
}

// Fold any similar generic ADD/SUB opcodes to reuse this node.
auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) {
SDValue Ops[] = {N0, N1};
SDVTList VTs = DAG.getVTList(N->getValueType(0));
if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) {
SDValue Op(N, 0);
if (Negate) {
// Bail if this is only used by a user of the x86 add/sub.
Expand All @@ -57631,8 +57631,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
DCI.CombineTo(GenericAddSub, Op);
}
};
MatchGeneric(LHS, RHS, false);
MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
MatchGeneric(GenericOpc, LHS, RHS, false);
MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode());

if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(RHS)) {
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
if (X86ISD::SUB == N->getOpcode()) {
// With LHS - C, fold LHS + (-C)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to handle -(LHS - C)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think LHS - C will be canonicalized to LHS + (-C) by DAGCombiner::visitSUB first, so we don't need to handle it here.
https://github.com/llvm/llvm-project/blob/683e2bf059a6e5e0bb9dc2628218b53dc2d1b490/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L4160-L4163

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is canonicalized, why do we still see LHS - C here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What we see here is an X86ISD::SUB, which sets eflags differently than X86ISD::ADD, so an X86ISD::SUB LHS, C cannot be canonicalized to X86ISD::ADD LHS, -C. Therefore we explicitly check for the X86ISD::SUB pattern here.

In contrast, a generic ISD::SUB LHS, C is equivalent to ISD::ADD LHS, -C and is canonicalized by the DAG, so we don't need to look for the ISD::SUB LHS, C to fold.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am new to the backend part, so please correct me if I am wrong. Thank you!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My initial question was do we need to handle -(LHS - C) in the X86ISD::SUB case, I have this question because I see you handled LHS - C and -(C - RHS).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean with -X86ISD::SUB(LHS, C) we can fold the generic sub(C, LHS)? Yes we can. It is handled by the MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode()); above at line 57635.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get the point, thanks!

MatchGeneric(ISD::ADD, LHS, NegC, false);
} else {
// With -(LHS + C), fold (-C) - LHS
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the comment is misleading. We are folding LHS + C to -((-C) - LHS). We are creating a new negate instead of eliminating one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Thanks for pointing that out. I have modified the comments to reflect that the generic op is being replaced, not simply eliminated.

MatchGeneric(ISD::SUB, NegC, LHS, true);
}
} else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) {
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
if (X86ISD::SUB == N->getOpcode()) {
// With -(C - RHS), fold RHS + (-C)
MatchGeneric(ISD::ADD, RHS, NegC, true);
}
}

// TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
// EFLAGS result doesn't change.
Expand Down
48 changes: 48 additions & 0 deletions llvm/test/CodeGen/X86/combine-adc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,52 @@ define i32 @adc_merge_constants(i32 %a0) nounwind {
ret i32 %sum
}

define i32 @adc_merge_sub(i32 %a0) nounwind {
; X86-LABEL: adc_merge_sub:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl $42, %edi
; X86-NEXT: setb %al
; X86-NEXT: movl %edi, %esi
; X86-NEXT: negl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: xorl %edi, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: adc_merge_sub:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: addl $42, %ebx
; X64-NEXT: setb %dil
; X64-NEXT: movl %ebx, %ebp
; X64-NEXT: negl %ebp
; X64-NEXT: callq use@PLT
; X64-NEXT: xorl %ebx, %ebp
; X64-NEXT: movl %ebp, %eax
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%adc = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %a0, i32 42)
%carry = extractvalue { i8, i32 } %adc, 0
call void @use(i8 %carry)
%sum = extractvalue { i8, i32 } %adc, 1
%sub = sub i32 -42, %a0
%result = xor i32 %sum, %sub
ret i32 %result
}

declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
declare void @use(i8)
81 changes: 81 additions & 0 deletions llvm/test/CodeGen/X86/combine-sbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -333,4 +333,85 @@ define i32 @PR40483_sub6(ptr, i32) nounwind {
ret i32 %10
}

define i32 @sbb_merge_add1(i32 %a0) nounwind {
; X86-LABEL: sbb_merge_add1:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $42, {{[0-9]+}}(%esp)
; X86-NEXT: setb %al
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sbb_merge_add1:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $42, %edi
; X64-NEXT: setb %al
; X64-NEXT: movl %eax, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %a0, i32 42)
%borrow = extractvalue { i8, i32 } %sbb, 0
call void @use(i8 %borrow)
%diff = extractvalue { i8, i32 } %sbb, 1
%add = add i32 %a0, -42
%result = xor i32 %diff, %add
ret i32 %result
}

define i32 @sbb_merge_add2(i32 %a0) nounwind {
; X86-LABEL: sbb_merge_add2:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl $42, %edi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: setb %al
; X86-NEXT: movl %edi, %esi
; X86-NEXT: negl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: xorl %edi, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: sbb_merge_add2:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
; X64-NEXT: movl $42, %ebp
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subl %edi, %ebp
; X64-NEXT: setb %al
; X64-NEXT: movl %ebp, %ebx
; X64-NEXT: negl %ebx
; X64-NEXT: movl %eax, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 42, i32 %a0)
%borrow = extractvalue { i8, i32 } %sbb, 0
call void @use(i8 %borrow)
%diff = extractvalue { i8, i32 } %sbb, 1
%add = add i32 %a0, -42
%result = xor i32 %diff, %add
ret i32 %result
}

declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)
declare void @use(i8)
129 changes: 66 additions & 63 deletions llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,17 @@ entry:
define void @_Z2x6v() local_unnamed_addr {
; CHECK-LABEL: _Z2x6v:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl (%rax), %edx
; CHECK-NEXT: andl $511, %edx # imm = 0x1FF
; CHECK-NEXT: leaq 1(%rdx), %rax
; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl %eax, (%rcx)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl (%rcx), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB1_18
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
Expand All @@ -114,58 +125,47 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl (%rax), %ebx
; CHECK-NEXT: andl $511, %ebx # imm = 0x1FF
; CHECK-NEXT: leaq 1(%rbx), %rax
; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl %eax, (%rcx)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl (%rcx), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB1_18
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdx
; CHECK-NEXT: movq (%rdx), %rsi
; CHECK-NEXT: movl %ecx, %edx
; CHECK-NEXT: notl %edx
; CHECK-NEXT: leaq 8(,%rdx,8), %rdi
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rsi
; CHECK-NEXT: movq (%rsi), %rsi
; CHECK-NEXT: movl %ecx, %edi
; CHECK-NEXT: notl %edi
; CHECK-NEXT: leaq 8(,%rdi,8), %rdi
; CHECK-NEXT: imulq %rax, %rdi
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r8
; CHECK-NEXT: movl (%r8), %edx
; CHECK-NEXT: leal 8(,%rbx,8), %eax
; CHECK-NEXT: movl (%r8), %r9d
; CHECK-NEXT: leal 8(,%rdx,8), %eax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: leaq 32(%rsi), %r11
; CHECK-NEXT: leaq 8(,%rbx,8), %rbx
; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r15
; CHECK-NEXT: movq %rsi, %r12
; CHECK-NEXT: leaq 32(%rsi), %rbx
; CHECK-NEXT: leaq 8(,%rdx,8), %r14
; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12
; CHECK-NEXT: movq %rsi, %r13
; CHECK-NEXT: jmp .LBB1_2
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movl %edx, (%r8)
; CHECK-NEXT: movl %r9d, (%r8)
; CHECK-NEXT: .LBB1_16: # %for.inc3
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: incq %r14
; CHECK-NEXT: addq %rbx, %r11
; CHECK-NEXT: addq %r14, %r13
; CHECK-NEXT: incq %r15
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: je .LBB1_17
; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_12 Depth 2
; CHECK-NEXT: # Child Loop BB1_14 Depth 2
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: testl %r9d, %r9d
; CHECK-NEXT: jns .LBB1_16
; CHECK-NEXT: # %bb.3: # %for.body2.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movslq %edx, %r13
; CHECK-NEXT: testq %r13, %r13
; CHECK-NEXT: movslq %r9d, %r9
; CHECK-NEXT: testq %r9, %r9
; CHECK-NEXT: movq $-1, %rbp
; CHECK-NEXT: cmovnsq %r13, %rbp
; CHECK-NEXT: subq %r13, %rbp
; CHECK-NEXT: cmovnsq %r9, %rbp
; CHECK-NEXT: subq %r9, %rbp
; CHECK-NEXT: incq %rbp
; CHECK-NEXT: cmpq $4, %rbp
; CHECK-NEXT: jb .LBB1_14
Expand All @@ -177,20 +177,20 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: # %bb.5: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: imulq %r14, %rax
; CHECK-NEXT: leaq (%rsi,%rax), %r10
; CHECK-NEXT: leaq (%r10,%r13,8), %r9
; CHECK-NEXT: testq %r13, %r13
; CHECK-NEXT: movq $-1, %r10
; CHECK-NEXT: cmovnsq %r13, %r10
; CHECK-NEXT: cmpq %r15, %r9
; CHECK-NEXT: imulq %r15, %rax
; CHECK-NEXT: leaq (%rsi,%rax), %r11
; CHECK-NEXT: leaq (%r11,%r9,8), %r10
; CHECK-NEXT: testq %r9, %r9
; CHECK-NEXT: movq $-1, %r11
; CHECK-NEXT: cmovnsq %r9, %r11
; CHECK-NEXT: cmpq %r12, %r10
; CHECK-NEXT: jae .LBB1_7
; CHECK-NEXT: # %bb.6: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: leaq 8(%rsi), %r9
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: leaq (%rax,%r10,8), %rax
; CHECK-NEXT: cmpq %r15, %rax
; CHECK-NEXT: leaq 8(%rsi), %r10
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: leaq (%rax,%r11,8), %rax
; CHECK-NEXT: cmpq %r12, %rax
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
Expand All @@ -201,58 +201,54 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: movdqu %xmm0, (%r12,%r13,8)
; CHECK-NEXT: movdqu %xmm0, 16(%r12,%r13,8)
; CHECK-NEXT: movl $4, %r10d
; CHECK-NEXT: movdqu %xmm0, (%r13,%r9,8)
; CHECK-NEXT: movdqu %xmm0, 16(%r13,%r9,8)
; CHECK-NEXT: movl $4, %r11d
; CHECK-NEXT: shrq $2, %rax
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: xorl %r10d, %r10d
; CHECK-NEXT: xorl %r11d, %r11d
; CHECK-NEXT: shrq $2, %rax
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: movq %r10, %rax
; CHECK-NEXT: movq %r11, %rax
; CHECK-NEXT: subq %rdx, %rax
; CHECK-NEXT: addq %r13, %r10
; CHECK-NEXT: leaq (%r11,%r10,8), %r10
; CHECK-NEXT: addq %r9, %r11
; CHECK-NEXT: leaq (%rbx,%r11,8), %r11
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_12: # %vector.body
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: movdqu %xmm0, -32(%r10)
; CHECK-NEXT: movdqu %xmm0, -16(%r10)
; CHECK-NEXT: movdqu %xmm0, (%r10)
; CHECK-NEXT: movdqu %xmm0, 16(%r10)
; CHECK-NEXT: addq $64, %r10
; CHECK-NEXT: movdqu %xmm0, -32(%r11)
; CHECK-NEXT: movdqu %xmm0, -16(%r11)
; CHECK-NEXT: movdqu %xmm0, (%r11)
; CHECK-NEXT: movdqu %xmm0, 16(%r11)
; CHECK-NEXT: addq $64, %r11
; CHECK-NEXT: addq $8, %rax
; CHECK-NEXT: jne .LBB1_12
; CHECK-NEXT: .LBB1_13: # %middle.block
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: addq %rdx, %r13
; CHECK-NEXT: addq %rdx, %r9
; CHECK-NEXT: cmpq %rdx, %rbp
; CHECK-NEXT: movq %r13, %rdx
; CHECK-NEXT: je .LBB1_15
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_14: # %for.body2
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: movq (%r15), %rax
; CHECK-NEXT: movq %rax, (%r12,%r13,8)
; CHECK-NEXT: leaq 1(%r13), %rdx
; CHECK-NEXT: cmpq $-1, %r13
; CHECK-NEXT: movq %rdx, %r13
; CHECK-NEXT: movq (%r12), %rax
; CHECK-NEXT: movq %rax, (%r13,%r9,8)
; CHECK-NEXT: incq %r9
; CHECK-NEXT: jl .LBB1_14
; CHECK-NEXT: jmp .LBB1_15
; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq %rdi, (%rax)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl $0, (%rax)
; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
Expand All @@ -265,6 +261,13 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_restore %rbx
; CHECK-NEXT: .cfi_restore %r12
; CHECK-NEXT: .cfi_restore %r13
; CHECK-NEXT: .cfi_restore %r14
; CHECK-NEXT: .cfi_restore %r15
; CHECK-NEXT: .cfi_restore %rbp
; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: retq
entry:
%0 = load i32, ptr @x1, align 4
Expand Down