Skip to content

Commit 40a469f

Browse files
authored
Reapply "[X86] Correct 32-bit immediate assertion and fix 64-bit lowering for huge frame offsets" (#152239)
The first commit is identical to 69bec0a. The second commit fixes the instruction verification failures by replacing the erroneous instruction with a trap after the error is reported and adds `-verify-machineinstrs` to the tests added in the original PR to catch the issue sooner. After that change, all tests pass with both `LLVM_ENABLE_EXPENSIVE_CHECKS={On,Off}`. cc @RKSimon @e-kud @phoebewang @arsenm as reviewers on the original PR
1 parent 9c8e716 commit 40a469f

12 files changed

+273
-55
lines changed

llvm/lib/CodeGen/PrologEpilogInserter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1550,7 +1550,7 @@ void PEIImpl::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
15501550
// If this instruction has a FrameIndex operand, we need to
15511551
// use that target machine register info object to eliminate
15521552
// it.
1553-
TRI.eliminateFrameIndex(MI, SPAdj, i);
1553+
TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
15541554

15551555
// Reset the iterator if we were at the beginning of the BB.
15561556
if (AtBeginning) {

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/CodeGen/MachineInstrBuilder.h"
2525
#include "llvm/CodeGen/MachineModuleInfo.h"
2626
#include "llvm/CodeGen/MachineRegisterInfo.h"
27+
#include "llvm/CodeGen/RegisterScavenging.h"
2728
#include "llvm/CodeGen/WinEHFuncInfo.h"
2829
#include "llvm/IR/DataLayout.h"
2930
#include "llvm/IR/EHPersonalities.h"
@@ -2678,7 +2679,7 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
26782679
// object.
26792680
// We need to factor in additional offsets applied during the prologue to the
26802681
// frame, base, and stack pointer depending on which is used.
2681-
int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
2682+
int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
26822683
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
26832684
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
26842685
uint64_t StackSize = MFI.getStackSize();
@@ -4212,6 +4213,14 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
42124213
// emitPrologue if it gets called and emits CFI.
42134214
MF.setHasWinCFI(false);
42144215

4216+
MachineFrameInfo &MFI = MF.getFrameInfo();
4217+
// If the frame is big enough that we might need to scavenge a register to
4218+
// handle huge offsets, reserve a stack slot for that now.
4219+
if (!isInt<32>(MFI.estimateStackSize(MF))) {
4220+
int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false);
4221+
RS->addScavengingFrameIndex(FI);
4222+
}
4223+
42154224
// If we are using Windows x64 CFI, ensure that the stack is always 8 byte
42164225
// aligned. The format doesn't support misaligned stack adjustments.
42174226
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())

llvm/lib/Target/X86/X86RegisterInfo.cpp

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
//===----------------------------------------------------------------------===//
1414

1515
#include "X86RegisterInfo.h"
16+
#include "MCTargetDesc/X86BaseInfo.h"
1617
#include "X86FrameLowering.h"
1718
#include "X86MachineFunctionInfo.h"
1819
#include "X86Subtarget.h"
@@ -21,8 +22,8 @@
2122
#include "llvm/ADT/SmallSet.h"
2223
#include "llvm/CodeGen/LiveRegMatrix.h"
2324
#include "llvm/CodeGen/MachineFrameInfo.h"
24-
#include "llvm/CodeGen/MachineFunction.h"
2525
#include "llvm/CodeGen/MachineRegisterInfo.h"
26+
#include "llvm/CodeGen/RegisterScavenging.h"
2627
#include "llvm/CodeGen/TargetFrameLowering.h"
2728
#include "llvm/CodeGen/TargetInstrInfo.h"
2829
#include "llvm/CodeGen/TileShapeInfo.h"
@@ -907,7 +908,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
907908
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
908909

909910
// Determine base register and offset.
910-
int FIOffset;
911+
int64_t FIOffset;
911912
Register BasePtr;
912913
if (MI.isReturn()) {
913914
assert((!hasStackRealignment(MF) ||
@@ -958,11 +959,41 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
958959
}
959960

960961
if (MI.getOperand(FIOperandNum+3).isImm()) {
961-
// Offset is a 32-bit integer.
962-
int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
963-
int Offset = FIOffset + Imm;
964-
assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
965-
"Requesting 64-bit offset in 32-bit immediate!");
962+
const X86InstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
963+
const DebugLoc &DL = MI.getDebugLoc();
964+
int64_t Imm = MI.getOperand(FIOperandNum + 3).getImm();
965+
int64_t Offset = FIOffset + Imm;
966+
bool FitsIn32Bits = isInt<32>(Offset);
967+
// If the offset will not fit in a 32-bit displacement, then for 64-bit
968+
// targets, scavenge a register to hold it. Otherwise...
969+
if (Is64Bit && !FitsIn32Bits) {
970+
assert(RS && "RegisterScavenger was NULL");
971+
972+
RS->enterBasicBlockEnd(MBB);
973+
RS->backward(std::next(II));
974+
975+
Register ScratchReg = RS->scavengeRegisterBackwards(
976+
X86::GR64RegClass, II, /*RestoreAfter=*/false, /*SPAdj=*/0,
977+
/*AllowSpill=*/true);
978+
assert(ScratchReg != 0 && "scratch reg was 0");
979+
RS->setRegUsed(ScratchReg);
980+
981+
BuildMI(MBB, II, DL, TII->get(X86::MOV64ri), ScratchReg).addImm(Offset);
982+
983+
MI.getOperand(FIOperandNum + 3).setImm(0);
984+
MI.getOperand(FIOperandNum + 2).setReg(ScratchReg);
985+
986+
return false;
987+
}
988+
989+
// ... for 32-bit targets, this is a bug!
990+
if (!Is64Bit && !FitsIn32Bits) {
991+
MI.emitGenericError("64-bit offset calculated but target is 32-bit");
992+
// Trap so that the instruction verification pass does not fail if run.
993+
BuildMI(MBB, MBBI, DL, TII->get(X86::TRAP));
994+
return false;
995+
}
996+
966997
if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
967998
MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
968999
} else {

llvm/lib/Target/X86/X86RegisterInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_LIB_TARGET_X86_X86REGISTERINFO_H
1414
#define LLVM_LIB_TARGET_X86_X86REGISTERINFO_H
1515

16+
#include "llvm/CodeGen/MachineFunction.h"
1617
#include "llvm/CodeGen/TargetRegisterInfo.h"
1718

1819
#define GET_REGINFO_HEADER
@@ -180,6 +181,10 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
180181
constrainRegClassToNonRex2(const TargetRegisterClass *RC) const;
181182

182183
bool isNonRex2RegClass(const TargetRegisterClass *RC) const;
184+
185+
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
186+
return true;
187+
}
183188
};
184189

185190
} // End llvm namespace
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 4
2+
; RUN: llc -O0 -mtriple=x86_64 -mattr=+avx512f -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
3+
define void @f(i16 %LGV2, i1 %LGV3) {
4+
; CHECK-LABEL: f:
5+
; CHECK: # %bb.0: # %BB
6+
; CHECK-NEXT: subq $2147483528, %rsp # imm = 0x7FFFFF88
7+
; CHECK-NEXT: .cfi_def_cfa_offset 2147483536
8+
; CHECK-NEXT: movb %sil, %cl
9+
; CHECK-NEXT: movw %di, %ax
10+
; CHECK-NEXT: movswq %ax, %rax
11+
; CHECK-NEXT: andb $1, %cl
12+
; CHECK-NEXT: movabsq $-2147483768, %rdx # imm = 0xFFFFFFFF7FFFFF88
13+
; CHECK-NEXT: movb %cl, (%rsp,%rdx)
14+
; CHECK-NEXT: addq $2147483528, %rsp # imm = 0x7FFFFF88
15+
; CHECK-NEXT: .cfi_def_cfa_offset 8
16+
; CHECK-NEXT: retq
17+
BB:
18+
%A = alloca i1, i33 2147483648, align 1
19+
%G = getelementptr i1, ptr %A, i16 %LGV2
20+
%G4 = getelementptr i1, ptr %G, i32 -2147483648
21+
store i1 %LGV3, ptr %G4, align 1
22+
ret void
23+
}

llvm/test/CodeGen/X86/huge-stack.ll

Lines changed: 61 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,70 @@
55
define void @foo() unnamed_addr #0 {
66
; CHECK-LABEL: foo:
77
; CHECK: # %bb.0:
8-
; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E
8+
; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88
99
; CHECK-NEXT: subq %rax, %rsp
10-
; CHECK-NEXT: .cfi_def_cfa_offset 8589934470
11-
; CHECK-NEXT: movb $42, -129(%rsp)
12-
; CHECK-NEXT: movb $43, -128(%rsp)
13-
; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E
10+
; CHECK-NEXT: .cfi_def_cfa_offset 8589934480
11+
; CHECK-NEXT: movabsq $4294967177, %rax # imm = 0xFFFFFF89
12+
; CHECK-NEXT: movb $42, (%rsp,%rax)
13+
; CHECK-NEXT: movb $43, -118(%rsp)
14+
; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88
1415
; CHECK-NEXT: addq %rax, %rsp
1516
; CHECK-NEXT: .cfi_def_cfa_offset 8
1617
; CHECK-NEXT: retq
17-
%1 = alloca %large, align 1
18-
%2 = alloca %large, align 1
19-
%3 = getelementptr inbounds %large, ptr %1, i64 0, i64 0
20-
store i8 42, ptr %3, align 1
21-
%4 = getelementptr inbounds %large, ptr %2, i64 0, i64 0
22-
store i8 43, ptr %4, align 1
18+
%large1 = alloca %large, align 1
19+
%large2 = alloca %large, align 1
20+
%ptrLarge1 = getelementptr inbounds %large, ptr %large1, i64 0, i64 0
21+
store i8 42, ptr %ptrLarge1, align 1
22+
%ptrLarge2 = getelementptr inbounds %large, ptr %large2, i64 0, i64 0
23+
store i8 43, ptr %ptrLarge2, align 1
2324
ret void
2425
}
26+
27+
declare ptr @baz(ptr, ptr, ptr, ptr)
28+
29+
define ptr @scavenge_spill() unnamed_addr #0 {
30+
; CHECK-LABEL: scavenge_spill:
31+
; CHECK: # %bb.0:
32+
; CHECK-NEXT: movabsq $25769803816, %rax # imm = 0x600000028
33+
; CHECK-NEXT: subq %rax, %rsp
34+
; CHECK-NEXT: .cfi_def_cfa_offset 25769803824
35+
; CHECK-NEXT: movabsq $21474836521, %rax # imm = 0x500000029
36+
; CHECK-NEXT: leaq (%rsp,%rax), %rdi
37+
; CHECK-NEXT: movabsq $17179869226, %rax # imm = 0x40000002A
38+
; CHECK-NEXT: leaq (%rsp,%rax), %rsi
39+
; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
40+
; CHECK-NEXT: movabsq $12884901931, %rax # imm = 0x30000002B
41+
; CHECK-NEXT: leaq (%rsp,%rax), %rdx
42+
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
43+
; CHECK-NEXT: movabsq $8589934636, %rax # imm = 0x20000002C
44+
; CHECK-NEXT: leaq (%rsp,%rax), %rcx
45+
; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
46+
; CHECK-NEXT: callq baz@PLT
47+
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
48+
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
49+
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
50+
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
51+
; CHECK-NEXT: leaq 46(%rsp), %rdi
52+
; CHECK-NEXT: callq baz@PLT
53+
; CHECK-NEXT: # kill: def $rcx killed $rax
54+
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
55+
; CHECK-NEXT: movabsq $25769803816, %rcx # imm = 0x600000028
56+
; CHECK-NEXT: addq %rcx, %rsp
57+
; CHECK-NEXT: .cfi_def_cfa_offset 8
58+
; CHECK-NEXT: retq
59+
%large1 = alloca %large, align 1
60+
%ptrLarge1 = getelementptr inbounds %large, ptr %large1, i64 0, i64 0
61+
%large2 = alloca %large, align 1
62+
%ptrLarge2 = getelementptr inbounds %large, ptr %large2, i64 0, i64 0
63+
%large3 = alloca %large, align 1
64+
%ptrLarge3 = getelementptr inbounds %large, ptr %large3, i64 0, i64 0
65+
%large4 = alloca %large, align 1
66+
%ptrLarge4 = getelementptr inbounds %large, ptr %large4, i64 0, i64 0
67+
%large5 = alloca %large, align 1
68+
%ptrLarge5 = getelementptr inbounds %large, ptr %large5, i64 0, i64 0
69+
%ret1 = call ptr @baz(ptr %ptrLarge1, ptr %ptrLarge2, ptr %ptrLarge3, ptr %ptrLarge4)
70+
%large6 = alloca %large, align 1
71+
%ptrLarge6 = getelementptr inbounds %large, ptr %large6, i64 0, i64 0
72+
%ret2 = call ptr @baz(ptr %ptrLarge6, ptr %ptrLarge2, ptr %ptrLarge3, ptr %ptrLarge4)
73+
ret ptr %ret1
74+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64 -O=0 -verify-machineinstrs | FileCheck %s
3+
@G = global i8 0
4+
5+
; Regression test for PR113856 - incorrect FastISel assert
6+
7+
define i32 @main() {
8+
; CHECK-LABEL: main:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: movabsq $-2147483652, %rax # imm = 0xFFFFFFFF7FFFFFFC
11+
; CHECK-NEXT: movl $0, (%rsp,%rax)
12+
; CHECK-NEXT: xorl %eax, %eax
13+
; CHECK-NEXT: retq
14+
%1 = alloca i32, align 4
15+
%G = getelementptr i8, ptr %1, i32 -2147483648
16+
store i32 0, ptr %G, align 4
17+
ret i32 0
18+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
; RUN: not llc < %s -mtriple=i686 -filetype=null -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=ERR-i686
2+
; RUN: llc < %s -mtriple=x86_64 -verify-machineinstrs | FileCheck %s -check-prefix=x86_64
3+
4+
; Regression test for #121932, #113856, #106352, #69365, #25051 which are caused by
5+
; an incorrectly written assertion for 64-bit offsets when compiling for 32-bit X86.
6+
7+
define i32 @main() #0 {
8+
; ERR-i686: error: <unknown>:0:0: 64-bit offset calculated but target is 32-bit
9+
;
10+
; x86_64-LABEL: main:
11+
; x86_64: # %bb.0: # %entry
12+
; x86_64-NEXT: movl $4294967192, %eax # imm = 0xFFFFFF98
13+
; x86_64-NEXT: subq %rax, %rsp
14+
; x86_64-NEXT: .cfi_def_cfa_offset 4294967200
15+
; x86_64-NEXT: movabsq $3221225318, %rax # imm = 0xBFFFFF66
16+
; x86_64-NEXT: movb $32, (%rsp,%rax)
17+
; x86_64-NEXT: movb $33, 2147483494(%rsp)
18+
; x86_64-NEXT: movb $34, 1073741670(%rsp)
19+
; x86_64-NEXT: movb $35, -154(%rsp)
20+
; x86_64-NEXT: xorl %eax, %eax
21+
; x86_64-NEXT: movl $4294967192, %ecx # imm = 0xFFFFFF98
22+
; x86_64-NEXT: addq %rcx, %rsp
23+
; x86_64-NEXT: .cfi_def_cfa_offset 8
24+
; x86_64-NEXT: retq
25+
entry:
26+
%a = alloca [1073741824 x i8], align 16
27+
%b = alloca [1073741824 x i8], align 16
28+
%c = alloca [1073741824 x i8], align 16
29+
%d = alloca [1073741824 x i8], align 16
30+
31+
%arrayida = getelementptr inbounds [1073741824 x i8], ptr %a, i64 0, i64 -42
32+
%arrayidb = getelementptr inbounds [1073741824 x i8], ptr %b, i64 0, i64 -42
33+
%arrayidc = getelementptr inbounds [1073741824 x i8], ptr %c, i64 0, i64 -42
34+
%arrayidd = getelementptr inbounds [1073741824 x i8], ptr %d, i64 0, i64 -42
35+
36+
store i8 32, ptr %arrayida, align 2
37+
store i8 33, ptr %arrayidb, align 2
38+
store i8 34, ptr %arrayidc, align 2
39+
store i8 35, ptr %arrayidd, align 2
40+
41+
ret i32 0
42+
}
43+
44+
; Same test as above but for an anonymous function.
45+
define i32 @0() #0 {
46+
; ERR-i686: error: <unknown>:0:0: 64-bit offset calculated but target is 32-bit
47+
;
48+
; x86_64-LABEL: __unnamed_1:
49+
; x86_64: # %bb.0: # %entry
50+
; x86_64-NEXT: movl $4294967192, %eax # imm = 0xFFFFFF98
51+
; x86_64-NEXT: subq %rax, %rsp
52+
; x86_64-NEXT: .cfi_def_cfa_offset 4294967200
53+
; x86_64-NEXT: movabsq $3221225318, %rax # imm = 0xBFFFFF66
54+
; x86_64-NEXT: movb $32, (%rsp,%rax)
55+
; x86_64-NEXT: movb $33, 2147483494(%rsp)
56+
; x86_64-NEXT: movb $34, 1073741670(%rsp)
57+
; x86_64-NEXT: movb $35, -154(%rsp)
58+
; x86_64-NEXT: xorl %eax, %eax
59+
; x86_64-NEXT: movl $4294967192, %ecx # imm = 0xFFFFFF98
60+
; x86_64-NEXT: addq %rcx, %rsp
61+
; x86_64-NEXT: .cfi_def_cfa_offset 8
62+
; x86_64-NEXT: retq
63+
entry:
64+
%a = alloca [1073741824 x i8], align 16
65+
%b = alloca [1073741824 x i8], align 16
66+
%c = alloca [1073741824 x i8], align 16
67+
%d = alloca [1073741824 x i8], align 16
68+
69+
%arrayida = getelementptr inbounds [1073741824 x i8], ptr %a, i64 0, i64 -42
70+
%arrayidb = getelementptr inbounds [1073741824 x i8], ptr %b, i64 0, i64 -42
71+
%arrayidc = getelementptr inbounds [1073741824 x i8], ptr %c, i64 0, i64 -42
72+
%arrayidd = getelementptr inbounds [1073741824 x i8], ptr %d, i64 0, i64 -42
73+
74+
store i8 32, ptr %arrayida, align 2
75+
store i8 33, ptr %arrayidb, align 2
76+
store i8 34, ptr %arrayidc, align 2
77+
store i8 35, ptr %arrayidd, align 2
78+
79+
ret i32 0
80+
}
81+
82+
attributes #0 = { optnone noinline }

llvm/test/CodeGen/X86/merge-huge-sp-updates.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ entry:
2222
call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs)
2323
; CHECK: call{{.*}}bar
2424
; CHECK: addq{{.*}}$2147483647, %rsp
25-
; CHECK: addq{{.*}}$372037585, %rsp
26-
; CHECK: .cfi_adjust_cfa_offset -2519521232
25+
; CHECK: addq{{.*}}$372037601, %rsp
26+
; CHECK: .cfi_adjust_cfa_offset -2519521248
2727
ret void
2828
}
2929

0 commit comments

Comments
 (0)