Skip to content

Commit ff5fa71

Browse files
authored
[AArch64][SVE] Tweak how SVE CFI expressions are emitted (#151677)
The main change in this patch is that we go from emitting the expression `@ cfa - NumBytes - NumScalableBytes * VG` to `@ cfa - VG * NumScalableBytes - NumBytes`. That is, VG is the first expression. This is for a future patch that adds an alternative way to resolve VG (which uses the CFA, so it is convenient for the CFA to be at the top of the stack). Since doing this is fairly churn-heavy, I took the opportunity to also save up to 4 bytes per SVE CFI expression. This is done by folding LEB128 constants to literals when in the range 0 to 31, and using the offset in `DW_OP_breg*` expressions.
1 parent 753885e commit ff5fa71

40 files changed

+1018
-924
lines changed

llvm/include/llvm/Support/LEB128.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,23 @@ inline uint64_t decodeULEB128AndIncUnsafe(const uint8_t *&p) {
221221
return decodeULEB128AndInc(p, nullptr);
222222
}
223223

224+
enum class LEB128Sign { Unsigned, Signed };
225+
226+
template <LEB128Sign Sign, typename T, typename U = char,
227+
unsigned MaxLEB128SizeBytes = 16>
228+
inline void appendLEB128(SmallVectorImpl<U> &Buffer, T Value) {
229+
static_assert(sizeof(U) == 1, "Expected buffer of bytes");
230+
unsigned LEB128ValueSize;
231+
U TmpBuffer[MaxLEB128SizeBytes];
232+
if constexpr (Sign == LEB128Sign::Signed)
233+
LEB128ValueSize =
234+
encodeSLEB128(Value, reinterpret_cast<uint8_t *>(TmpBuffer));
235+
else
236+
LEB128ValueSize =
237+
encodeULEB128(Value, reinterpret_cast<uint8_t *>(TmpBuffer));
238+
Buffer.append(TmpBuffer, TmpBuffer + LEB128ValueSize);
239+
}
240+
224241
/// Utility function to get the size of the ULEB128-encoded value.
225242
LLVM_ABI extern unsigned getULEB128Size(uint64_t Value);
226243

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 59 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5861,33 +5861,41 @@ void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
58615861
}
58625862
}
58635863

5864-
// Convenience function to create a DWARF expression for
5865-
// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5866-
static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5867-
int NumVGScaledBytes, unsigned VG,
5868-
llvm::raw_string_ostream &Comment) {
5869-
uint8_t buffer[16];
5870-
5871-
if (NumBytes) {
5864+
// Convenience function to create a DWARF expression for: Constant `Operation`.
5865+
// This helper emits compact sequences for common cases. For example, for`-15
5866+
// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
5867+
static void appendConstantExpr(SmallVectorImpl<char> &Expr, int64_t Constant,
5868+
dwarf::LocationAtom Operation) {
5869+
if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
5870+
// -Constant (1 to 31)
5871+
Expr.push_back(dwarf::DW_OP_lit0 - Constant);
5872+
Operation = dwarf::DW_OP_minus;
5873+
} else if (Constant >= 0 && Constant <= 31) {
5874+
// Literal value 0 to 31
5875+
Expr.push_back(dwarf::DW_OP_lit0 + Constant);
5876+
} else {
5877+
// Signed constant
58725878
Expr.push_back(dwarf::DW_OP_consts);
5873-
Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5874-
Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5875-
Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5879+
appendLEB128<LEB128Sign::Signed>(Expr, Constant);
58765880
}
5881+
return Expr.push_back(Operation);
5882+
}
58775883

5878-
if (NumVGScaledBytes) {
5879-
Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5880-
Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5881-
5882-
Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5883-
Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5884-
Expr.push_back(0);
5885-
5886-
Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5887-
Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5884+
// Convenience function to create a DWARF expression for a register.
5885+
static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
5886+
Expr.push_back(dwarf::DW_OP_bregx);
5887+
appendLEB128<LEB128Sign::Unsigned>(Expr, RegNum);
5888+
Expr.push_back(0);
5889+
}
58885890

5889-
Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5890-
<< std::abs(NumVGScaledBytes) << " * VG";
5891+
// Convenience function to create a comment for
5892+
// (+/-) NumBytes (* RegScale)?
5893+
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
5894+
StringRef RegScale = {}) {
5895+
if (NumBytes) {
5896+
Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5897+
if (!RegScale.empty())
5898+
Comment << ' ' << RegScale;
58915899
}
58925900
}
58935901

@@ -5909,19 +5917,26 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
59095917
else
59105918
Comment << printReg(Reg, &TRI);
59115919

5912-
// Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5920+
// Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
59135921
SmallString<64> Expr;
59145922
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5915-
Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5916-
Expr.push_back(0);
5917-
appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5918-
TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5923+
assert(DwarfReg >= 0 && DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
5924+
// Reg + NumBytes
5925+
Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
5926+
appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
5927+
appendOffsetComment(NumBytes, Comment);
5928+
if (NumVGScaledBytes) {
5929+
// + VG * NumVGScaledBytes
5930+
appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
5931+
appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
5932+
appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
5933+
Expr.push_back(dwarf::DW_OP_plus);
5934+
}
59195935

59205936
// Wrap this into DW_CFA_def_cfa.
59215937
SmallString<64> DefCfaExpr;
59225938
DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5923-
uint8_t buffer[16];
5924-
DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5939+
appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
59255940
DefCfaExpr.append(Expr.str());
59265941
return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
59275942
Comment.str());
@@ -5958,17 +5973,25 @@ MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
59585973
llvm::raw_string_ostream Comment(CommentBuffer);
59595974
Comment << printReg(Reg, &TRI) << " @ cfa";
59605975

5961-
// Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5976+
// Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
5977+
assert(NumVGScaledBytes && "Expected scalable offset");
59625978
SmallString<64> OffsetExpr;
5963-
appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5964-
TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5979+
// + VG * NumVGScaledBytes
5980+
appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
5981+
appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
5982+
appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
5983+
OffsetExpr.push_back(dwarf::DW_OP_plus);
5984+
if (NumBytes) {
5985+
// + NumBytes
5986+
appendOffsetComment(NumBytes, Comment);
5987+
appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
5988+
}
59655989

59665990
// Wrap this into DW_CFA_expression
59675991
SmallString<64> CfaExpr;
59685992
CfaExpr.push_back(dwarf::DW_CFA_expression);
5969-
uint8_t buffer[16];
5970-
CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5971-
CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5993+
appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
5994+
appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
59725995
CfaExpr.append(OffsetExpr.str());
59735996

59745997
return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),

llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define void @array_1D(ptr %addr) #0 {
1111
; CHECK: // %bb.0: // %entry
1212
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
1313
; CHECK-NEXT: addvl sp, sp, #-3
14-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
14+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
1515
; CHECK-NEXT: .cfi_offset w29, -16
1616
; CHECK-NEXT: ldr z0, [x0]
1717
; CHECK-NEXT: ldr z1, [x0, #2, mul vl]
@@ -34,7 +34,7 @@ define %my_subtype @array_1D_extract(ptr %addr) #0 {
3434
; CHECK: // %bb.0: // %entry
3535
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
3636
; CHECK-NEXT: addvl sp, sp, #-3
37-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
37+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
3838
; CHECK-NEXT: .cfi_offset w29, -16
3939
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
4040
; CHECK-NEXT: addvl sp, sp, #3
@@ -52,7 +52,7 @@ define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
5252
; CHECK: // %bb.0: // %entry
5353
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
5454
; CHECK-NEXT: addvl sp, sp, #-3
55-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
55+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
5656
; CHECK-NEXT: .cfi_offset w29, -16
5757
; CHECK-NEXT: ldr z1, [x0, #2, mul vl]
5858
; CHECK-NEXT: ldr z2, [x0]
@@ -75,7 +75,7 @@ define void @array_2D(ptr %addr) #0 {
7575
; CHECK: // %bb.0: // %entry
7676
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
7777
; CHECK-NEXT: addvl sp, sp, #-6
78-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
78+
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x30, 0x1e, 0x22 // sp + 16 + 48 * VG
7979
; CHECK-NEXT: .cfi_offset w29, -16
8080
; CHECK-NEXT: ldr z0, [x0]
8181
; CHECK-NEXT: ldr z1, [x0, #5, mul vl]

llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ define void @test(ptr %addr) #0 {
1010
; CHECK: // %bb.0: // %entry
1111
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
1212
; CHECK-NEXT: addvl sp, sp, #-3
13-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
13+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
1414
; CHECK-NEXT: .cfi_offset w29, -16
1515
; CHECK-NEXT: ldr z0, [x0]
1616
; CHECK-NEXT: ldr z1, [x0, #2, mul vl]

llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @cvtn_f16_tuple(i64 %stride, p
1111
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
1212
; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill
1313
; CHECK-NEXT: str z10, [sp, #2, mul vl] // 16-byte Folded Spill
14-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
14+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
1515
; CHECK-NEXT: .cfi_offset w29, -16
16-
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 8 * VG
17-
; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 16 * VG
16+
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d10 @ cfa - 8 * VG - 16
17+
; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x40, 0x1c // $d11 @ cfa - 16 * VG - 16
1818
; CHECK-NEXT: ptrue pn8.b
1919
; CHECK-NEXT: add x8, x1, x0
2020
; CHECK-NEXT: ld1h { z2.h, z10.h }, pn8/z, [x1]
@@ -52,10 +52,10 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @cvtnt_f32_tuple(i64 %stride,
5252
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
5353
; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill
5454
; CHECK-NEXT: str z10, [sp, #2, mul vl] // 16-byte Folded Spill
55-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
55+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
5656
; CHECK-NEXT: .cfi_offset w29, -16
57-
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 8 * VG
58-
; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 16 * VG
57+
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d10 @ cfa - 8 * VG - 16
58+
; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x40, 0x1c // $d11 @ cfa - 16 * VG - 16
5959
; CHECK-NEXT: ptrue pn8.b
6060
; CHECK-NEXT: add x8, x1, x0
6161
; CHECK-NEXT: mov z1.d, z0.d

llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@
99
; CHECK-NEXT: .cfi_def_cfa_offset 16
1010
; CHECK-NEXT: .cfi_offset w29, -16
1111
; CHECK-NEXT: addvl sp, sp, #-2
12-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
12+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
1313
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
1414
; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
15-
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
15+
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16
1616
; CHECK-NEXT: addvl sp, sp, #-1
17-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
17+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
1818
; CHECK-NEXT: // implicit-def: $z8
1919
; CHECK-NEXT: // implicit-def: $p4
2020
; CHECK-NEXT: addvl sp, sp, #1
21-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
21+
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
2222
; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
2323
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
2424
; CHECK-NEXT: addvl sp, sp, #2

0 commit comments

Comments
 (0)