[AArch64][SVE] Tweak how SVE CFI expressions are emitted #151677
Conversation
The main change in this patch is that we go from emitting the expression:

@ cfa - NumBytes - NumScalableBytes * VG

to:

@ cfa - VG * NumScalableBytes - NumBytes

That is, VG is now the first term. This is for a future patch that adds an alternative way to resolve VG (which uses the CFA, so it is convenient for the CFA to be at the top of the stack).

Since doing this is fairly churn-heavy, I took the opportunity to also save up to 4 bytes per SVE CFI expression. This is done by folding LEB128 constants to literals when they are in the range 0 to 31, and by using the offset field of `DW_OP_breg*` expressions.
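To make the saving concrete, here is one of the updated CFI escapes from the tests below, `sp + 16 + 24 * VG`, decoded byte by byte (the annotations are the standard DWARF opcode names):

    Before (14 bytes):
      0x0f            DW_CFA_def_cfa_expression
      0x0c            expression length = 12
      0x8f 0x00       DW_OP_breg31 (sp) + 0
      0x11 0x10       DW_OP_consts 16
      0x22            DW_OP_plus
      0x11 0x18       DW_OP_consts 24
      0x92 0x2e 0x00  DW_OP_bregx VG (reg 46) + 0
      0x1e            DW_OP_mul
      0x22            DW_OP_plus

    After (10 bytes):
      0x0f            DW_CFA_def_cfa_expression
      0x08            expression length = 8
      0x8f 0x10       DW_OP_breg31 (sp) + 16   (offset folded into the breg)
      0x92 0x2e 0x00  DW_OP_bregx VG (reg 46) + 0
      0x48            DW_OP_lit24              (literal instead of DW_OP_consts)
      0x1e            DW_OP_mul
      0x22            DW_OP_plus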
The test changes look huge, but most tests have not changed in a meaningful way, as it's only the bytes in the `.cfi_escape` directives that change.
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-support

Author: Benjamin Maxwell (MacDue)

Patch is 336.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151677.diff

40 Files Affected:
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index ce789cc49f295..6102c1dc1b952 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -221,6 +221,23 @@ inline uint64_t decodeULEB128AndIncUnsafe(const uint8_t *&p) {
return decodeULEB128AndInc(p, nullptr);
}
+enum class LEB128Sign { Unsigned, Signed };
+
+template <LEB128Sign Sign, typename T, typename U = char,
+ unsigned MaxLEB128SizeBytes = 16>
+inline void appendLEB128(SmallVectorImpl<U> &Buffer, T Value) {
+ static_assert(sizeof(U) == 1, "Expected buffer of bytes");
+ unsigned LEB128ValueSize;
+ U TmpBuffer[MaxLEB128SizeBytes];
+ if constexpr (Sign == LEB128Sign::Signed)
+ LEB128ValueSize =
+ encodeSLEB128(Value, reinterpret_cast<uint8_t *>(TmpBuffer));
+ else
+ LEB128ValueSize =
+ encodeULEB128(Value, reinterpret_cast<uint8_t *>(TmpBuffer));
+ Buffer.append(TmpBuffer, TmpBuffer + LEB128ValueSize);
+}
+
/// Utility function to get the size of the ULEB128-encoded value.
LLVM_ABI extern unsigned getULEB128Size(uint64_t Value);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 59d4fd26f6f91..baf249d2c87ca 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5861,33 +5861,41 @@ void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
}
}
-// Convenience function to create a DWARF expression for
-// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
-static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
- int NumVGScaledBytes, unsigned VG,
- llvm::raw_string_ostream &Comment) {
- uint8_t buffer[16];
-
- if (NumBytes) {
+// Convenience function to create a DWARF expression for `Op` Value.
+// This helper emits compact sequences for common cases.
+static void appendConstantExpr(SmallVectorImpl<char> &Expr, int64_t Value,
+ uint8_t Op) {
+ // + -constant (<= 31)
+ if (Op == dwarf::DW_OP_plus && Value < 0 && -Value <= 31) {
+ Expr.push_back(dwarf::DW_OP_lit0 - Value);
+ Op = dwarf::DW_OP_minus;
+ } else if (Value >= 0 && Value <= 31) {
+ // `Op` literal value 0 to 31
+ Expr.push_back(dwarf::DW_OP_lit0 + Value);
+ } else {
+ // `Op` constant
Expr.push_back(dwarf::DW_OP_consts);
- Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
- Expr.push_back((uint8_t)dwarf::DW_OP_plus);
- Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+ appendLEB128<LEB128Sign::Signed>(Expr, Value);
}
+ return Expr.push_back(Op);
+}
- if (NumVGScaledBytes) {
- Expr.push_back((uint8_t)dwarf::DW_OP_consts);
- Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
-
- Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
- Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
- Expr.push_back(0);
-
- Expr.push_back((uint8_t)dwarf::DW_OP_mul);
- Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+// Convenience function to create a DWARF expression for VG
+static void appendReadVGRegExpr(SmallVectorImpl<char> &Expr,
+ unsigned VGRegNum) {
+ Expr.push_back(dwarf::DW_OP_bregx);
+ appendLEB128<LEB128Sign::Unsigned>(Expr, VGRegNum);
+ Expr.push_back(0);
+}
- Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
- << std::abs(NumVGScaledBytes) << " * VG";
+// Convenience function to create a comment for
+// (+/-) NumBytes (* RegScale)?
+static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
+ StringRef RegScale = {}) {
+ if (NumBytes) {
+ Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+ if (!RegScale.empty())
+ Comment << " * " << RegScale;
}
}
@@ -5909,19 +5917,26 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
else
Comment << printReg(Reg, &TRI);
- // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
+ // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
SmallString<64> Expr;
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+ assert(DwarfReg >= 0 && DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
+ // Reg + NumBytes
Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
- Expr.push_back(0);
- appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
- TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+ appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
+ appendOffsetComment(NumBytes, Comment);
+ if (NumVGScaledBytes) {
+ // + VG * NumVGScaledBytes
+ appendOffsetComment(NumVGScaledBytes, Comment, "VG");
+ appendReadVGRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
+ appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
+ Expr.push_back(dwarf::DW_OP_plus);
+ }
// Wrap this into DW_CFA_def_cfa.
SmallString<64> DefCfaExpr;
DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
- uint8_t buffer[16];
- DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
+ appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
DefCfaExpr.append(Expr.str());
return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
Comment.str());
@@ -5958,17 +5973,25 @@ MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
llvm::raw_string_ostream Comment(CommentBuffer);
Comment << printReg(Reg, &TRI) << " @ cfa";
- // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
+ // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
+ assert(NumVGScaledBytes && "Expected scalable offset");
SmallString<64> OffsetExpr;
- appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
- TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+ // + VG * NumVGScaledBytes
+ appendOffsetComment(NumVGScaledBytes, Comment, "VG");
+ appendReadVGRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
+ appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
+ OffsetExpr.push_back(dwarf::DW_OP_plus);
+ if (NumBytes) {
+ // + NumBytes
+ appendOffsetComment(NumBytes, Comment);
+ appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
+ }
// Wrap this into DW_CFA_expression
SmallString<64> CfaExpr;
CfaExpr.push_back(dwarf::DW_CFA_expression);
- uint8_t buffer[16];
- CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
- CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
+ appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
+ appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
CfaExpr.append(OffsetExpr.str());
return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
index 3a808f5a02f0d..dd018a659d1b8 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -11,7 +11,7 @@ define void @array_1D(ptr %addr) #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ldr z1, [x0, #2, mul vl]
@@ -34,7 +34,7 @@ define %my_subtype @array_1D_extract(ptr %addr) #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
@@ -52,7 +52,7 @@ define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr z1, [x0, #2, mul vl]
; CHECK-NEXT: ldr z2, [x0]
@@ -75,7 +75,7 @@ define void @array_2D(ptr %addr) #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-6
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
+; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x30, 0x1e, 0x22 // sp + 16 + 48 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ldr z1, [x0, #5, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
index e7d8f4ff39cee..be73dc91aac51 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
@@ -10,7 +10,7 @@ define void @test(ptr %addr) #0 {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ldr z1, [x0, #2, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll
index d1e0729db30e5..6a91d85a71baf 100644
--- a/llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/fp8-sme2-cvtn.ll
@@ -11,10 +11,10 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @cvtn_f16_tuple(i64 %stride, p
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z10, [sp, #2, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 8 * VG
-; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 16 * VG
+; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d10 @ cfa - 8 * VG - 16
+; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x40, 0x1c // $d11 @ cfa - 16 * VG - 16
; CHECK-NEXT: ptrue pn8.b
; CHECK-NEXT: add x8, x1, x0
; CHECK-NEXT: ld1h { z2.h, z10.h }, pn8/z, [x1]
@@ -52,10 +52,10 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @cvtnt_f32_tuple(i64 %stride,
; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z11, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z10, [sp, #2, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 8 * VG
-; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 16 * VG
+; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d10 @ cfa - 8 * VG - 16
+; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x40, 0x1c // $d11 @ cfa - 16 * VG - 16
; CHECK-NEXT: ptrue pn8.b
; CHECK-NEXT: add x8, x1, x0
; CHECK-NEXT: mov z1.d, z0.d
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir
index aed3145073619..e970d8339d792 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-calleesaves-fix.mir
@@ -9,16 +9,16 @@
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl sp, sp, #-2
- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
- ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+ ; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x09, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x40, 0x1c // $d8 @ cfa - 8 * VG - 16
; CHECK-NEXT: addvl sp, sp, #-1
- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT: // implicit-def: $z8
; CHECK-NEXT: // implicit-def: $p4
; CHECK-NEXT: addvl sp, sp, #1
- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+ ; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 17b1ad2197c46..03a6aabffaaf1 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -64,7 +64,7 @@
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 32
@@ -79,7 +79,8 @@
# ASM: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
# ASM: .cfi_def_cfa_offset 32
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
+# ASM: .cfi_escape
+# ASM-SAME: // sp + 32 + 16 * VG
# ASM: .cfi_def_cfa wsp, 32
# ASM: .cfi_def_cfa_offset 16
# ASM: .cfi_def_cfa_offset 0
@@ -88,8 +89,8 @@
#
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
-# UNWINDINFO: DW_CFA_def_cfa_offset: +32
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_offset: +32
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +32, DW_OP_bregx 0x2e +0, DW_OP_lit16, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa: reg31 +32
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
@@ -129,7 +130,7 @@ body: |
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 48
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape
#
# CHECK-NEXT: $x20 = IMPLICIT_DEF
@@ -152,7 +153,8 @@ body: |
# ASM-NEXT: .cfi_offset w21, -16
# ASM-NEXT: .cfi_offset w29, -32
# ASM: .cfi_def_cfa_offset 48
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG
+# ASM: .cfi_escape
+# ASM-SAME: // sp + 48 + 16 * VG
#
# ASM: .cfi_def_cfa wsp, 48
# ASM: .cfi_def_cfa_offset 32
@@ -166,9 +168,8 @@ body: |
# UNWINDINFO: DW_CFA_offset: reg20 -8
# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32
-# UNWINDINFO: DW_CFA_def_cfa_offset: +48
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
-#
+# UNWINDINFO: DW_CFA_def_cfa_offset: +48
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +48, DW_OP_bregx 0x2e +0, DW_OP_lit16, DW_OP_mul, DW_OP_plus
# UNWINDINFO: DW_CFA_def_cfa: reg31 +48
# UNWINDINFO: DW_CFA_def_cfa_offset: +32
# UNWINDINFO: DW_CFA_def_cfa_offset: +0
@@ -272,7 +273,7 @@ body: |
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2
@@ -295,7 +296,8 @@ body: |
# ASM: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
# ASM: .cfi_def_cfa_offset 32
-# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG
+# ASM: .cfi_escape
+# ASM-SAME: // sp + 32 + 24 * VG
#
# ASM: .cfi_def_cfa wsp, 32
# ASM: .cfi_def_cfa_offset 16
@@ -305,7 +307,7 @@ body: |
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
# UNWINDINFO: DW_CFA_def_cfa_offset: +32
-# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +32, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus
#
# UNWINDINFO: DW_CFA_def_cfa: reg31 +32
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
@@ -434,7 +436,7 @@ body: |
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
-# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape
# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
@@ -451,7 +453,8 @@ body: |
# ASM: .cfi_def_cfa_offset 16
# ASM-NEXT: .cfi_offset w29, -16
# ASM: .cfi_def_cfa_offset 32
-# ...
[truncated]
gbossu left a comment:
Looks sensible to me, I'm just not comfortable approving (yet?) as I haven't checked all the computations in detail.
Super-nit: It would be even clearer to have a pure NFC PR with the refactorings, followed by another one with the optimisation for small constants.
    if (Op == dwarf::DW_OP_plus && Value < 0 && -Value <= 31) {
      Expr.push_back(dwarf::DW_OP_lit0 - Value);
      Op = dwarf::DW_OP_minus;
    } else if (Value >= 0 && Value <= 31) {
Don't you still need to check Op == dwarf::DW_OP_plus?
Using DW_OP_lit* does not require the operation to be a plus
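For illustration, `DW_OP_lit<n>` simply pushes the constant n onto the DWARF stack, so it composes with any operator; only the negative-offset fold rewrites the operation itself. Using sequences from the updated tests:

    DW_OP_bregx 0x2e 0x00, DW_OP_lit24, DW_OP_mul   // 24 * VG: a literal under DW_OP_mul
    DW_OP_lit16, DW_OP_minus                        // "+ -16" refolded as "- 16"
    // vs. DW_OP_consts -16, DW_OP_plus (0x11 0x70 0x22), which is one byte larger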
    // Convenience function to create a DWARF expression for VG
    static void appendReadVGRegExpr(SmallVectorImpl<char> &Expr,
                                    unsigned VGRegNum) {
Curious: Why not just appendVGExpr? Does a "write" expression exist?
In my next patch, I will add appendLoadIncomingVGExpr. I think the more explicit name here makes the distinction more apparent.
I'm not sure it's worthwhile, it'll result in two PRs with a similar amount of churn (both needing about the same level of review -- checking the before/after expressions in the tests).