llvm · PiJoules · May 15, 2025 · May 7, 2025 · pcc · Apr 21, 2025
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -298,7 +298,9 @@ wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
           ++Byte;
         }
       }
-      return (MinByte + I) * 8;
+      // Rounding up ensures the constant is always stored at address we
+      // can directly load from without misalignment.
+      return alignTo((MinByte + I) * 8, Size);
     NextI:;
     }
   }
@@ -1834,9 +1836,19 @@ bool DevirtModule::tryVirtualConstProp(
   if (!RetType)
     return false;
   unsigned BitWidth = RetType->getBitWidth();
+
+  // TODO: Since we can evaluated these constants at compile-time, we can save
+  // some space by calculating the smallest range of values that all these
+  // constants can fit in, then only allocate enough space to fit those values.
+  // At each callsite, we can get the original type by doing a sign/zero
+  // extension. For example, if we would store an i64, but we can see that all
+  // the values fit into an i16, then we can store an i16 before/after the
+  // vtable and at each callsite do a s/zext.
   if (BitWidth > 64)
     return false;
 
+  Align TypeAlignment = M.getDataLayout().getPrefTypeAlign(RetType);
+
   // Make sure that each function is defined, does not access memory, takes at
   // least one argument, does not use its first argument (which we assume is
   // 'this'), and has the same return type.
@@ -1861,6 +1873,18 @@ bool DevirtModule::tryVirtualConstProp(
         Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
         Fn->getReturnType() != RetType)
       return false;
+
+    // This only works if the integer size is at most the alignment of the
+    // vtable. If the table is underaligned, then we can't guarantee that the
+    // constant will always be aligned to the integer type alignment. For
+    // example, if the table is `align 1`, we can never guarantee that an i32
+    // stored before/after the vtable is 32-bit aligned without changing the
+    // alignment of the new global.
+    GlobalVariable *GV = Target.TM->Bits->GV;
+    Align TableAlignment = M.getDataLayout().getValueOrABITypeAlignment(
+        GV->getAlign(), GV->getValueType());
+    if (TypeAlignment > TableAlignment)
+      return false;
   }
 
   for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
@@ -1880,6 +1904,9 @@ bool DevirtModule::tryVirtualConstProp(
 
     // Find an allocation offset in bits in all vtables associated with the
     // type.
+    // TODO: If there would be "holes" in the vtable that were added by
+    // padding, we could place i1s there to reduce any extra padding that
+    // would be introduced by the i1s.
     uint64_t AllocBefore =
         findLowestOffset(TargetsForSlot, /*IsAfter=*/false, BitWidth);
     uint64_t AllocAfter =
@@ -1911,6 +1938,14 @@ bool DevirtModule::tryVirtualConstProp(
       setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
                            OffsetBit);
 
+    // In an earlier check we forbade constant propagation from operating on
+    // tables whose alignment is less than the alignment needed for loading
+    // the constant. Thus, the address we take the offset from will always be
+    // aligned to at least this integer alignment. Now, we need to ensure that
+    // the offset is also aligned to this integer alignment to ensure we always
+    // have an aligned load.
+    assert(OffsetByte % TypeAlignment.value() == 0);
+
     if (RemarksEnabled || AreStatisticsEnabled())
       for (auto &&Target : TargetsForSlot)
         Target.WasDevirt = true;

diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll
@@ -9,28 +9,28 @@ target datalayout = "e-p:64:64"
 ;; preserve alignment. Making them i16s allows them to stay at the beginning of
 ;; the vtable. There are other tests where there's a mix of constants before and
 ;; after the vtable but for this file we just want everything before the vtable.
-; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\03\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf1i16], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]]
+; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\03\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf1i16], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]]
 @vt1 = constant [3 x ptr] [
 ptr @vf0i1,
 ptr @vf1i1,
 ptr @vf1i16
 ], section "vt1sec", !type !0
 
-; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\04\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf2i16], [0 x i8] zeroinitializer }, !type [[T8]]
+; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\04\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf2i16], [0 x i8] zeroinitializer }, !type [[T8]]
 @vt2 = constant [3 x ptr] [
 ptr @vf1i1,
 ptr @vf0i1,
 ptr @vf2i16
 ], !type !0
 
-; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [4 x i8], [3 x ptr], [0 x i8] } { [4 x i8] c"\00\05\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf3i16], [0 x i8] zeroinitializer }, align 2, !type [[T5:![0-9]+]]
+; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [4 x i8], [3 x ptr], [0 x i8] } { [4 x i8] c"\05\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf3i16], [0 x i8] zeroinitializer }, align 2, !type [[T5:![0-9]+]]
 @vt3 = constant [3 x ptr] [
 ptr @vf0i1,
 ptr @vf1i1,
 ptr @vf3i16
 ], align 2, !type !0
 
-; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [16 x i8], [3 x ptr], [0 x i8] } { [16 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\06\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf4i16], [0 x i8] zeroinitializer },  align 16, !type [[T16:![0-9]+]]
+; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [16 x i8], [3 x ptr], [0 x i8] } { [16 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\06\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf4i16], [0 x i8] zeroinitializer },  align 16, !type [[T16:![0-9]+]]
 @vt4 = constant [3 x ptr] [
 ptr @vf1i1,
 ptr @vf0i1,
@@ -136,7 +136,7 @@ define i16 @call3(ptr %obj) {
   call void @llvm.assume(i1 %p)
   %fptrptr = getelementptr [3 x ptr], ptr %vtable, i16 0, i16 2
   %fptr = load ptr, ptr %fptrptr
-  ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 -3
+  ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 -4
   ; CHECK: [[VTLOAD3:%[^ ]*]] = load i16, ptr [[VTGEP3]]
   %result = call i16 %fptr(ptr %obj)
   ; CHECK: ret i16 [[VTLOAD3]]

diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
@@ -37,28 +37,28 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; SKIP-ALL-NOT: devirtualized
 
-; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\01\00\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf1i32], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]]
+; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [4 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf1i32], [4 x i8] c"\01\00\00\00" }, section "vt1sec", !type [[T8:![0-9]+]]
 @vt1 = constant [3 x ptr] [
 ptr @vf0i1,
 ptr @vf1i1,
 ptr @vf1i32
 ], section "vt1sec", !type !0
 
-; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\02\00\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf2i32], [0 x i8] zeroinitializer }, !type [[T8]]
+; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [4 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf2i32], [4 x i8] c"\02\00\00\00" }, !type [[T8]]
 @vt2 = constant [3 x ptr] [
 ptr @vf1i1,
 ptr @vf0i1,
 ptr @vf2i32
 ], !type !0
 
-; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\03\00\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf3i32], [0 x i8] zeroinitializer }, !type [[T8]]
+; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [4 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [3 x ptr] [ptr @vf0i1, ptr @vf1i1, ptr @vf3i32], [4 x i8] c"\03\00\00\00" }, !type [[T8]]
 @vt3 = constant [3 x ptr] [
 ptr @vf0i1,
 ptr @vf1i1,
 ptr @vf3i32
 ], !type !0
 
-; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\04\00\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf4i32], [0 x i8] zeroinitializer }, !type [[T8]]
+; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [8 x i8], [3 x ptr], [4 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\01", [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf4i32], [4 x i8] c"\04\00\00\00" }, !type [[T8]]
 @vt4 = constant [3 x ptr] [
 ptr @vf1i1,
 ptr @vf0i1,
@@ -95,10 +95,10 @@ i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf0i1 to i64), i64 p
 i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4i32 to i64), i64 ptrtoint (ptr @vt7_rel to i64)) to i32)
 ], !type !1
 
-; CHECK: @vt1 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [0 x i8] }, ptr [[VT1DATA]], i32 0, i32 1)
-; CHECK: @vt2 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [0 x i8] }, ptr [[VT2DATA]], i32 0, i32 1)
-; CHECK: @vt3 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [0 x i8] }, ptr [[VT3DATA]], i32 0, i32 1)
-; CHECK: @vt4 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [0 x i8] }, ptr [[VT4DATA]], i32 0, i32 1)
+; CHECK: @vt1 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [4 x i8] }, ptr [[VT1DATA]], i32 0, i32 1)
+; CHECK: @vt2 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [4 x i8] }, ptr [[VT2DATA]], i32 0, i32 1)
+; CHECK: @vt3 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [4 x i8] }, ptr [[VT3DATA]], i32 0, i32 1)
+; CHECK: @vt4 = alias [3 x ptr], getelementptr inbounds ({ [8 x i8], [3 x ptr], [4 x i8] }, ptr [[VT4DATA]], i32 0, i32 1)
 ; CHECK: @vt6_rel = alias [3 x i32], getelementptr inbounds ({ [4 x i8], [3 x i32], [0 x i8] }, ptr [[VT6RELDATA]], i32 0, i32 1)
 ; CHECK: @vt7_rel = alias [3 x i32], getelementptr inbounds ({ [4 x i8], [3 x i32], [0 x i8] }, ptr [[VT7RELDATA]], i32 0, i32 1)
 
@@ -165,7 +165,7 @@ define i32 @call3(ptr %obj) {
   %vtable = load ptr, ptr %obj
   %pair = call {ptr, i1} @llvm.type.checked.load(ptr %vtable, i32 16, metadata !"typeid")
   %fptr = extractvalue {ptr, i1} %pair, 0
-  ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 -5
+  ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 24
   ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, ptr [[VTGEP3]]
   %result = call i32 %fptr(ptr %obj)
   ; CHECK: ret i32 [[VTLOAD3]]

diff --git a/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll b/llvm/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll
@@ -2,30 +2,30 @@
 
 target datalayout = "e-p:64:64"
 
-; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [0 x i8], [4 x ptr], [5 x i8] } { [0 x i8] zeroinitializer, [4 x ptr] [ptr null, ptr @vf0i1, ptr @vf1i1, ptr @vf1i32], [5 x i8] c"\02\03\00\00\00" }, !type [[T8:![0-9]+]]
+; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [0 x i8], [4 x ptr], [8 x i8] } { [0 x i8] zeroinitializer, [4 x ptr] [ptr null, ptr @vf0i1, ptr @vf1i1, ptr @vf1i32], [8 x i8] c"\02\00\00\00\03\00\00\00" }, !type [[T8:![0-9]+]]
 @vt1 = constant [4 x ptr] [
 ptr null,
 ptr @vf0i1,
 ptr @vf1i1,
 ptr @vf1i32
 ], !type !1
 
-; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [0 x i8], [3 x ptr], [5 x i8] } { [0 x i8] zeroinitializer, [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf2i32], [5 x i8] c"\01\04\00\00\00" }, !type [[T0:![0-9]+]]
+; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [0 x i8], [3 x ptr], [8 x i8] } { [0 x i8] zeroinitializer, [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf2i32], [8 x i8] c"\01\00\00\00\04\00\00\00" }, !type [[T0:![0-9]+]]
 @vt2 = constant [3 x ptr] [
 ptr @vf1i1,
 ptr @vf0i1,
 ptr @vf2i32
 ], !type !0
 
-; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [0 x i8], [4 x ptr], [5 x i8] } { [0 x i8] zeroinitializer, [4 x ptr] [ptr null, ptr @vf0i1, ptr @vf1i1, ptr @vf3i32], [5 x i8] c"\02\05\00\00\00" }, !type [[T8]]
+; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [0 x i8], [4 x ptr], [8 x i8] } { [0 x i8] zeroinitializer, [4 x ptr] [ptr null, ptr @vf0i1, ptr @vf1i1, ptr @vf3i32], [8 x i8] c"\02\00\00\00\05\00\00\00" }, !type [[T8]]
 @vt3 = constant [4 x ptr] [
 ptr null,
 ptr @vf0i1,
 ptr @vf1i1,
 ptr @vf3i32
 ], !type !1
 
-; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [0 x i8], [3 x ptr], [5 x i8] } { [0 x i8] zeroinitializer, [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf4i32], [5 x i8] c"\01\06\00\00\00" }, !type [[T0]]
+; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [0 x i8], [3 x ptr], [8 x i8] } { [0 x i8] zeroinitializer, [3 x ptr] [ptr @vf1i1, ptr @vf0i1, ptr @vf4i32], [8 x i8] c"\01\00\00\00\06\00\00\00" }, !type [[T0]]
 @vt4 = constant [3 x ptr] [
 ptr @vf1i1,
 ptr @vf0i1,
@@ -57,10 +57,10 @@ i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1i1 to i64), i64 p
 i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4i32 to i64), i64 ptrtoint (ptr @vt6_rel to i64)) to i32)
 ], !type !2
 
-; CHECK: @vt1 = alias [4 x ptr], getelementptr inbounds ({ [0 x i8], [4 x ptr], [5 x i8] }, ptr [[VT1DATA]], i32 0, i32 1)
-; CHECK: @vt2 = alias [3 x ptr], getelementptr inbounds ({ [0 x i8], [3 x ptr], [5 x i8] }, ptr [[VT2DATA]], i32 0, i32 1)
-; CHECK: @vt3 = alias [4 x ptr], getelementptr inbounds ({ [0 x i8], [4 x ptr], [5 x i8] }, ptr [[VT3DATA]], i32 0, i32 1)
-; CHECK: @vt4 = alias [3 x ptr], getelementptr inbounds ({ [0 x i8], [3 x ptr], [5 x i8] }, ptr [[VT4DATA]], i32 0, i32 1)
+; CHECK: @vt1 = alias [4 x ptr], getelementptr inbounds ({ [0 x i8], [4 x ptr], [8 x i8] }, ptr [[VT1DATA]], i32 0, i32 1)
+; CHECK: @vt2 = alias [3 x ptr], getelementptr inbounds ({ [0 x i8], [3 x ptr], [8 x i8] }, ptr [[VT2DATA]], i32 0, i32 1)
+; CHECK: @vt3 = alias [4 x ptr], getelementptr inbounds ({ [0 x i8], [4 x ptr], [8 x i8] }, ptr [[VT3DATA]], i32 0, i32 1)
+; CHECK: @vt4 = alias [3 x ptr], getelementptr inbounds ({ [0 x i8], [3 x ptr], [8 x i8] }, ptr [[VT4DATA]], i32 0, i32 1)
 
 define i1 @vf0i1(ptr %this) readnone {
   ret i1 0
@@ -124,7 +124,7 @@ define i32 @call3(ptr %obj) {
   call void @llvm.assume(i1 %p)
   %fptrptr = getelementptr [3 x ptr], ptr %vtable, i32 0, i32 2
   %fptr = load ptr, ptr %fptrptr
-  ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 25
+  ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, ptr %vtable, i32 28
   ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, ptr [[VTGEP3]]
   %result = call i32 %fptr(ptr %obj)
   ; CHECK: ret i32 [[VTLOAD3]]