From 2d1cd65a3237f31a74181413aa50c5821de69f48 Mon Sep 17 00:00:00 2001 From: Bruno De Fraine Date: Thu, 7 Nov 2024 15:49:38 +0100 Subject: [PATCH 1/4] [GlobalOpt] Fix global SRA incorrect use of original alignment on some elements The logic had a flaw where the alignment from the original aggregate is unintentionally retained when the calculated known alignment is not higher than the ABI type alignment. Fixes #115282 --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 7 ++++--- llvm/test/DebugInfo/X86/global-sra-struct-fit-segment.ll | 2 +- .../X86/global-sra-struct-part-overlap-segment.ll | 2 +- llvm/test/Transforms/GlobalOpt/globalsra-align.ll | 8 ++++---- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 4647c65a5c850..9aaa0212fc844 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -575,15 +575,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { *GV->getParent(), Ty, false, GlobalVariable::InternalLinkage, Initializer, GV->getName() + "." + Twine(NameSuffix++), GV, GV->getThreadLocalMode(), GV->getAddressSpace()); + // Start out by copying attributes from the original, including alignment. NGV->copyAttributesFrom(GV); NewGlobals.insert({OffsetForTy, NGV}); // Calculate the known alignment of the field. If the original aggregate - // had 256 byte alignment for example, something might depend on that: + // had 256 byte alignment for example, then the element at a given offset + // may also have a known alignment, and something might depend on that: // propagate info to each field. Align NewAlign = commonAlignment(StartAlignment, OffsetForTy); - if (NewAlign > DL.getABITypeAlign(Ty)) - NGV->setAlignment(NewAlign); + NGV->setAlignment(std::max(NewAlign, DL.getABITypeAlign(Ty))); // Copy over the debug info for the variable. transferSRADebugInfo(GV, NGV, OffsetForTy * 8, diff --git a/llvm/test/DebugInfo/X86/global-sra-struct-fit-segment.ll b/llvm/test/DebugInfo/X86/global-sra-struct-fit-segment.ll index 94b7deec95f41..ab48ef2411f35 100644 --- a/llvm/test/DebugInfo/X86/global-sra-struct-fit-segment.ll +++ b/llvm/test/DebugInfo/X86/global-sra-struct-fit-segment.ll @@ -20,7 +20,7 @@ %struct.BSS1 = type <{ [12 x i8] }> ;CHECK: @.BSS1.0 = internal unnamed_addr global i32 0, align 32, !dbg ![[GVE1:.*]] -;CHECK: @.BSS1.1 = internal unnamed_addr global i32 0, align 32, !dbg ![[GVE2:.*]], !dbg ![[GVE4:.*]] +;CHECK: @.BSS1.1 = internal unnamed_addr global i32 0, align 4, !dbg ![[GVE2:.*]], !dbg ![[GVE4:.*]] ;CHECK: @.BSS1.2 = internal unnamed_addr global i32 0, align 8, !dbg ![[GVE3:.*]] @.BSS1 = internal global %struct.BSS1 zeroinitializer, align 32, !dbg !0, !dbg !7, !dbg !10, !dbg !27, !dbg !29 diff --git a/llvm/test/DebugInfo/X86/global-sra-struct-part-overlap-segment.ll b/llvm/test/DebugInfo/X86/global-sra-struct-part-overlap-segment.ll index d7854cb822e3e..9dd865edbbdad 100644 --- a/llvm/test/DebugInfo/X86/global-sra-struct-part-overlap-segment.ll +++ b/llvm/test/DebugInfo/X86/global-sra-struct-part-overlap-segment.ll @@ -26,7 +26,7 @@ @.BSS3 = internal unnamed_addr global %struct.BSS3 zeroinitializer, align 32, !dbg !0, !dbg !7, !dbg !29 ;CHECK: @.BSS3.0 = internal unnamed_addr global double 0.000000e+00, align 32, !dbg ![[GVE1:.*]], !dbg ![[GVE2:.*]] -;CHECK: @.BSS3.1 = internal unnamed_addr global double 0.000000e+00, align 32, !dbg ![[GVE3:.*]], !dbg ![[GVE4:.*]], !dbg ![[GVE6:.*]] +;CHECK: @.BSS3.1 = internal unnamed_addr global double 0.000000e+00, align 8, !dbg ![[GVE3:.*]], !dbg ![[GVE4:.*]], !dbg ![[GVE6:.*]] ;CHECK: @.BSS3.2 = internal unnamed_addr global double 0.000000e+00, align 16, !dbg ![[GVE5:.*]] @.C363_mymod_bar_ = internal constant [2 x i8] c"IF" diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-align.ll b/llvm/test/Transforms/GlobalOpt/globalsra-align.ll index aadd5b866a822..74ccb3f279ed0 100644 --- a/llvm/test/Transforms/GlobalOpt/globalsra-align.ll +++ b/llvm/test/Transforms/GlobalOpt/globalsra-align.ll @@ -15,9 +15,9 @@ target datalayout = "p:16:32:64" ; 16-bit pointers with 32-bit ABI alignment and ;. ; CHECK: @[[A_4:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global ptr null, align 8 -; CHECK: @[[A_5:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global ptr null, align 16 +; CHECK: @[[A_5:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global ptr null, align 8 ; CHECK: @[[A_6:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global ptr null, align 16 -; CHECK: @[[A_7:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global ptr null, align 16 +; CHECK: @[[A_7:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global ptr null, align 8 ;. define ptr @reduce_align_0() { ; CHECK-LABEL: @reduce_align_0( @@ -31,7 +31,7 @@ define ptr @reduce_align_0() { define ptr @reduce_align_1() { ; CHECK-LABEL: @reduce_align_1( -; CHECK-NEXT: [[X:%.*]] = load ptr, ptr @a.5, align 16 +; CHECK-NEXT: [[X:%.*]] = load ptr, ptr @a.5, align 8 ; CHECK-NEXT: ret ptr [[X]] ; %x = load ptr, ptr getelementptr inbounds ([3 x [7 x ptr]], ptr @a, i64 0, i64 2, i64 1), align 4 @@ -51,7 +51,7 @@ define ptr @reduce_align_2() { define ptr @reduce_align_3() { ; CHECK-LABEL: @reduce_align_3( -; CHECK-NEXT: [[X:%.*]] = load ptr, ptr @a.7, align 16 +; CHECK-NEXT: [[X:%.*]] = load ptr, ptr @a.7, align 8 ; CHECK-NEXT: ret ptr [[X]] ; %x = load ptr, ptr getelementptr inbounds ([3 x [7 x ptr]], ptr @a, i64 0, i64 2, i64 3), align 4 From f5151ec2cdf70e7ccb7975a69010edef0479a497 Mon Sep 17 00:00:00 2001 From: Bruno De Fraine Date: Thu, 7 Nov 2024 22:56:44 +0100 Subject: [PATCH 2/4] Checking for ABI type align is unnecessary --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 9aaa0212fc844..3381b5f77683b 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -584,7 +584,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // may also have a known alignment, and something might depend on that: // propagate info to each field. Align NewAlign = commonAlignment(StartAlignment, OffsetForTy); - NGV->setAlignment(std::max(NewAlign, DL.getABITypeAlign(Ty))); + NGV->setAlignment(NewAlign); // Copy over the debug info for the variable. transferSRADebugInfo(GV, NGV, OffsetForTy * 8, From 0a590284a2a03808ac2d5c9d7a4df16e21f6641e Mon Sep 17 00:00:00 2001 From: Bruno De Fraine Date: Thu, 7 Nov 2024 23:28:15 +0100 Subject: [PATCH 3/4] [GlobalOpt] Add test case for issue #115282 --- llvm/test/Transforms/GlobalOpt/pr115282.ll | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 llvm/test/Transforms/GlobalOpt/pr115282.ll diff --git a/llvm/test/Transforms/GlobalOpt/pr115282.ll b/llvm/test/Transforms/GlobalOpt/pr115282.ll new file mode 100644 index 0000000000000..45235490a113d --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/pr115282.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes=globalopt -S | FileCheck %s + +@params = internal global [4 x i32] zeroinitializer, align 32 + +;. +; CHECK: @params.0 = internal unnamed_addr global i32 0, align 32 +; CHECK: @params.1 = internal unnamed_addr global i32 0, align 32 +; CHECK: @params.2 = internal unnamed_addr global i32 0, align 8 +; CHECK: @params.3 = internal unnamed_addr global i32 0, align 32 +;. +define void @set(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: define void @set( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) local_unnamed_addr { +; CHECK-NEXT: store i32 [[A]], ptr @params.0, align 32 +; CHECK-NEXT: store i32 [[B]], ptr @params.1, align 32 +; CHECK-NEXT: store i32 [[C]], ptr @params.2, align 8 +; CHECK-NEXT: store i32 [[D]], ptr @params.3, align 32 +; CHECK-NEXT: ret void +; + store i32 %a, ptr @params + store i32 %b, ptr getelementptr ([4 x i32], ptr @params, i32 0, i32 1) + store i32 %c, ptr getelementptr ([4 x i32], ptr @params, i32 0, i32 2) + store i32 %d, ptr getelementptr ([4 x i32], ptr @params, i32 0, i32 3) + ret void +} + +%S = type { i32, i32, i32, i32 } + +define %S @get() { +; CHECK-LABEL: define %S @get() local_unnamed_addr { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr @params.0, align 32 +; CHECK-NEXT: [[SA:%.*]] = insertvalue [[S:%.*]] undef, i32 [[A]], 0 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr @params.1, align 32 +; CHECK-NEXT: [[SB:%.*]] = insertvalue [[S]] [[SA]], i32 [[B]], 1 +; CHECK-NEXT: [[C:%.*]] = load i32, ptr @params.2, align 8 +; CHECK-NEXT: [[SC:%.*]] = insertvalue [[S]] [[SB]], i32 [[C]], 2 +; CHECK-NEXT: [[D:%.*]] = load i32, ptr @params.3, align 32 +; CHECK-NEXT: [[SD:%.*]] = insertvalue [[S]] [[SC]], i32 [[D]], 3 +; CHECK-NEXT: ret [[S]] [[SD]] +; + %a = load i32, ptr @params + %sa = insertvalue %S undef, i32 %a, 0 + %b = load i32, ptr getelementptr ([4 x i32], ptr @params, i32 0, i32 1) + %sb = insertvalue %S %sa, i32 %b, 1 + %c = load i32, ptr getelementptr ([4 x i32], ptr @params, i32 0, i32 2) + %sc = insertvalue %S %sb, i32 %c, 2 + %d = load i32, ptr getelementptr ([4 x i32], ptr @params, i32 0, i32 3) + %sd = insertvalue %S %sc, i32 %d, 3 + ret %S %sd +} From f281c804827534c60e872df21815362a94f7a59d Mon Sep 17 00:00:00 2001 From: Bruno De Fraine Date: Thu, 7 Nov 2024 23:30:12 +0100 Subject: [PATCH 4/4] Update test case after fix --- llvm/test/Transforms/GlobalOpt/pr115282.ll | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/GlobalOpt/pr115282.ll b/llvm/test/Transforms/GlobalOpt/pr115282.ll index 45235490a113d..26e4b45fb7c96 100644 --- a/llvm/test/Transforms/GlobalOpt/pr115282.ll +++ b/llvm/test/Transforms/GlobalOpt/pr115282.ll @@ -5,17 +5,17 @@ ;. ; CHECK: @params.0 = internal unnamed_addr global i32 0, align 32 -; CHECK: @params.1 = internal unnamed_addr global i32 0, align 32 +; CHECK: @params.1 = internal unnamed_addr global i32 0, align 4 ; CHECK: @params.2 = internal unnamed_addr global i32 0, align 8 -; CHECK: @params.3 = internal unnamed_addr global i32 0, align 32 +; CHECK: @params.3 = internal unnamed_addr global i32 0, align 4 ;. define void @set(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: define void @set( ; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) local_unnamed_addr { ; CHECK-NEXT: store i32 [[A]], ptr @params.0, align 32 -; CHECK-NEXT: store i32 [[B]], ptr @params.1, align 32 +; CHECK-NEXT: store i32 [[B]], ptr @params.1, align 4 ; CHECK-NEXT: store i32 [[C]], ptr @params.2, align 8 -; CHECK-NEXT: store i32 [[D]], ptr @params.3, align 32 +; CHECK-NEXT: store i32 [[D]], ptr @params.3, align 4 ; CHECK-NEXT: ret void ; store i32 %a, ptr @params @@ -31,11 +31,11 @@ define %S @get() { ; CHECK-LABEL: define %S @get() local_unnamed_addr { ; CHECK-NEXT: [[A:%.*]] = load i32, ptr @params.0, align 32 ; CHECK-NEXT: [[SA:%.*]] = insertvalue [[S:%.*]] undef, i32 [[A]], 0 -; CHECK-NEXT: [[B:%.*]] = load i32, ptr @params.1, align 32 +; CHECK-NEXT: [[B:%.*]] = load i32, ptr @params.1, align 4 ; CHECK-NEXT: [[SB:%.*]] = insertvalue [[S]] [[SA]], i32 [[B]], 1 ; CHECK-NEXT: [[C:%.*]] = load i32, ptr @params.2, align 8 ; CHECK-NEXT: [[SC:%.*]] = insertvalue [[S]] [[SB]], i32 [[C]], 2 -; CHECK-NEXT: [[D:%.*]] = load i32, ptr @params.3, align 32 +; CHECK-NEXT: [[D:%.*]] = load i32, ptr @params.3, align 4 ; CHECK-NEXT: [[SD:%.*]] = insertvalue [[S]] [[SC]], i32 [[D]], 3 ; CHECK-NEXT: ret [[S]] [[SD]] ;