Skip to content

Commit b3cf704

Browse files
committed
[CodeGen] Fix the computation of the alignment of split stores.
By Clement Courbet. Backported from rG15488ff24b4a.
1 parent 6f69240 commit b3cf704

File tree

3 files changed

+195
-2
lines changed

3 files changed

+195
-2
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6857,12 +6857,20 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
68576857
Value *Addr = Builder.CreateBitCast(
68586858
SI.getOperand(1),
68596859
SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
6860-
if ((IsLE && Upper) || (!IsLE && !Upper))
6860+
const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
6861+
if (IsOffsetStore)
68616862
Addr = Builder.CreateGEP(
68626863
SplitStoreType, Addr,
68636864
ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
6865+
MaybeAlign Alignment(SI.getAlignment());
6866+
if (IsOffsetStore && Alignment) {
6867+
// When splitting the store in half, naturally one half will retain the
6868+
// alignment of the original wider store, regardless of whether it was
6869+
// over-aligned or not, while the other will require adjustment.
6870+
Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
6871+
}
68646872
Builder.CreateAlignedStore(
6865-
V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
6873+
V, Addr, Alignment.hasValue() ? Alignment.getValue().value() : 0);
68666874
};
68676875

68686876
CreateSplitStore(LValue, false);
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -codegenprepare -mtriple=powerpc64-unknown-linux-gnu -data-layout="E-m:e-i64:64-n32:64" -force-split-store < %s | FileCheck --check-prefixes=ALL,BE %s
3+
; RUN: opt -S -codegenprepare -mtriple=powerpc64le-unknown-linux-gnu -data-layout="e-m:e-i64:64-n32:64" -force-split-store < %s | FileCheck --check-prefixes=ALL,LE %s
4+
5+
define void @split_store_align1(float %x, i64* %p) {
6+
; BE-LABEL: @split_store_align1(
7+
; BE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
8+
; BE-NEXT: [[Z:%.*]] = zext i32 0 to i64
9+
; BE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
10+
; BE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
11+
; BE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
12+
; BE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
13+
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
14+
; BE-NEXT: store i32 [[B]], i32* [[TMP2]], align 1
15+
; BE-NEXT: [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
16+
; BE-NEXT: store i32 0, i32* [[TMP3]], align 1
17+
; BE-NEXT: ret void
18+
;
19+
; LE-LABEL: @split_store_align1(
20+
; LE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
21+
; LE-NEXT: [[Z:%.*]] = zext i32 0 to i64
22+
; LE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
23+
; LE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
24+
; LE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
25+
; LE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
26+
; LE-NEXT: store i32 [[B]], i32* [[TMP1]], align 1
27+
; LE-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
28+
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
29+
; LE-NEXT: store i32 0, i32* [[TMP3]], align 1
30+
; LE-NEXT: ret void
31+
;
32+
%b = bitcast float %x to i32
33+
%z = zext i32 0 to i64
34+
%s = shl nuw nsw i64 %z, 32
35+
%z2 = zext i32 %b to i64
36+
%o = or i64 %s, %z2
37+
store i64 %o, i64* %p, align 1
38+
ret void
39+
}
40+
41+
define void @split_store_align2(float %x, i64* %p) {
42+
; BE-LABEL: @split_store_align2(
43+
; BE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
44+
; BE-NEXT: [[Z:%.*]] = zext i32 0 to i64
45+
; BE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
46+
; BE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
47+
; BE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
48+
; BE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
49+
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
50+
; BE-NEXT: store i32 [[B]], i32* [[TMP2]], align 2
51+
; BE-NEXT: [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
52+
; BE-NEXT: store i32 0, i32* [[TMP3]], align 2
53+
; BE-NEXT: ret void
54+
;
55+
; LE-LABEL: @split_store_align2(
56+
; LE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
57+
; LE-NEXT: [[Z:%.*]] = zext i32 0 to i64
58+
; LE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
59+
; LE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
60+
; LE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
61+
; LE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
62+
; LE-NEXT: store i32 [[B]], i32* [[TMP1]], align 2
63+
; LE-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
64+
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
65+
; LE-NEXT: store i32 0, i32* [[TMP3]], align 2
66+
; LE-NEXT: ret void
67+
;
68+
%b = bitcast float %x to i32
69+
%z = zext i32 0 to i64
70+
%s = shl nuw nsw i64 %z, 32
71+
%z2 = zext i32 %b to i64
72+
%o = or i64 %s, %z2
73+
store i64 %o, i64* %p, align 2
74+
ret void
75+
}
76+
77+
define void @split_store_align8(float %x, i64* %p) {
78+
; BE-LABEL: @split_store_align8(
79+
; BE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
80+
; BE-NEXT: [[Z:%.*]] = zext i32 0 to i64
81+
; BE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
82+
; BE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
83+
; BE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
84+
; BE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
85+
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
86+
; BE-NEXT: store i32 [[B]], i32* [[TMP2]], align 4
87+
; BE-NEXT: [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
88+
; BE-NEXT: store i32 0, i32* [[TMP3]], align 8
89+
; BE-NEXT: ret void
90+
;
91+
; LE-LABEL: @split_store_align8(
92+
; LE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
93+
; LE-NEXT: [[Z:%.*]] = zext i32 0 to i64
94+
; LE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
95+
; LE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
96+
; LE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
97+
; LE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
98+
; LE-NEXT: store i32 [[B]], i32* [[TMP1]], align 8
99+
; LE-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
100+
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
101+
; LE-NEXT: store i32 0, i32* [[TMP3]], align 4
102+
; LE-NEXT: ret void
103+
;
104+
%b = bitcast float %x to i32
105+
%z = zext i32 0 to i64
106+
%s = shl nuw nsw i64 %z, 32
107+
%z2 = zext i32 %b to i64
108+
%o = or i64 %s, %z2
109+
store i64 %o, i64* %p, align 8
110+
ret void
111+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -codegenprepare -mtriple=x86_64-unknown-unknown -force-split-store -S < %s | FileCheck %s
3+
4+
target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
5+
target triple = "i686-w64-windows-gnu"
6+
7+
define void @split_store_align1(float %x, i64* %p) {
8+
; CHECK-LABEL: @split_store_align1(
9+
; CHECK-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
10+
; CHECK-NEXT: [[Z:%.*]] = zext i32 0 to i64
11+
; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
12+
; CHECK-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
13+
; CHECK-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
14+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
15+
; CHECK-NEXT: store i32 [[B]], i32* [[TMP1]], align 1
16+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
17+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
18+
; CHECK-NEXT: store i32 0, i32* [[TMP3]], align 1
19+
; CHECK-NEXT: ret void
20+
;
21+
%b = bitcast float %x to i32
22+
%z = zext i32 0 to i64
23+
%s = shl nuw nsw i64 %z, 32
24+
%z2 = zext i32 %b to i64
25+
%o = or i64 %s, %z2
26+
store i64 %o, i64* %p, align 1
27+
ret void
28+
}
29+
30+
define void @split_store_align2(float %x, i64* %p) {
31+
; CHECK-LABEL: @split_store_align2(
32+
; CHECK-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
33+
; CHECK-NEXT: [[Z:%.*]] = zext i32 0 to i64
34+
; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
35+
; CHECK-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
36+
; CHECK-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
37+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
38+
; CHECK-NEXT: store i32 [[B]], i32* [[TMP1]], align 2
39+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
40+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
41+
; CHECK-NEXT: store i32 0, i32* [[TMP3]], align 2
42+
; CHECK-NEXT: ret void
43+
;
44+
%b = bitcast float %x to i32
45+
%z = zext i32 0 to i64
46+
%s = shl nuw nsw i64 %z, 32
47+
%z2 = zext i32 %b to i64
48+
%o = or i64 %s, %z2
49+
store i64 %o, i64* %p, align 2
50+
ret void
51+
}
52+
53+
define void @split_store_align8(float %x, i64* %p) {
54+
; CHECK-LABEL: @split_store_align8(
55+
; CHECK-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
56+
; CHECK-NEXT: [[Z:%.*]] = zext i32 0 to i64
57+
; CHECK-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
58+
; CHECK-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
59+
; CHECK-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
60+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
61+
; CHECK-NEXT: store i32 [[B]], i32* [[TMP1]], align 8
62+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
63+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
64+
; CHECK-NEXT: store i32 0, i32* [[TMP3]], align 4
65+
; CHECK-NEXT: ret void
66+
;
67+
%b = bitcast float %x to i32
68+
%z = zext i32 0 to i64
69+
%s = shl nuw nsw i64 %z, 32
70+
%z2 = zext i32 %b to i64
71+
%o = or i64 %s, %z2
72+
store i64 %o, i64* %p, align 8
73+
ret void
74+
}

0 commit comments

Comments
 (0)