[MemCpyOpt] Fix a variety of scalable-type crashes
This patch fixes a variety of crashes resulting from the `MemCpyOptPass`
casting `TypeSize` to a constant integer, whether implicitly or
explicitly.
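To make the failure mode concrete, here is a minimal C++ sketch (illustrative only, not the pass's actual code): `TypeSize`'s implicit conversion to a plain integer is only valid for fixed-size types and asserts when the size is scalable.

    // Minimal sketch of the crash pattern; not taken from MemCpyOpt itself.
    #include "llvm/Support/TypeSize.h"
    using namespace llvm;

    uint64_t storeSizeInBits(TypeSize TS) {
      // For a fixed-size type this implicit conversion is harmless, but for a
      // scalable type such as <vscale x 4 x float> the size is only known as a
      // multiple of vscale, and the conversion asserts at runtime.
      uint64_t Bits = TS;
      return Bits;
    }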
Since `MemsetRanges` requires a constant size to work, all but one
of the fixes in this patch simply involve skipping the various
optimizations for scalable types as cleanly as possible.
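The shape of those guards is roughly the following (a sketch with illustrative variable names; `DL` stands for the module's `DataLayout`):

    // Bail out before any code that needs a constant size.
    TypeSize StoreSize = DL.getTypeStoreSize(StoredValue->getType());
    if (StoreSize.isScalable())
      return false; // MemsetRanges needs a constant size; skip scalable types.
    uint64_t Size = StoreSize.getFixedSize(); // safe only after the check above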
The optimization of `byval` parameters, however, has been updated to
work on scalable types in theory. In practice, this optimization is only
valid when the length of the `memcpy` is known to be larger than the
scalable type size, which is currently never the case. This could
perhaps be done in the future using the `vscale_range` attribute.
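As a rough sketch of what such a future check might look like (hypothetical code; `getMinVScale` is an assumed helper reading the function's `vscale_range` attribute, and `CopyLen`/`ByValTy` are illustrative names):

    // Hypothetical: with a lower bound on vscale, a memcpy of CopyLen bytes
    // provably covers a scalable byval type when
    //   CopyLen >= KnownMinSize * MinVScale.
    uint64_t MinVScale = getMinVScale(F); // assumed helper, not a real API
    TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
    if (ByValSize.isScalable() &&
        CopyLen >= ByValSize.getKnownMinSize() * MinVScale) {
      // The copy covers the whole scalable object; forwarding would be valid.
    }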
Some implicit casts have been left as they were, since the code paths
in question are only reached for aggregate types, which should never
be scalably-sized.
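For context, a sketch of why those call sites are safe (illustrative only, restating the reasoning above):

    // Aggregates (arrays and structs) cannot contain scalable vectors, so
    // their store size is always fixed and the implicit TypeSize -> uint64_t
    // cast cannot trap on these paths.
    uint64_t aggregateStoreSize(Type *Ty, const DataLayout &DL) {
      assert(Ty->isAggregateType() && "only reached for aggregate types");
      return DL.getTypeStoreSize(Ty); // implicit cast is safe here
    }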
Reviewed By: nikic, tra
Differential Revision: https://reviews.llvm.org/D109329
(cherry-picked from commit 7fb66d4)
+; CHECK-NEXT: [[PTR_NEXT_2:%.*]] = bitcast i8* [[PTR_NEXT]] to <vscale x 1 x i8>*
+; CHECK-NEXT: store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* [[PTR_NEXT_2]], align 1
+; CHECK-NEXT: ret void
+;
+  store i8 zeroinitializer, i8* %ptr
+  %ptr.next = getelementptr i8, i8* %ptr, i64 1
+  %ptr.next.2 = bitcast i8* %ptr.next to <vscale x 1 x i8>*
+  store <vscale x 1 x i8> zeroinitializer, <vscale x 1 x i8>* %ptr.next.2
+  ret void
+}
+
+; Check that the call-slot optimization doesn't crash when encountering scalable types.
+define void @callslotoptzn(<vscale x 4 x float> %val, <vscale x 4 x float>* %out) {
+; CHECK-LABEL: @callslotoptzn(
+; CHECK-NEXT: [[ALLOC:%.*]] = alloca <vscale x 4 x float>, align 16
+; CHECK-NEXT: [[IDX:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+; CHECK-NEXT: [[BALLOC:%.*]] = getelementptr inbounds <vscale x 4 x float>, <vscale x 4 x float>* [[ALLOC]], i64 0, i64 0
+; CHECK-NEXT: [[STRIDE:%.*]] = getelementptr inbounds float, float* [[BALLOC]], <vscale x 4 x i32> [[IDX]]
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> [[VAL:%.*]], <vscale x 4 x float*> [[STRIDE]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: [[LI:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[ALLOC]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[LI]], <vscale x 4 x float>* [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+  %alloc = alloca <vscale x 4 x float>, align 16
+  %idx = tail call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %balloc = getelementptr inbounds <vscale x 4 x float>, <vscale x 4 x float>* %alloc, i64 0, i64 0
+  %stride = getelementptr inbounds float, float* %balloc, <vscale x 4 x i32> %idx
+  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %stride, i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+  %li = load <vscale x 4 x float>, <vscale x 4 x float>* %alloc, align 4
+  store <vscale x 4 x float> %li, <vscale x 4 x float>* %out, align 4
+  ret void
+}
+
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)