[clang][AArch64][SVE] Avoid going through memory for coerced VLST return values
VLST (fixed-length SVE vector) return values are coerced to VLATs (scalable
vectors) in the function epilogue for consistency with the VLAT ABI.
Previously, this coercion was done through memory: the fixed vector was
stored to a stack slot and reloaded as a scalable vector. It is preferable
to use the llvm.experimental.vector.insert intrinsic instead, avoiding the
round trip through memory.
Reviewed By: c-rhodes
Differential Revision: https://reviews.llvm.org/D94290
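For context, here is a minimal sketch of the kind of function this change affects. It assumes -msve-vector-bits=512 (so the fixed-length type is 512 bits, i.e. <16 x i32> in IR) and mirrors the fixed_callee function exercised by the test below; the exact typedef and bit width used by the test may differ.

#include <arm_sve.h>

// Fixed-length (VLST) view of an SVE vector, pinned to 512 bits.
typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

// The source-level return type is the fixed-length fixed_int32_t, but the
// AArch64 ABI returns it as a scalable <vscale x 4 x i32> (VLAT). With this
// patch the coercion in the epilogue is a single
// llvm.experimental.vector.insert rather than a store/load round trip
// through a retval alloca, as the updated CHECK lines show.
fixed_int32_t fixed_callee(fixed_int32_t x) {
  return x;
}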
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <[[#div(VBITS, 32)]] x i32>*
-// CHECK-NEXT: store <[[#div(VBITS, 32)]] x i32> [[ADD]], <[[#div(VBITS, 32)]] x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v[[#div(VBITS, 32)]]i32(<vscale x 4 x i32> undef, <[[#div(VBITS, 32)]] x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]

-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT: [[X:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[X]], i64 0)
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[CASTSCALABLESVE]], i64 0)
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT: [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]
 //
 fixed_int32_t fixed_caller(fixed_int32_t x) {
   return sizeless_callee(x);
 }

 // CHECK-LABEL: @fixed_callee(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT: [[X:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT: store <16 x i32> [[X]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[X]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
 //
 fixed_int32_t fixed_callee(fixed_int32_t x) {
   return x;
 }

 // CHECK-LABEL: @sizeless_caller(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[COERCE_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT: [[COERCE1:%.*]] = alloca <16 x i32>, align 16
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X:%.*]], i64 0)
-// CHECK-NEXT: [[COERCE_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[COERCE_COERCE]] to <16 x i32>*
-// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[COERCE_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[COERCE_COERCE]], align 16
-// CHECK-NEXT: [[CALL:%.*]] = call <vscale x 4 x i32> @fixed_callee(<vscale x 4 x i32> [[TMP0]])
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[COERCE1]] to <vscale x 4 x i32>*
-// CHECK-NEXT: store <vscale x 4 x i32> [[CALL]], <vscale x 4 x i32>* [[TMP1]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP2]], i64 0)
-// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT: [[CALL:%.*]] = call <vscale x 4 x i32> @fixed_callee(<vscale x 4 x i32> [[CASTSCALABLESVE]])
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[COERCE1]] to <vscale x 4 x i32>*
+// CHECK-NEXT: store <vscale x 4 x i32> [[CALL]], <vscale x 4 x i32>* [[TMP0]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-NEXT: [[CASTSCALABLESVE2:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE2]]

-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
-// CHECK-NEXT: [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT: [[OP2:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE1:%.*]], i64 0)
+// CHECK-NEXT: [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[OP2:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[OP2_COERCE:%.*]], i64 0)
 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP1]], i64 0)
-// CHECK-NEXT: [[CASTSCALABLESVE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP2]], i64 0)
+// CHECK-NEXT: [[CASTSCALABLESVE2:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP2]], i64 0)
 // CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[CASTSCALABLESVE3]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[CASTSCALABLESVE2]])
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+// CHECK-NEXT: [[CASTSCALABLESVE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE3]]

-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x double>, align 16
-// CHECK-NEXT: [[OP1:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X_COERCE:%.*]], i64 0)
-// CHECK-NEXT: [[OP2:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X_COERCE1:%.*]], i64 0)
+// CHECK-NEXT: [[OP1:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[OP1_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[OP2:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[OP2_COERCE:%.*]], i64 0)
 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[OP1]], i64 0)
-// CHECK-NEXT: [[CASTSCALABLESVE3:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[OP2]], i64 0)
+// CHECK-NEXT: [[CASTSCALABLESVE2:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[OP2]], i64 0)
 // CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[CASTSCALABLESVE]], <vscale x 2 x double> [[CASTSCALABLESVE3]])
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[CASTSCALABLESVE]], <vscale x 2 x double> [[CASTSCALABLESVE2]])
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 2 x double>* [[RETVAL_COERCE]] to <8 x double>*
-// CHECK-NEXT: store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+// CHECK-NEXT: [[CASTSCALABLESVE3:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE3]]

-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
-// CHECK-NEXT: [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[OP1:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[OP1_COERCE:%.*]], i64 0)
 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[OP1]], i64 0)
 // CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[OP2:%.*]])
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+// CHECK-NEXT: [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]

-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x double>, align 16
-// CHECK-NEXT: [[OP1:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[OP1:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[OP1_COERCE:%.*]], i64 0)
 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[OP1]], i64 0)
 // CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[CASTSCALABLESVE]], <vscale x 2 x double> [[OP2:%.*]])
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 2 x double>* [[RETVAL_COERCE]] to <8 x double>*
-// CHECK-NEXT: store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+// CHECK-NEXT: [[CASTSCALABLESVE1:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE1]]

-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 4 x i32>, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sel.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 4 x i32>* [[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]

-// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x double>, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.sel.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[TMP1]], i64 0)
-// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 2 x double>* [[RETVAL_COERCE]] to <8 x double>*
-// CHECK-NEXT: store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 2 x double>, <vscale x 2 x double>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[CASTFIXEDSVE]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]