@@ -66,57 +66,150 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
6666; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]]
6767; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
6868; CHECK: [[VECTOR_PH]]:
69- ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 [[TMP2]])
69+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 [[TMP2]])
7070; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
7171; CHECK: [[VECTOR_BODY]]:
72- ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE22:.*]] ]
73- ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ]
74- ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 4, i8 8, i8 12>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ]
75- ; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
76- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP28]], i64 0
77- ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
78- ; CHECK-NEXT: [[TMP25:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i64>
79- ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
72+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE46:.*]] ]
73+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE46]] ]
74+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE46]] ]
75+ ; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
76+ ; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP25]] to i64
8077; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP29]], 1
81- ; CHECK-NEXT: [[TMP26:%.*]] = zext <4 x i8> [[VEC_IND]] to <4 x i64>
82- ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
83- ; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
78+ ; CHECK-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[VEC_IND]] to <16 x i64>
79+ ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
80+ ; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
8481; CHECK: [[PRED_STORE_IF]]:
85- ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i64> [[TMP26]], i32 0
82+ ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <16 x i64> [[TMP27]], i32 0
8683; CHECK-NEXT: [[TMP103:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP102]], i64 [[OFF]]
8784; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP103]], align 8, !alias.scope [[META3]]
8885; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
8986; CHECK: [[PRED_STORE_CONTINUE]]:
90- ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
91- ; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
87+ ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1
88+ ; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
9289; CHECK: [[PRED_STORE_IF17]]:
93- ; CHECK-NEXT: [[TMP108:%.*]] = extractelement <4 x i64> [[TMP26]], i32 1
90+ ; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x i64> [[TMP27]], i32 1
9491; CHECK-NEXT: [[TMP109:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP108]], i64 [[OFF]]
9592; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP109]], align 8, !alias.scope [[META3]]
9693; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
9794; CHECK: [[PRED_STORE_CONTINUE18]]:
98- ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
99- ; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
95+ ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2
96+ ; CHECK-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
10097; CHECK: [[PRED_STORE_IF19]]:
101- ; CHECK-NEXT: [[TMP114:%.*]] = extractelement <4 x i64> [[TMP26]], i32 2
98+ ; CHECK-NEXT: [[TMP114:%.*]] = extractelement <16 x i64> [[TMP27]], i32 2
10299; CHECK-NEXT: [[TMP115:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP114]], i64 [[OFF]]
103100; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP115]], align 8, !alias.scope [[META3]]
104101; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
105102; CHECK: [[PRED_STORE_CONTINUE20]]:
106- ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
107- ; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22]]
103+ ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3
104+ ; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
108105; CHECK: [[PRED_STORE_IF21]]:
109- ; CHECK-NEXT: [[TMP120:%.*]] = extractelement <4 x i64> [[TMP26]], i32 3
106+ ; CHECK-NEXT: [[TMP120:%.*]] = extractelement <16 x i64> [[TMP27]], i32 3
110107; CHECK-NEXT: [[TMP121:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP120]], i64 [[OFF]]
111108; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP121]], align 8, !alias.scope [[META3]]
112109; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
113110; CHECK: [[PRED_STORE_CONTINUE22]]:
111+ ; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4
112+ ; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
113+ ; CHECK: [[PRED_STORE_IF23]]:
114+ ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i64> [[TMP27]], i32 4
115+ ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP41]], i64 [[OFF]]
116+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP42]], align 8, !alias.scope [[META3]]
117+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
118+ ; CHECK: [[PRED_STORE_CONTINUE24]]:
119+ ; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5
120+ ; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
121+ ; CHECK: [[PRED_STORE_IF25]]:
122+ ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i64> [[TMP27]], i32 5
123+ ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP44]], i64 [[OFF]]
124+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP45]], align 8, !alias.scope [[META3]]
125+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
126+ ; CHECK: [[PRED_STORE_CONTINUE26]]:
127+ ; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6
128+ ; CHECK-NEXT: br i1 [[TMP46]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
129+ ; CHECK: [[PRED_STORE_IF27]]:
130+ ; CHECK-NEXT: [[TMP76:%.*]] = extractelement <16 x i64> [[TMP27]], i32 6
131+ ; CHECK-NEXT: [[TMP77:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP76]], i64 [[OFF]]
132+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP77]], align 8, !alias.scope [[META3]]
133+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
134+ ; CHECK: [[PRED_STORE_CONTINUE28]]:
135+ ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7
136+ ; CHECK-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
137+ ; CHECK: [[PRED_STORE_IF29]]:
138+ ; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i64> [[TMP27]], i32 7
139+ ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP50]], i64 [[OFF]]
140+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP51]], align 8, !alias.scope [[META3]]
141+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]]
142+ ; CHECK: [[PRED_STORE_CONTINUE30]]:
143+ ; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8
144+ ; CHECK-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]]
145+ ; CHECK: [[PRED_STORE_IF31]]:
146+ ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <16 x i64> [[TMP27]], i32 8
147+ ; CHECK-NEXT: [[TMP54:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP53]], i64 [[OFF]]
148+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP54]], align 8, !alias.scope [[META3]]
149+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE32]]
150+ ; CHECK: [[PRED_STORE_CONTINUE32]]:
151+ ; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9
152+ ; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]]
153+ ; CHECK: [[PRED_STORE_IF33]]:
154+ ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i64> [[TMP27]], i32 9
155+ ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP56]], i64 [[OFF]]
156+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP57]], align 8, !alias.scope [[META3]]
157+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE34]]
158+ ; CHECK: [[PRED_STORE_CONTINUE34]]:
159+ ; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10
160+ ; CHECK-NEXT: br i1 [[TMP58]], label %[[PRED_STORE_IF35:.*]], label %[[PRED_STORE_CONTINUE36:.*]]
161+ ; CHECK: [[PRED_STORE_IF35]]:
162+ ; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i64> [[TMP27]], i32 10
163+ ; CHECK-NEXT: [[TMP60:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP59]], i64 [[OFF]]
164+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP60]], align 8, !alias.scope [[META3]]
165+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE36]]
166+ ; CHECK: [[PRED_STORE_CONTINUE36]]:
167+ ; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11
168+ ; CHECK-NEXT: br i1 [[TMP61]], label %[[PRED_STORE_IF37:.*]], label %[[PRED_STORE_CONTINUE38:.*]]
169+ ; CHECK: [[PRED_STORE_IF37]]:
170+ ; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i64> [[TMP27]], i32 11
171+ ; CHECK-NEXT: [[TMP63:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP62]], i64 [[OFF]]
172+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP63]], align 8, !alias.scope [[META3]]
173+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE38]]
174+ ; CHECK: [[PRED_STORE_CONTINUE38]]:
175+ ; CHECK-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12
176+ ; CHECK-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF39:.*]], label %[[PRED_STORE_CONTINUE40:.*]]
177+ ; CHECK: [[PRED_STORE_IF39]]:
178+ ; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i64> [[TMP27]], i32 12
179+ ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP65]], i64 [[OFF]]
180+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP66]], align 8, !alias.scope [[META3]]
181+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE40]]
182+ ; CHECK: [[PRED_STORE_CONTINUE40]]:
183+ ; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13
184+ ; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF41:.*]], label %[[PRED_STORE_CONTINUE42:.*]]
185+ ; CHECK: [[PRED_STORE_IF41]]:
186+ ; CHECK-NEXT: [[TMP68:%.*]] = extractelement <16 x i64> [[TMP27]], i32 13
187+ ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP68]], i64 [[OFF]]
188+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP69]], align 8, !alias.scope [[META3]]
189+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE42]]
190+ ; CHECK: [[PRED_STORE_CONTINUE42]]:
191+ ; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14
192+ ; CHECK-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF43:.*]], label %[[PRED_STORE_CONTINUE44:.*]]
193+ ; CHECK: [[PRED_STORE_IF43]]:
194+ ; CHECK-NEXT: [[TMP71:%.*]] = extractelement <16 x i64> [[TMP27]], i32 14
195+ ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP71]], i64 [[OFF]]
196+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP72]], align 8, !alias.scope [[META3]]
197+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE44]]
198+ ; CHECK: [[PRED_STORE_CONTINUE44]]:
199+ ; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15
200+ ; CHECK-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46]]
201+ ; CHECK: [[PRED_STORE_IF45]]:
202+ ; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i64> [[TMP27]], i32 15
203+ ; CHECK-NEXT: [[TMP75:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP74]], i64 [[OFF]]
204+ ; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP75]], align 8, !alias.scope [[META3]]
205+ ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE46]]
206+ ; CHECK: [[PRED_STORE_CONTINUE46]]:
114207; CHECK-NEXT: store i8 0, ptr [[DST_2]], align 1, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
115- ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
116- ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]])
117- ; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
208+ ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
209+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]])
210+ ; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
118211; CHECK-NEXT: [[TMP48:%.*]] = xor i1 [[TMP47]], true
119- ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 16)
212+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 64)
120213; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
121214; CHECK: [[MIDDLE_BLOCK]]:
122215; CHECK-NEXT: br label %[[EXIT:.*]]
0 commit comments