@@ -137,8 +137,8 @@ exit: ; preds = %exit.loopexit, %entry
137
137
ret i32 %res.0.lcssa
138
138
}
139
139
140
- define i32 @common_sext_different_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
141
- ; CHECK-LABEL: define i32 @common_sext_different_types (
140
+ define i32 @common_sext_different_src_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
141
+ ; CHECK-LABEL: define i32 @common_sext_different_src_types (
142
142
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
143
143
; CHECK-NEXT: [[ENTRY:.*:]]
144
144
; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
@@ -222,8 +222,9 @@ exit: ; preds = %exit.loopexit, %entry
222
222
%res.0.lcssa = phi i32 [ %add3 , %for.body ]
223
223
ret i32 %res.0.lcssa
224
224
}
225
- define i32 @common_zext_different_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
226
- ; CHECK-LABEL: define i32 @common_zext_different_types(
225
+
226
+ define i32 @common_zext_different_src_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
227
+ ; CHECK-LABEL: define i32 @common_zext_different_src_types(
227
228
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
228
229
; CHECK-NEXT: [[ENTRY:.*:]]
229
230
; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
@@ -307,3 +308,153 @@ exit: ; preds = %exit.loopexit, %entry
307
308
%res.0.lcssa = phi i32 [ %add3 , %for.body ]
308
309
ret i32 %res.0.lcssa
309
310
}
311
+
312
+ define i32 @common_sext_different_dest_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
313
+ ; CHECK-LABEL: define i32 @common_sext_different_dest_types(
314
+ ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
315
+ ; CHECK-NEXT: [[ENTRY:.*:]]
316
+ ; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
317
+ ; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
318
+ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
319
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
320
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
321
+ ; CHECK: [[VECTOR_PH]]:
322
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
323
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
324
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
325
+ ; CHECK: [[VECTOR_BODY]]:
326
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
327
+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
328
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]]
329
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]]
330
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 0
331
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
332
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0
333
+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
334
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0
335
+ ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
336
+ ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
337
+ ; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i32>
338
+ ; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i16>
339
+ ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[WIDE_LOAD2]] to <4 x i16>
340
+ ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP7]]
341
+ ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <4 x i16> [[TMP9]], [[TMP8]]
342
+ ; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
343
+ ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP10]], [[TMP12]]
344
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
345
+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
346
+ ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
347
+ ; CHECK: [[MIDDLE_BLOCK]]:
348
+ ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
349
+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
350
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
351
+ ; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
352
+ ; CHECK: [[SCALAR_PH]]:
353
+ ; Scalar input loop: %b.val is sign-extended twice, once to i32 and once to i16, so the two extensions share a source value but have different destination types. The function returns the final iteration's %add3.
354
+ entry:
355
+ %cmp28.not = icmp ult i32 %N , 2 ; result is not read by any instruction in this function
356
+ %div27 = lshr i32 %N , 1 ; trip count is N / 2
357
+ %wide.trip.count = zext nneg i32 %div27 to i64
358
+ br label %for.body
359
+
360
+ for.body: ; preds = %entry, %for.body
361
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ]
362
+ %res = phi i32 [ 0 , %entry ], [ %add3 , %for.body ] ; holds the previous iteration's %add3; no instruction in this function reads it
363
+ %a.ptr = getelementptr inbounds nuw i8 , ptr %a , i64 %iv
364
+ %b.ptr = getelementptr inbounds nuw i8 , ptr %b , i64 %iv
365
+ %b.ptr2 = getelementptr inbounds nuw i16 , ptr %b , i64 %iv ; never used: both extensions below read %b.val, loaded through %b.ptr
366
+ %c.ptr = getelementptr inbounds nuw i8 , ptr %c , i64 %iv
367
+ %a.val = load i8 , ptr %a.ptr , align 1
368
+ %b.val = load i8 , ptr %b.ptr , align 1
369
+ %c.val = load i8 , ptr %c.ptr , align 1
370
+ %a.ext = sext i8 %a.val to i32
371
+ %b.ext = sext i8 %b.val to i32 ; first extension of %b.val: i8 -> i32
372
+ %b.ext2 = sext i8 %b.val to i16 ; second extension of %b.val: i8 -> i16
373
+ %c.ext = sext i8 %c.val to i16
374
+ %add = add nsw i32 %a.ext , %b.ext
375
+ %add2 = add nsw i16 %c.ext , %b.ext2
376
+ %add2.ext = sext i16 %add2 to i32 ; widen the i16 sum so it can be combined with the i32 sum
377
+ %add3 = add i32 %add , %add2.ext
378
+ %iv.next = add nuw nsw i64 %iv , 1
379
+ %exitcond = icmp eq i64 %iv.next , %wide.trip.count
380
+ br i1 %exitcond , label %exit , label %for.body
381
+
382
+ exit: ; preds = %for.body
383
+ %res.0.lcssa = phi i32 [ %add3 , %for.body ]
384
+ ret i32 %res.0.lcssa
385
+ }
386
+
387
+ define i32 @common_zext_different_dest_types (ptr %a , ptr %b , ptr %c , i32 %N ) {
388
+ ; CHECK-LABEL: define i32 @common_zext_different_dest_types(
389
+ ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) {
390
+ ; CHECK-NEXT: [[ENTRY:.*:]]
391
+ ; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2
392
+ ; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1
393
+ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64
394
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
395
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
396
+ ; CHECK: [[VECTOR_PH]]:
397
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
398
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
399
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
400
+ ; CHECK: [[VECTOR_BODY]]:
401
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
402
+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
403
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]]
404
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]]
405
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 0
406
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
407
+ ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0
408
+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
409
+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0
410
+ ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
411
+ ; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
412
+ ; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32>
413
+ ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i16>
414
+ ; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i16>
415
+ ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP7]]
416
+ ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <4 x i16> [[TMP9]], [[TMP8]]
417
+ ; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32>
418
+ ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP10]], [[TMP12]]
419
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
420
+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
421
+ ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
422
+ ; CHECK: [[MIDDLE_BLOCK]]:
423
+ ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
424
+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
425
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
426
+ ; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
427
+ ; CHECK: [[SCALAR_PH]]:
428
+ ; Scalar input loop: %b.val is zero-extended twice, once to i32 and once to i16, so the two extensions share a source value but have different destination types. The function returns the final iteration's %add3.
429
+ entry:
430
+ %cmp28.not = icmp ult i32 %N , 2 ; result is not read by any instruction in this function
431
+ %div27 = lshr i32 %N , 1 ; trip count is N / 2
432
+ %wide.trip.count = zext nneg i32 %div27 to i64
433
+ br label %for.body
434
+
435
+ for.body: ; preds = %entry, %for.body
436
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ]
437
+ %res = phi i32 [ 0 , %entry ], [ %add3 , %for.body ] ; holds the previous iteration's %add3; no instruction in this function reads it
438
+ %a.ptr = getelementptr inbounds nuw i8 , ptr %a , i64 %iv
439
+ %b.ptr = getelementptr inbounds nuw i8 , ptr %b , i64 %iv
440
+ %b.ptr2 = getelementptr inbounds nuw i16 , ptr %b , i64 %iv ; never used: both extensions below read %b.val, loaded through %b.ptr
441
+ %c.ptr = getelementptr inbounds nuw i8 , ptr %c , i64 %iv
442
+ %a.val = load i8 , ptr %a.ptr , align 1
443
+ %b.val = load i8 , ptr %b.ptr , align 1
444
+ %c.val = load i8 , ptr %c.ptr , align 1
445
+ %a.ext = zext i8 %a.val to i32
446
+ %b.ext = zext i8 %b.val to i32 ; first extension of %b.val: i8 -> i32
447
+ %b.ext2 = zext i8 %b.val to i16 ; second extension of %b.val: i8 -> i16
448
+ %c.ext = zext i8 %c.val to i16
449
+ %add = add nsw i32 %a.ext , %b.ext
450
+ %add2 = add nsw i16 %c.ext , %b.ext2
451
+ %add2.ext = zext i16 %add2 to i32 ; widen the i16 sum so it can be combined with the i32 sum
452
+ %add3 = add i32 %add , %add2.ext
453
+ %iv.next = add nuw nsw i64 %iv , 1
454
+ %exitcond = icmp eq i64 %iv.next , %wide.trip.count
455
+ br i1 %exitcond , label %exit , label %for.body
456
+
457
+ exit: ; preds = %for.body
458
+ %res.0.lcssa = phi i32 [ %add3 , %for.body ]
459
+ ret i32 %res.0.lcssa
460
+ }
0 commit comments