Skip to content

Commit 28a20b4

Browse files
authored
[VectorCombine] Avoid inserting freeze when scalarizing extend-extract if all extracts would lead to UB on poison. (#164683)
This change aims to avoid inserting a freeze instruction between the load and bitcast when scalarizing extend-extract. This is particularly useful in combination with #164682, which can then potentially further scalarize, provided there is no freeze. alive2 proof: https://alive2.llvm.org/ce/z/W-GD88
1 parent 8cb0c0c commit 28a20b4

File tree

2 files changed

+211
-2
lines changed

2 files changed

+211
-2
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2017,8 +2017,31 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
20172017

20182018
Value *ScalarV = Ext->getOperand(0);
20192019
if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
2020-
&DT))
2021-
ScalarV = Builder.CreateFreeze(ScalarV);
2020+
&DT)) {
2021+
// Check whether all lanes are extracted, all extracts trigger UB
2022+
// on poison, and the last extract (and hence all previous ones)
2023+
// are guaranteed to execute if Ext executes. If so, we do not
2024+
// need to insert a freeze.
2025+
SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2026+
bool AllExtractsTriggerUB = true;
2027+
ExtractElementInst *LastExtract = nullptr;
2028+
BasicBlock *ExtBB = Ext->getParent();
2029+
for (User *U : Ext->users()) {
2030+
auto *Extract = cast<ExtractElementInst>(U);
2031+
if (Extract->getParent() != ExtBB || !programUndefinedIfPoison(Extract)) {
2032+
AllExtractsTriggerUB = false;
2033+
break;
2034+
}
2035+
ExtractedLanes.insert(cast<ConstantInt>(Extract->getIndexOperand()));
2036+
if (!LastExtract || LastExtract->comesBefore(Extract))
2037+
LastExtract = Extract;
2038+
}
2039+
if (ExtractedLanes.size() != DstTy->getNumElements() ||
2040+
!AllExtractsTriggerUB ||
2041+
!isGuaranteedToTransferExecutionToSuccessor(Ext->getIterator(),
2042+
LastExtract->getIterator()))
2043+
ScalarV = Builder.CreateFreeze(ScalarV);
2044+
}
20222045
ScalarV = Builder.CreateBitCast(
20232046
ScalarV,
20242047
IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));

llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,3 +346,189 @@ entry:
346346
call void @use.i32(i32 %ext.3)
347347
ret void
348348
}
349+
350+
define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
351+
; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
352+
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
353+
; CHECK-NEXT: [[ENTRY:.*:]]
354+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[SRC]] to i32
355+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
356+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0]], 16
357+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
358+
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
359+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
360+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 255
361+
; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
362+
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
363+
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
364+
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
365+
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
366+
; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP6]], [[TMP5]]
367+
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP3]]
368+
; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
369+
; CHECK-NEXT: ret i32 [[ADD3]]
370+
;
371+
entry:
372+
%ext = zext nneg <4 x i8> %src to <4 x i32>
373+
%ext.0 = extractelement <4 x i32> %ext, i64 0
374+
%ext.1 = extractelement <4 x i32> %ext, i64 1
375+
%ext.2 = extractelement <4 x i32> %ext, i64 2
376+
%ext.3 = extractelement <4 x i32> %ext, i64 3
377+
378+
%add1 = add i32 %ext.0, %ext.1
379+
%add2 = add i32 %add1, %ext.2
380+
%add3 = add i32 %add2, %ext.3
381+
ret i32 %add3
382+
}
383+
384+
define noundef i32 @zext_v4i8_not_all_lanes_used(<4 x i8> %src) {
385+
; CHECK-LABEL: define noundef i32 @zext_v4i8_not_all_lanes_used(
386+
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
387+
; CHECK-NEXT: [[ENTRY:.*:]]
388+
; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i8> [[SRC]]
389+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
390+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
391+
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
392+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
393+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 255
394+
; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
395+
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
396+
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
397+
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
398+
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP6]], [[TMP5]]
399+
; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
400+
; CHECK-NEXT: ret i32 [[ADD3]]
401+
;
402+
entry:
403+
%ext = zext nneg <4 x i8> %src to <4 x i32>
404+
%ext.0 = extractelement <4 x i32> %ext, i64 0
405+
%ext.1 = extractelement <4 x i32> %ext, i64 1
406+
%ext.3 = extractelement <4 x i32> %ext, i64 3
407+
408+
%add1 = add i32 %ext.0, %ext.1
409+
%add2 = add i32 %add1, %ext.3
410+
ret i32 %add2
411+
}
412+
413+
define i32 @zext_v4i8_all_lanes_used_no_ub(<4 x i8> %src) {
414+
; CHECK-LABEL: define i32 @zext_v4i8_all_lanes_used_no_ub(
415+
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
416+
; CHECK-NEXT: [[ENTRY:.*:]]
417+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
418+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
419+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
420+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
421+
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
422+
; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
423+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
424+
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
425+
; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
426+
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
427+
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
428+
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
429+
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
430+
; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
431+
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
432+
; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
433+
; CHECK-NEXT: ret i32 [[ADD3]]
434+
;
435+
entry:
436+
%ext = zext nneg <4 x i8> %src to <4 x i32>
437+
%ext.0 = extractelement <4 x i32> %ext, i64 0
438+
%ext.1 = extractelement <4 x i32> %ext, i64 1
439+
%ext.2 = extractelement <4 x i32> %ext, i64 2
440+
%ext.3 = extractelement <4 x i32> %ext, i64 3
441+
442+
%add1 = add i32 %ext.0, %ext.1
443+
%add2 = add i32 %add1, %ext.2
444+
%add3 = add i32 %add2, %ext.3
445+
ret i32 %add3
446+
}
447+
448+
define noundef i32 @zext_v4i8_extracts_different_blocks(<4 x i8> %src, i1 %cond) {
449+
; CHECK-LABEL: define noundef i32 @zext_v4i8_extracts_different_blocks(
450+
; CHECK-SAME: <4 x i8> [[SRC:%.*]], i1 [[COND:%.*]]) {
451+
; CHECK-NEXT: [[ENTRY:.*:]]
452+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
453+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
454+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
455+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
456+
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
457+
; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
458+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
459+
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
460+
; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
461+
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
462+
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
463+
; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
464+
; CHECK: [[THEN]]:
465+
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
466+
; CHECK-NEXT: br label %[[EXIT:.*]]
467+
; CHECK: [[ELSE]]:
468+
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
469+
; CHECK-NEXT: br label %[[EXIT]]
470+
; CHECK: [[EXIT]]:
471+
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP4]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
472+
; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
473+
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[PHI]]
474+
; CHECK-NEXT: ret i32 [[ADD2]]
475+
;
476+
entry:
477+
%ext = zext nneg <4 x i8> %src to <4 x i32>
478+
%ext.0 = extractelement <4 x i32> %ext, i64 0
479+
%ext.1 = extractelement <4 x i32> %ext, i64 1
480+
br i1 %cond, label %then, label %else
481+
482+
then:
483+
%ext.2 = extractelement <4 x i32> %ext, i64 2
484+
br label %exit
485+
486+
else:
487+
%ext.3 = extractelement <4 x i32> %ext, i64 3
488+
br label %exit
489+
490+
exit:
491+
%phi = phi i32 [ %ext.2, %then ], [ %ext.3, %else ]
492+
%add1 = add i32 %ext.0, %ext.1
493+
%add2 = add i32 %add1, %phi
494+
ret i32 %add2
495+
}
496+
497+
498+
declare void @may_throw() willreturn
499+
500+
define noundef i32 @zext_v4i8_throwing_call_between(<4 x i8> %src) {
501+
; CHECK-LABEL: define noundef i32 @zext_v4i8_throwing_call_between(
502+
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
503+
; CHECK-NEXT: [[ENTRY:.*:]]
504+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
505+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
506+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
507+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
508+
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
509+
; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
510+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
511+
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
512+
; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
513+
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
514+
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
515+
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
516+
; CHECK-NEXT: call void @may_throw()
517+
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
518+
; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
519+
; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
520+
; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
521+
; CHECK-NEXT: ret i32 [[ADD3]]
522+
;
523+
entry:
524+
%ext = zext nneg <4 x i8> %src to <4 x i32>
525+
%ext.0 = extractelement <4 x i32> %ext, i64 0
526+
%ext.1 = extractelement <4 x i32> %ext, i64 1
527+
%ext.2 = extractelement <4 x i32> %ext, i64 2
528+
call void @may_throw()
529+
%ext.3 = extractelement <4 x i32> %ext, i64 3
530+
%add1 = add i32 %ext.0, %ext.1
531+
%add2 = add i32 %add1, %ext.2
532+
%add3 = add i32 %add2, %ext.3
533+
ret i32 %add3
534+
}

0 commit comments

Comments
 (0)