Skip to content

Commit a4e0574

Browse files
[DAG] Enhance canCreateUndefOrPoison for VECTOR_COMPRESS and add tests for freeze elimination
1 parent 97988f6 commit a4e0574

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5705,7 +5705,9 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
57055705
case ISD::VECTOR_COMPRESS:
57065706
// Return true only if undef is checked and at least one element is
57075707
// demanded, and only when the passthru (operand 2) is undef.
5708-
return !PoisonOnly && !DemandedElts.isZero();
5708+
if (Op.getOperand(2).isUndef())
5709+
return !PoisonOnly && !DemandedElts.isZero();
5710+
return false;
57095711

57105712
default:
57115713
// Allow the target to implement this method for its nodes.
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl | FileCheck %s
3+
4+
declare <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32>, <16 x i1>, <16 x i32>)
5+
6+
define <16 x i32> @test_compress_freeze_elimination(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) {
7+
; CHECK-LABEL: test_compress_freeze_elimination:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
10+
; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
11+
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12+
; CHECK-NEXT: vpcompressd %zmm1, %zmm0 {%k1}
13+
; CHECK-NEXT: retq
14+
%cmp = icmp sgt <16 x i32> %a0, %a1
15+
%ext = zext <16 x i8> %a3 to <16 x i32>
16+
%cpr = call <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32> %ext, <16 x i1> %cmp, <16 x i32> splat(i32 15))
17+
%fr = freeze <16 x i32> %cpr
18+
%and = and <16 x i32> %fr, splat(i32 255)
19+
ret <16 x i32> %and
20+
}
21+
22+
; Test 2: Negative case (freeze elimination must NOT happen)
23+
; PassThru is 'undef', so compress *can* generate undefs.
24+
; The freeze must remain to define those lanes.
25+
define <16 x i32> @test_compress_freeze_must_remain(<16 x i32> %a0, <16 x i32> %a1, <16 x i8> %a3) {
26+
; CHECK-LABEL: test_compress_freeze_must_remain:
27+
; CHECK: # %bb.0:
28+
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
29+
; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
30+
; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
31+
; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
32+
; CHECK-NEXT: retq
33+
%cmp = icmp sgt <16 x i32> %a0, %a1
34+
%ext = zext <16 x i8> %a3 to <16 x i32>
35+
%cpr = call <16 x i32> @llvm.experimental.vector.compress.v16i32(<16 x i32> %ext, <16 x i1> %cmp, <16 x i32> undef)
36+
%fr = freeze <16 x i32> %cpr
37+
%and = and <16 x i32> %fr, splat(i32 255)
38+
ret <16 x i32> %and
39+
}

0 commit comments

Comments
 (0)