-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[X86][DAGCombiner][SelectionDAG] - Fold Zext of Build Vector to Bitcast of Widened Build Vector #135010
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
e5c1914
483c273
a48660c
ff669c6
e776464
9dc5117
dd28865
321b52e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,258 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 | ||
| ; RUN: llc -mcpu=znver5 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s | ||
|
||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable -- Test foov8i8: eight i8 values are loaded from %a at byte offsets 0..7 times %a_stride, assembled with insertelement into <8 x i8>, zero-extended to <8 x i32>, multiplied by the sign-extended <8 x i16> loaded from %b, and summed with llvm.vector.reduce.add. Pattern exercised: zext(build_vector). The CHECK lines expect the bytes to be inserted at even byte lanes (vpinsrb $2 through $14) and fed to vpmaddwd, with no vpmovzx* instruction in the sequence. | ||
| define dso_local i32 @foov8i8(ptr nocapture noundef readonly %a, i64 noundef %a_stride, ptr nocapture noundef readonly %b) local_unnamed_addr { | ||
| ; CHECK-LABEL: foov8i8: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: movzbl (%rdi), %eax | ||
| ; CHECK-NEXT: leaq (%rsi,%rsi,2), %rcx | ||
| ; CHECK-NEXT: leaq (%rsi,%rsi,4), %r8 | ||
| ; CHECK-NEXT: leaq (,%rsi,8), %r9 | ||
| ; CHECK-NEXT: subq %rsi, %r9 | ||
| ; CHECK-NEXT: vmovd %eax, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $2, (%rdi,%rsi), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $4, (%rdi,%rsi,2), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $6, (%rdi,%rcx), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $8, (%rdi,%rsi,4), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $10, (%rdi,%r8), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $12, (%rdi,%rcx,2), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $14, (%rdi,%r9), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpmaddwd (%rdx), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] | ||
| ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] | ||
| ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vmovd %xmm0, %eax | ||
| ; CHECK-NEXT: retq | ||
| entry: | ||
| %var0 = load i8, ptr %a, align 1 | ||
| %arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 %a_stride | ||
| %var1 = load i8, ptr %arrayidx.1, align 1 | ||
| %mul.2 = shl nsw i64 %a_stride, 1 | ||
| %arrayidx.2 = getelementptr inbounds i8, ptr %a, i64 %mul.2 | ||
| %var2 = load i8, ptr %arrayidx.2, align 1 | ||
| %mul.3 = mul nsw i64 %a_stride, 3 | ||
| %arrayidx.3 = getelementptr inbounds i8, ptr %a, i64 %mul.3 | ||
| %var3 = load i8, ptr %arrayidx.3, align 1 | ||
| %mul.4 = shl nsw i64 %a_stride, 2 | ||
| %arrayidx.4 = getelementptr inbounds i8, ptr %a, i64 %mul.4 | ||
| %var4 = load i8, ptr %arrayidx.4, align 1 | ||
| %mul.5 = mul nsw i64 %a_stride, 5 | ||
| %arrayidx.5 = getelementptr inbounds i8, ptr %a, i64 %mul.5 | ||
| %var5 = load i8, ptr %arrayidx.5, align 1 | ||
| %mul.6 = mul nsw i64 %a_stride, 6 | ||
| %arrayidx.6 = getelementptr inbounds i8, ptr %a, i64 %mul.6 | ||
| %var6 = load i8, ptr %arrayidx.6, align 1 | ||
| %mul.7 = mul nsw i64 %a_stride, 7 | ||
| %arrayidx.7 = getelementptr inbounds i8, ptr %a, i64 %mul.7 | ||
| %var7 = load i8, ptr %arrayidx.7, align 1 | ||
| %var8 = insertelement <8 x i8> poison, i8 %var0, i64 0 | ||
| %var9 = insertelement <8 x i8> %var8, i8 %var1, i64 1 | ||
| %var10 = insertelement <8 x i8> %var9, i8 %var2, i64 2 | ||
| %var11 = insertelement <8 x i8> %var10, i8 %var3, i64 3 | ||
| %var12 = insertelement <8 x i8> %var11, i8 %var4, i64 4 | ||
| %var13 = insertelement <8 x i8> %var12, i8 %var5, i64 5 | ||
| %var14 = insertelement <8 x i8> %var13, i8 %var6, i64 6 | ||
| %var15 = insertelement <8 x i8> %var14, i8 %var7, i64 7 | ||
| %var16 = zext <8 x i8> %var15 to <8 x i32> | ||
| %var17 = load <8 x i16>, ptr %b, align 2 | ||
| %var18 = sext <8 x i16> %var17 to <8 x i32> | ||
| %var19 = mul nsw <8 x i32> %var18, %var16 | ||
| %var20 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %var19) | ||
| ret i32 %var20 | ||
| } | ||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable -- Test foov4i8: four i8 values are loaded from %a at byte offsets 0..3 times %a_stride, assembled with insertelement into <4 x i8>, zero-extended to <4 x i32>, multiplied by the sign-extended <4 x i16> loaded from %b, and summed with llvm.vector.reduce.add. Pattern exercised: zext(build_vector). The CHECK lines expect the loaded bytes to be inserted at byte lanes 4, 8 and 12 (vpinsrb) of the register seeded by vmovd, and the product to be formed with vpmaddwd. | ||
| define dso_local i32 @foov4i8(ptr nocapture noundef readonly %a, i64 noundef %a_stride, ptr nocapture noundef readonly %b) local_unnamed_addr { | ||
| ; CHECK-LABEL: foov4i8: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: movzbl (%rdi), %eax | ||
| ; CHECK-NEXT: leaq (%rsi,%rsi,2), %rcx | ||
| ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero | ||
| ; CHECK-NEXT: vmovd %eax, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $4, (%rdi,%rsi), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $8, (%rdi,%rsi,2), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $12, (%rdi,%rcx), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] | ||
| ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] | ||
| ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vmovd %xmm0, %eax | ||
| ; CHECK-NEXT: retq | ||
| entry: | ||
| %var0 = load i8, ptr %a, align 1 | ||
| %arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 %a_stride | ||
| %var1 = load i8, ptr %arrayidx.1, align 1 | ||
| %mul.2 = shl nsw i64 %a_stride, 1 | ||
| %arrayidx.2 = getelementptr inbounds i8, ptr %a, i64 %mul.2 | ||
| %var2 = load i8, ptr %arrayidx.2, align 1 | ||
| %mul.3 = mul nsw i64 %a_stride, 3 | ||
| %arrayidx.3 = getelementptr inbounds i8, ptr %a, i64 %mul.3 | ||
| %var3 = load i8, ptr %arrayidx.3, align 1 | ||
| %var8 = insertelement <4 x i8> poison, i8 %var0, i64 0 | ||
| %var9 = insertelement <4 x i8> %var8, i8 %var1, i64 1 | ||
| %var10 = insertelement <4 x i8> %var9, i8 %var2, i64 2 | ||
| %var11 = insertelement <4 x i8> %var10, i8 %var3, i64 3 | ||
| %var16 = zext <4 x i8> %var11 to <4 x i32> | ||
| %var17 = load <4 x i16>, ptr %b, align 2 | ||
| %var18 = sext <4 x i16> %var17 to <4 x i32> | ||
| %var19 = mul nsw <4 x i32> %var18, %var16 | ||
| %var20 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %var19) | ||
| ret i32 %var20 | ||
| } | ||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable -- Test foov2i8: two i8 values are loaded from %a (offset 0 and offset %a_stride), assembled with insertelement into <2 x i8>, zero-extended to <2 x i32>, multiplied by the sign-extended <2 x i16> loaded from %b, and summed with llvm.vector.reduce.add. Pattern exercised: zext(build_vector). The CHECK lines expect the second byte to be inserted at byte lane 4 (vpinsrb $4) of the register seeded by vmovd, and the product to be formed with vpmaddwd. | ||
| define dso_local i32 @foov2i8(ptr nocapture noundef readonly %a, i64 noundef %a_stride, ptr nocapture noundef readonly %b) local_unnamed_addr { | ||
| ; CHECK-LABEL: foov2i8: | ||
| ; CHECK: # %bb.0: | ||
| ; CHECK-NEXT: movzbl (%rdi), %eax | ||
| ; CHECK-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero | ||
| ; CHECK-NEXT: vmovd %eax, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $4, (%rdi,%rsi), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero | ||
| ; CHECK-NEXT: vpmaddwd %xmm0, %xmm1, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] | ||
| ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vmovd %xmm0, %eax | ||
| ; CHECK-NEXT: retq | ||
| %var0 = load i8, ptr %a, align 1 | ||
| %arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 %a_stride | ||
| %var1 = load i8, ptr %arrayidx.1, align 1 | ||
| %var8 = insertelement <2 x i8> poison, i8 %var0, i64 0 | ||
| %var9 = insertelement <2 x i8> %var8, i8 %var1, i64 1 | ||
| %var16 = zext <2 x i8> %var9 to <2 x i32> | ||
| %var17 = load <2 x i16>, ptr %b, align 2 | ||
| %var18 = sext <2 x i16> %var17 to <2 x i32> | ||
| %var19 = mul nsw <2 x i32> %var18, %var16 | ||
| %var20 = tail call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %var19) | ||
| ret i32 %var20 | ||
| } | ||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable -- Test foov2i8_v2i64: two i8 values are loaded from %a (offset 0 and offset %a_stride), assembled with insertelement into <2 x i8>, zero-extended to <2 x i64>, multiplied by the sign-extended <2 x i8> loaded from %b, and summed with llvm.vector.reduce.add. Pattern exercised: zext(build_vector) with an i8 to i64 widening. The CHECK lines expect the second byte to be inserted at byte lane 8 (vpinsrb $8) of the register seeded by vmovd, and the product to be formed with vpmuldq. | ||
| define dso_local i64 @foov2i8_v2i64(ptr nocapture noundef readonly %a, i64 noundef %a_stride, ptr nocapture noundef readonly %b) local_unnamed_addr { | ||
| ; CHECK-LABEL: foov2i8_v2i64: | ||
| ; CHECK: # %bb.0: | ||
| ; CHECK-NEXT: movzbl (%rdi), %eax | ||
| ; CHECK-NEXT: vpmovsxbq (%rdx), %xmm1 | ||
| ; CHECK-NEXT: vmovd %eax, %xmm0 | ||
| ; CHECK-NEXT: vpinsrb $8, (%rdi,%rsi), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpmuldq %xmm0, %xmm1, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] | ||
| ; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vmovq %xmm0, %rax | ||
| ; CHECK-NEXT: retq | ||
| %var0 = load i8, ptr %a, align 1 | ||
| %arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 %a_stride | ||
| %var1 = load i8, ptr %arrayidx.1, align 1 | ||
| %var8 = insertelement <2 x i8> poison, i8 %var0, i64 0 | ||
| %var9 = insertelement <2 x i8> %var8, i8 %var1, i64 1 | ||
| %var16 = zext <2 x i8> %var9 to <2 x i64> | ||
| %var17 = load <2 x i8>, ptr %b, align 2 | ||
| %var18 = sext <2 x i8> %var17 to <2 x i64> | ||
| %var19 = mul nsw <2 x i64> %var18, %var16 | ||
| %var20 = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %var19) | ||
| ret i64 %var20 | ||
| } | ||
|
|
||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable -- Test foov4i16: four i16 values are loaded (align 1) from %a at byte offsets 0..3 times %a_stride, assembled with insertelement into <4 x i16>, zero-extended to <4 x i32>, multiplied by the sign-extended <4 x i16> loaded from %b, and summed with llvm.vector.reduce.add. Pattern exercised: zext(build_vector) with an i16 to i32 widening. The CHECK lines expect the loaded words to be inserted at word lanes 2, 4 and 6 (vpinsrw) of the register seeded by vmovd, and the product to be formed with vpmulld. | ||
| define dso_local i32 @foov4i16(ptr nocapture noundef readonly %a, i64 noundef %a_stride, ptr nocapture noundef readonly %b) local_unnamed_addr { | ||
| ; CHECK-LABEL: foov4i16: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: movzwl (%rdi), %eax | ||
| ; CHECK-NEXT: leaq (%rsi,%rsi,2), %rcx | ||
| ; CHECK-NEXT: vpmovsxwd (%rdx), %xmm1 | ||
| ; CHECK-NEXT: vmovd %eax, %xmm0 | ||
| ; CHECK-NEXT: vpinsrw $2, (%rdi,%rsi), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrw $4, (%rdi,%rsi,2), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpinsrw $6, (%rdi,%rcx), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpmulld %xmm0, %xmm1, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] | ||
| ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] | ||
| ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vmovd %xmm0, %eax | ||
| ; CHECK-NEXT: retq | ||
| entry: | ||
| %var0 = load i16, ptr %a, align 1 | ||
| %arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 %a_stride | ||
| %var1 = load i16, ptr %arrayidx.1, align 1 | ||
| %mul.2 = shl nsw i64 %a_stride, 1 | ||
| %arrayidx.2 = getelementptr inbounds i8, ptr %a, i64 %mul.2 | ||
| %var2 = load i16, ptr %arrayidx.2, align 1 | ||
| %mul.3 = mul nsw i64 %a_stride, 3 | ||
| %arrayidx.3 = getelementptr inbounds i8, ptr %a, i64 %mul.3 | ||
| %var3 = load i16, ptr %arrayidx.3, align 1 | ||
| %var8 = insertelement <4 x i16> poison, i16 %var0, i64 0 | ||
| %var9 = insertelement <4 x i16> %var8, i16 %var1, i64 1 | ||
| %var10 = insertelement <4 x i16> %var9, i16 %var2, i64 2 | ||
| %var11 = insertelement <4 x i16> %var10, i16 %var3, i64 3 | ||
| %var16 = zext <4 x i16> %var11 to <4 x i32> | ||
| %var17 = load <4 x i16>, ptr %b, align 2 | ||
| %var18 = sext <4 x i16> %var17 to <4 x i32> | ||
| %var19 = mul nsw <4 x i32> %var18, %var16 | ||
| %var20 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %var19) | ||
| ret i32 %var20 | ||
| } | ||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable -- Test foov2i16: two i16 values are loaded (align 1) from %a (offset 0 and offset %a_stride), assembled with insertelement into <2 x i16>, zero-extended to <2 x i32>, multiplied by the sign-extended <2 x i16> loaded from %b, and summed with llvm.vector.reduce.add. Pattern exercised: zext(build_vector). NOTE(review): unlike the sibling tests, the CHECK lines here insert the second word at the adjacent word lane (vpinsrw $1) and then apply a separate vpmovzxwd to xmm0 before vpmulld, so the expected output still contains an explicit zero-extend of the built vector - confirm this is the intended result for this type. | ||
| define dso_local i32 @foov2i16(ptr nocapture noundef readonly %a, i64 noundef %a_stride, ptr nocapture noundef readonly %b) local_unnamed_addr { | ||
| ; CHECK-LABEL: foov2i16: | ||
| ; CHECK: # %bb.0: | ||
| ; CHECK-NEXT: movzwl (%rdi), %eax | ||
| ; CHECK-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero | ||
| ; CHECK-NEXT: vmovd %eax, %xmm0 | ||
| ; CHECK-NEXT: vpinsrw $1, (%rdi,%rsi), %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vpmovsxwd %xmm1, %xmm1 | ||
| ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero | ||
| ; CHECK-NEXT: vpmulld %xmm0, %xmm1, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] | ||
| ; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vmovd %xmm0, %eax | ||
| ; CHECK-NEXT: retq | ||
| %var0 = load i16, ptr %a, align 1 | ||
| %arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 %a_stride | ||
| %var1 = load i16, ptr %arrayidx.1, align 1 | ||
| %var8 = insertelement <2 x i16> poison, i16 %var0, i64 0 | ||
| %var9 = insertelement <2 x i16> %var8, i16 %var1, i64 1 | ||
| %var16 = zext <2 x i16> %var9 to <2 x i32> | ||
| %var17 = load <2 x i16>, ptr %b, align 2 | ||
| %var18 = sext <2 x i16> %var17 to <2 x i32> | ||
| %var19 = mul nsw <2 x i32> %var18, %var16 | ||
| %var20 = tail call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %var19) | ||
| ret i32 %var20 | ||
| } | ||
|
|
||
| ; Function Attrs: nofree norecurse nosync nounwind memory(argmem: read) uwtable -- Test foov2i32: two i32 values are loaded (align 1) from %a (offset 0 and offset %a_stride), assembled with insertelement into <2 x i32>, zero-extended to <2 x i64>, multiplied by the sign-extended <2 x i32> loaded from %b, and summed with llvm.vector.reduce.add. Pattern exercised: zext(build_vector) with an i32 to i64 widening. The CHECK lines expect two vmovd loads (each zeroing the upper lanes) combined with vpunpcklqdq, and the product to be formed with vpmullq. | ||
| define dso_local i64 @foov2i32(ptr nocapture noundef readonly %a, i64 noundef %a_stride, ptr nocapture noundef readonly %b) local_unnamed_addr { | ||
| ; CHECK-LABEL: foov2i32: | ||
| ; CHECK: # %bb.0: | ||
| ; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
| ; CHECK-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero | ||
| ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] | ||
| ; CHECK-NEXT: vpmovsxdq (%rdx), %xmm1 | ||
| ; CHECK-NEXT: vpmullq %xmm0, %xmm1, %xmm0 | ||
| ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] | ||
| ; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 | ||
| ; CHECK-NEXT: vmovq %xmm0, %rax | ||
| ; CHECK-NEXT: retq | ||
| %var0 = load i32, ptr %a, align 1 | ||
| %arrayidx.1 = getelementptr inbounds i8, ptr %a, i64 %a_stride | ||
| %var1 = load i32, ptr %arrayidx.1, align 1 | ||
| %var8 = insertelement <2 x i32> poison, i32 %var0, i64 0 | ||
| %var9 = insertelement <2 x i32> %var8, i32 %var1, i64 1 | ||
| %var16 = zext <2 x i32> %var9 to <2 x i64> | ||
| %var17 = load <2 x i32>, ptr %b, align 2 | ||
| %var18 = sext <2 x i32> %var17 to <2 x i64> | ||
| %var19 = mul nsw <2 x i64> %var18, %var16 | ||
| %var20 = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %var19) | ||
| ret i64 %var20 | ||
| } | ||
|
|
||
| ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -- Declarations of the llvm.vector.reduce.add intrinsics called by the tests in this file (v8i32, v4i32, v2i32 and v2i64 variants). NOTE(review): each declaration references attribute group #1, but no matching attributes #1 definition is visible in this diff - confirm whether the reference should be dropped or the group defined. | ||
| declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #1 | ||
| declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #1 | ||
| declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) #1 | ||
| declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #1 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a DAG-format comment describing the matched pattern.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm Do you mean adding a comment such as `zext(build_vec) -> bitcast(build_vec)` that visualizes the transformation?