Skip to content

[CIR][CIRGen][Builtin][X86] Lower avx512 gather intrinsics #1785

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 89 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -710,8 +710,95 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_gathersiv8di:
case X86::BI__builtin_ia32_gathersiv16si:
case X86::BI__builtin_ia32_gatherdiv8di:
case X86::BI__builtin_ia32_gatherdiv16si:
llvm_unreachable("gather3div2df NYI");
case X86::BI__builtin_ia32_gatherdiv16si: {
StringRef intrinsicName;
switch (BuiltinID) {
default:
llvm_unreachable("Unexpected builtin");
case X86::BI__builtin_ia32_gather3div2df:
intrinsicName = "x86.avx512.mask.gather3div2.df";
break;
case X86::BI__builtin_ia32_gather3div2di:
intrinsicName = "x86.avx512.mask.gather3div2.di";
break;
case X86::BI__builtin_ia32_gather3div4df:
intrinsicName = "x86.avx512.mask.gather3div4.df";
break;
case X86::BI__builtin_ia32_gather3div4di:
intrinsicName = "x86.avx512.mask.gather3div4.di";
break;
case X86::BI__builtin_ia32_gather3div4sf:
intrinsicName = "x86.avx512.mask.gather3div4.sf";
break;
case X86::BI__builtin_ia32_gather3div4si:
intrinsicName = "x86.avx512.mask.gather3div4.si";
break;
case X86::BI__builtin_ia32_gather3div8sf:
intrinsicName = "x86.avx512.mask.gather3div8.sf";
break;
case X86::BI__builtin_ia32_gather3div8si:
intrinsicName = "x86.avx512.mask.gather3div8.si";
break;
case X86::BI__builtin_ia32_gather3siv2df:
intrinsicName = "x86.avx512.mask.gather3siv2.df";
break;
case X86::BI__builtin_ia32_gather3siv2di:
intrinsicName = "x86.avx512.mask.gather3siv2.di";
break;
case X86::BI__builtin_ia32_gather3siv4df:
intrinsicName = "x86.avx512.mask.gather3siv4.df";
break;
case X86::BI__builtin_ia32_gather3siv4di:
intrinsicName = "x86.avx512.mask.gather3siv4.di";
break;
case X86::BI__builtin_ia32_gather3siv4sf:
intrinsicName = "x86.avx512.mask.gather3siv4.sf";
break;
case X86::BI__builtin_ia32_gather3siv4si:
intrinsicName = "x86.avx512.mask.gather3siv4.si";
break;
case X86::BI__builtin_ia32_gather3siv8sf:
intrinsicName = "x86.avx512.mask.gather3siv8.sf";
break;
case X86::BI__builtin_ia32_gather3siv8si:
intrinsicName = "x86.avx512.mask.gather3siv8.si";
break;
case X86::BI__builtin_ia32_gathersiv8df:
intrinsicName = "x86.avx512.mask.gather.dpd.512";
break;
case X86::BI__builtin_ia32_gathersiv16sf:
intrinsicName = "x86.avx512.mask.gather.dps.512";
break;
case X86::BI__builtin_ia32_gatherdiv8df:
intrinsicName = "x86.avx512.mask.gather.qpd.512";
break;
case X86::BI__builtin_ia32_gatherdiv16sf:
intrinsicName = "x86.avx512.mask.gather.qps.512";
break;
case X86::BI__builtin_ia32_gathersiv8di:
intrinsicName = "x86.avx512.mask.gather.dpq.512";
break;
case X86::BI__builtin_ia32_gathersiv16si:
intrinsicName = "x86.avx512.mask.gather.dpi.512";
break;
case X86::BI__builtin_ia32_gatherdiv8di:
intrinsicName = "x86.avx512.mask.gather.qpq.512";
break;
case X86::BI__builtin_ia32_gatherdiv16si:
intrinsicName = "x86.avx512.mask.gather.qpi.512";
break;
}

unsigned minElts =
std::min(cast<cir::VectorType>(Ops[0].getType()).getSize(),
cast<cir::VectorType>(Ops[2].getType()).getSize());
Ops[3] = getMaskVecValue(*this, Ops[3], minElts, getLoc(E->getExprLoc()));
return builder
.create<cir::LLVMIntrinsicCallOp>(
getLoc(E->getExprLoc()), builder.getStringAttr(intrinsicName.str()),
convertType(E->getType()), Ops)
.getResult();
}
case X86::BI__builtin_ia32_scattersiv8df:
case X86::BI__builtin_ia32_scattersiv16sf:
case X86::BI__builtin_ia32_scatterdiv8df:
Expand Down
135 changes: 135 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,138 @@ __m512i test_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) {
// LLVM: @llvm.masked.expandload.v16i32(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
return _mm512_maskz_expandloadu_epi32(__U, __P);
}

__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) {
// CIR-LABEL: _mm512_i32gather_pd
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpd.512"

// LLVM-LABEL: test_mm512_i32gather_pd
// LLVM: @llvm.x86.avx512.mask.gather.dpd.512
return _mm512_i32gather_pd(__index, __addr, 2);
}

__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CIR-LABEL: _mm512_mask_i32gather_pd
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpd.512"

// LLVM-LABEL: test_mm512_mask_i32gather_pd
// LLVM: @llvm.x86.avx512.mask.gather.dpd.512
return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}

__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_i32gather_ps
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dps.512"

// LLVM-LABEL: test_mm512_i32gather_ps
// LLVM: @llvm.x86.avx512.mask.gather.dps.512
return _mm512_i32gather_ps(__index, __addr, 2);
}

__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_i64gather_pd
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpd.512"

// LLVM-LABEL: test_mm512_i64gather_pd
// CHECK: @llvm.x86.avx512.mask.gather.qpd.512
return _mm512_i64gather_pd(__index, __addr, 2);
}

__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_mask_i64gather_pd
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpd.512"

// LLVM-LABEL: test_mm512_mask_i64gather_pd
// CHECK: @llvm.x86.avx512.mask.gather.qpd.512
return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}

__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_i64gather_ps
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qps.512"

// LLVM-LABEL: test_mm512_i64gather_ps
// LLVM: @llvm.x86.avx512.mask.gather.qps.512
return _mm512_i64gather_ps(__index, __addr, 2);
}

__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_mask_i64gather_ps
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qps.512"

// LLVM-LABEL: test_mm512_mask_i64gather_ps
// LLVM: @llvm.x86.avx512.mask.gather.qps.512
return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}

__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) {
// CIR-LABEL: _mm512_i32gather_epi64
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpq.512"

// LLVM-LABEL: test_mm512_i32gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather.dpq.512
return _mm512_i32gather_epi64(__index, __addr, 2);
}

__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CIR-LABEL: _mm512_mask_i32gather_epi64
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpq.512"

// LLVM-LABEL: test_mm512_mask_i32gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather.dpq.512
return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}

__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_i32gather_epi32
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpi.512"

// LLVM-LABEL: test_mm512_i32gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather.dpi.512
return _mm512_i32gather_epi32(__index, __addr, 2);
}

__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_mask_i32gather_epi32
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpi.512"

// LLVM-LABEL: test_mm512_mask_i32gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather.dpi.512
return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}

__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_i64gather_epi64
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpq.512"

// LLVM-LABEL: test_mm512_i64gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather.qpq.512
return _mm512_i64gather_epi64(__index, __addr, 2);
}

__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_mask_i64gather_epi64
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpq.512"

// LLVM-LABEL: test_mm512_mask_i64gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather.qpq.512
return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}

__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_i64gather_epi32
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpi.512"

// LLVM-LABEL: test_mm512_i64gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather.qpi.512
return _mm512_i64gather_epi32(__index, __addr, 2);
}

__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
// CIR-LABEL: _mm512_mask_i64gather_epi32
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpi.512"

// LLVM-LABEL: test_mm512_mask_i64gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather.qpi.512
return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
144 changes: 144 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx512vl-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -527,3 +527,147 @@ __m256i test_mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P) {
// LLVM: @llvm.masked.expandload.v8i32(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_maskz_expandloadu_epi32(__U,__P);
}

__m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm_mmask_i64gather_pd
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div2.df"

// LLVM-LABEL: @test_mm_mmask_i64gather_pd
// LLVM: @llvm.x86.avx512.mask.gather3div2.df
return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}

__m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm_mmask_i64gather_epi64
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div2.di"

// LLVM-LABEL: @test_mm_mmask_i64gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather3div2.di
return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}

__m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CIR-LABEL: test_mm256_mmask_i64gather_pd
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.df"

// LLVM-LABEL: @test_mm256_mmask_i64gather_pd
// LLVM: @llvm.x86.avx512.mask.gather3div4.df
return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}

__m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CIR-LABEL: test_mm256_mmask_i64gather_epi64
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.di"

// LLVM-LABEL: @test_mm256_mmask_i64gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather3div4.di
return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}

__m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm_mmask_i64gather_ps
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.sf"

// LLVM-LABEL: @test_mm_mmask_i64gather_ps
// LLVM: @llvm.x86.avx512.mask.gather3div4.sf
return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}

__m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm_mmask_i64gather_epi32
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.si"

// LLVM-LABEL: @test_mm_mmask_i64gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather3div4.si
return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}

__m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CIR-LABEL: test_mm256_mmask_i64gather_ps
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div8.sf"

// LLVM-LABEL: @test_mm256_mmask_i64gather_ps
// LLVM: @llvm.x86.avx512.mask.gather3div8.sf
return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}

__m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CIR-LABEL: test_mm256_mmask_i64gather_epi32
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div8.si"

// LLVM-LABEL: @test_mm256_mmask_i64gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather3div8.si
return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}

__m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm_mask_i32gather_pd
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv2.df"

// LLVM-LABEL: @test_mm_mask_i32gather_pd
// LLVM: @llvm.x86.avx512.mask.gather3siv2.df
return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}

__m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm_mask_i32gather_epi64
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv2.di"

// LLVM-LABEL: @test_mm_mask_i32gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather3siv2.di
return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}

__m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm256_mask_i32gather_pd
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.df"

// LLVM-LABEL: @test_mm256_mask_i32gather_pd
// LLVM: @llvm.x86.avx512.mask.gather3siv4.df
return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}

__m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm256_mask_i32gather_epi64
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.di"

// LLVM-LABEL: @test_mm256_mask_i32gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather3siv4.di
return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}

__m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm_mask_i32gather_ps
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.sf"

// LLVM-LABEL: @test_mm_mask_i32gather_ps
// LLVM: @llvm.x86.avx512.mask.gather3siv4.sf
return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2);
}

__m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
// CIR-LABEL: test_mm_mask_i32gather_epi32
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.si"

// LLVM-LABEL: @test_mm_mask_i32gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather3siv4.si
return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}

__m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CIR-LABEL: test_mm256_mask_i32gather_ps
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv8.sf"

// LLVM-LABEL: @test_mm256_mask_i32gather_ps
// LLVM: @llvm.x86.avx512.mask.gather3siv8.sf
return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2);
}

__m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
// CIR-LABEL: test_mm256_mask_i32gather_epi32
// CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv8.si"

// LLVM-LABEL: @test_mm256_mask_i32gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather3siv8.si
return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
Loading