Skip to content

Commit 2326b31

Browse files
authored
[CIR][CIRGen][Builtin][X86] Lower avx512 scatter intrinsics (#1786)
1 parent 84b3939 commit 2326b31

File tree

3 files changed

+506
-2
lines changed

3 files changed

+506
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -850,8 +850,97 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
850850
case X86::BI__builtin_ia32_scattersiv4sf:
851851
case X86::BI__builtin_ia32_scattersiv4si:
852852
case X86::BI__builtin_ia32_scattersiv8sf:
853-
case X86::BI__builtin_ia32_scattersiv8si:
854-
llvm_unreachable("scattersiv8df NYI");
853+
case X86::BI__builtin_ia32_scattersiv8si: {
854+
llvm::StringRef intrinsicName;
855+
switch (BuiltinID) {
856+
default:
857+
llvm_unreachable("Unexpected builtin");
858+
case X86::BI__builtin_ia32_scattersiv8df:
859+
intrinsicName = "x86.avx512.mask.scatter.dpd.512";
860+
break;
861+
case X86::BI__builtin_ia32_scattersiv16sf:
862+
intrinsicName = "x86.avx512.mask.scatter.dps.512";
863+
break;
864+
case X86::BI__builtin_ia32_scatterdiv8df:
865+
intrinsicName = "x86.avx512.mask.scatter.qpd.512";
866+
break;
867+
case X86::BI__builtin_ia32_scatterdiv16sf:
868+
intrinsicName = "x86.avx512.mask.scatter.qps.512";
869+
break;
870+
case X86::BI__builtin_ia32_scattersiv8di:
871+
intrinsicName = "x86.avx512.mask.scatter.dpq.512";
872+
break;
873+
case X86::BI__builtin_ia32_scattersiv16si:
874+
intrinsicName = "x86.avx512.mask.scatter.dpi.512";
875+
break;
876+
case X86::BI__builtin_ia32_scatterdiv8di:
877+
intrinsicName = "x86.avx512.mask.scatter.qpq.512";
878+
break;
879+
case X86::BI__builtin_ia32_scatterdiv16si:
880+
intrinsicName = "x86.avx512.mask.scatter.qpi.512";
881+
break;
882+
case X86::BI__builtin_ia32_scatterdiv2df:
883+
intrinsicName = "x86.avx512.mask.scatterdiv2.df";
884+
break;
885+
case X86::BI__builtin_ia32_scatterdiv2di:
886+
intrinsicName = "x86.avx512.mask.scatterdiv2.di";
887+
break;
888+
case X86::BI__builtin_ia32_scatterdiv4df:
889+
intrinsicName = "x86.avx512.mask.scatterdiv4.df";
890+
break;
891+
case X86::BI__builtin_ia32_scatterdiv4di:
892+
intrinsicName = "x86.avx512.mask.scatterdiv4.di";
893+
break;
894+
case X86::BI__builtin_ia32_scatterdiv4sf:
895+
intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
896+
break;
897+
case X86::BI__builtin_ia32_scatterdiv4si:
898+
intrinsicName = "x86.avx512.mask.scatterdiv4.si";
899+
break;
900+
case X86::BI__builtin_ia32_scatterdiv8sf:
901+
intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
902+
break;
903+
case X86::BI__builtin_ia32_scatterdiv8si:
904+
intrinsicName = "x86.avx512.mask.scatterdiv8.si";
905+
break;
906+
case X86::BI__builtin_ia32_scattersiv2df:
907+
intrinsicName = "x86.avx512.mask.scattersiv2.df";
908+
break;
909+
case X86::BI__builtin_ia32_scattersiv2di:
910+
intrinsicName = "x86.avx512.mask.scattersiv2.di";
911+
break;
912+
case X86::BI__builtin_ia32_scattersiv4df:
913+
intrinsicName = "x86.avx512.mask.scattersiv4.df";
914+
break;
915+
case X86::BI__builtin_ia32_scattersiv4di:
916+
intrinsicName = "x86.avx512.mask.scattersiv4.di";
917+
break;
918+
case X86::BI__builtin_ia32_scattersiv4sf:
919+
intrinsicName = "x86.avx512.mask.scattersiv4.sf";
920+
break;
921+
case X86::BI__builtin_ia32_scattersiv4si:
922+
intrinsicName = "x86.avx512.mask.scattersiv4.si";
923+
break;
924+
case X86::BI__builtin_ia32_scattersiv8sf:
925+
intrinsicName = "x86.avx512.mask.scattersiv8.sf";
926+
break;
927+
case X86::BI__builtin_ia32_scattersiv8si:
928+
intrinsicName = "x86.avx512.mask.scattersiv8.si";
929+
break;
930+
}
931+
932+
unsigned minElts =
933+
std::min(cast<cir::VectorType>(Ops[2].getType()).getSize(),
934+
cast<cir::VectorType>(Ops[3].getType()).getSize());
935+
Ops[1] = getMaskVecValue(*this, Ops[1], minElts, getLoc(E->getExprLoc()));
936+
937+
return builder
938+
.create<cir::LLVMIntrinsicCallOp>(
939+
getLoc(E->getExprLoc()), builder.getStringAttr(intrinsicName.str()),
940+
builder.getVoidTy(), Ops)
941+
.getResult();
942+
}
943+
855944
case X86::BI__builtin_ia32_vextractf128_pd256:
856945
case X86::BI__builtin_ia32_vextractf128_ps256:
857946
case X86::BI__builtin_ia32_vextractf128_si256:

clang/test/CIR/CodeGen/X86/avx512f-builtins.c

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,3 +507,130 @@ __m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m51
507507
// LLVM: @llvm.x86.avx512.mask.gather.qpi.512
508508
return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
509509
}
510+
511+
512+
void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) {
513+
// CIR-LABEL: test_mm512_i32scatter_pd
514+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
515+
516+
// LLVM-LABEL: test_mm512_i32scatter_pd
517+
// LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
518+
return _mm512_i32scatter_pd(__addr, __index, __v1, 2);
519+
}
520+
521+
void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) {
522+
// CIR-LABEL: test_mm512_mask_i32scatter_pd
523+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
524+
525+
// LLVM-LABEL: test_mm512_mask_i32scatter_pd
526+
// LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
527+
return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2);
528+
}
529+
530+
void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) {
531+
// CIR-LABEL: test_mm512_i32scatter_ps
532+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
533+
534+
// LLVM-LABEL: test_mm512_i32scatter_ps
535+
// LLVM: @llvm.x86.avx512.mask.scatter.dps.512
536+
return _mm512_i32scatter_ps(__addr, __index, __v1, 2);
537+
}
538+
539+
void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) {
540+
// CIR-LABEL: test_mm512_mask_i32scatter_ps
541+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
542+
543+
// LLVM-LABEL: test_mm512_mask_i32scatter_ps
544+
// LLVM: @llvm.x86.avx512.mask.scatter.dps.512
545+
return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2);
546+
}
547+
548+
void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) {
549+
// CIR-LABEL: test_mm512_i64scatter_pd
550+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
551+
552+
// LLVM-LABEL: test_mm512_i64scatter_pd
553+
// LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
554+
return _mm512_i64scatter_pd(__addr, __index, __v1, 2);
555+
}
556+
557+
void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
558+
// CIR-LABEL: test_mm512_mask_i64scatter_pd
559+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
560+
561+
// LLVM-LABEL: test_mm512_mask_i64scatter_pd
562+
// LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
563+
return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2);
564+
}
565+
566+
void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) {
567+
// CIR-LABEL: test_mm512_i64scatter_ps
568+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
569+
570+
// LLVM-LABEL: test_mm512_i64scatter_ps
571+
// LLVM: @llvm.x86.avx512.mask.scatter.qps.512
572+
return _mm512_i64scatter_ps(__addr, __index, __v1, 2);
573+
}
574+
575+
void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) {
576+
// CIR-LABEL: test_mm512_mask_i64scatter_ps
577+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
578+
579+
// LLVM-LABEL: test_mm512_mask_i64scatter_ps
580+
// LLVM: @llvm.x86.avx512.mask.scatter.qps.512
581+
return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2);
582+
}
583+
584+
void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) {
585+
// CIR-LABEL: test_mm512_i32scatter_epi32
586+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
587+
588+
// LLVM-LABEL: test_mm512_i32scatter_epi32
589+
// LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
590+
return _mm512_i32scatter_epi32(__addr, __index, __v1, 2);
591+
}
592+
593+
void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) {
594+
// CIR-LABEL: test_mm512_mask_i32scatter_epi32
595+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
596+
597+
// LLVM-LABEL: test_mm512_mask_i32scatter_epi32
598+
// LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
599+
return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2);
600+
}
601+
602+
void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
603+
// CIR-LABEL: test_mm512_i64scatter_epi64
604+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
605+
606+
// LLVM-LABEL: test_mm512_i64scatter_epi64
607+
// LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
608+
return _mm512_i64scatter_epi64(__addr, __index, __v1, 2);
609+
}
610+
611+
void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
612+
// CIR-LABEL: test_mm512_mask_i64scatter_epi64
613+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
614+
615+
// LLVM-LABEL: test_mm512_mask_i64scatter_epi64
616+
// LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
617+
return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2);
618+
}
619+
620+
void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) {
621+
// CIR-LABEL: test_mm512_i64scatter_epi32
622+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
623+
624+
// LLVM-LABEL: test_mm512_i64scatter_epi32
625+
// LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
626+
return _mm512_i64scatter_epi32(__addr, __index, __v1, 2);
627+
}
628+
629+
void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) {
630+
// CIR-LABEL: test_mm512_mask_i64scatter_epi32
631+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
632+
633+
// LLVM-LABEL: test_mm512_mask_i64scatter_epi32
634+
// LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
635+
return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2);
636+
}

0 commit comments

Comments
 (0)