Skip to content
42 changes: 42 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@

#include "CIRGenFunction.h"
#include "CIRGenModule.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/ValueRange.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"

using namespace clang;
Expand Down Expand Up @@ -85,6 +88,41 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, mlir::Location loc,
return maskVec;
}

// Emit an x86 rotate/funnel shift as a call to the LLVM fshl/fshr intrinsic.
// `op0` supplies the high bits and `op1` the low bits (for rotates both are
// the same value); `amt` is the shift amount, which may be a scalar immediate
// and is then splatted to a vector matching op0's element count. Returns the
// shifted vector value.
static mlir::Value emitX86FunnelShift(CIRGenFunction &cgf,
                                      mlir::Location location, mlir::Value &op0,
                                      mlir::Value &op1, mlir::Value &amt,
                                      bool isRight) {
  CIRGenBuilderTy &builder = cgf.getBuilder();
  mlir::Type op0Ty = op0.getType();

  // Amount may be scalar immediate, in which case create a splat vector.
  // Funnel shifts amounts are treated as modulo and types are all power-of-2
  // so we only care about the lowest log2 bits anyway.
  if (amt.getType() != op0Ty) {
    auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
    uint64_t numElems = vecTy.getSize();

    auto amtTy = mlir::cast<cir::IntType>(amt.getType());
    auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());

    // Cast to same width unsigned if not already unsigned.
    if (amtTy.isSigned()) {
      cir::IntType unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
      amt = builder.createIntCast(amt, unsignedAmtTy);
    }
    // Cast the unsigned `amt` to operand element type's width unsigned.
    cir::IntType unsignedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
    amt = builder.createIntCast(amt, unsignedVecElemType);
    amt = cir::VecSplatOp::create(
        builder, location, cir::VectorType::get(unsignedVecElemType, numElems),
        amt);
  }

  // fshl shifts left (rotate-left); fshr shifts right (rotate-right).
  // StringRef avoids a heap-allocating std::string for a literal name.
  const StringRef intrinsicName = isRight ? "fshr" : "fshl";
  return emitIntrinsicCallOp(builder, location, intrinsicName, op0Ty,
                             mlir::ValueRange{op0, op1, amt});
}

mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
Expand Down Expand Up @@ -661,12 +699,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_prolq128:
case X86::BI__builtin_ia32_prolq256:
case X86::BI__builtin_ia32_prolq512:
return emitX86FunnelShift(*this, getLoc(expr->getExprLoc()), ops[0], ops[0],
ops[1], false);
case X86::BI__builtin_ia32_prord128:
case X86::BI__builtin_ia32_prord256:
case X86::BI__builtin_ia32_prord512:
case X86::BI__builtin_ia32_prorq128:
case X86::BI__builtin_ia32_prorq256:
case X86::BI__builtin_ia32_prorq512:
return emitX86FunnelShift(*this, getLoc(expr->getExprLoc()), ops[0], ops[0],
ops[1], true);
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
Expand Down
33 changes: 33 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,36 @@ __m512i test_mm512_undefined_epi32(void) {
// OGCG: ret <8 x i64> zeroinitializer
return _mm512_undefined_epi32();
}

// Rotate each 32-bit lane of __A right by the immediate 5; lowered to the
// LLVM fshr funnel-shift intrinsic with both data operands the same vector.
__m512i test_mm512_ror_epi32(__m512i __A) {
// CIR-LABEL: test_mm512_ror_epi32
// CIR: {{%.*}} = cir.cast integral {{%.*}} : !s32i -> !u32i
// CIR: {{%.*}} = cir.vec.splat {{%.*}} : !u32i, !cir.vector<16 x !u32i>
// CIR: {{%.*}} = cir.call_llvm_intrinsic "fshr" {{%.*}}: (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>

// LLVM-LABEL: test_mm512_ror_epi32
// LLVM: %[[CASTED_VAR:.*]] = bitcast <8 x i64> {{%.*}} to <16 x i32>
// LLVM: {{%.*}} = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %[[CASTED_VAR]], <16 x i32> %[[CASTED_VAR]], <16 x i32> splat (i32 5))

// OGCG-LABEL: test_mm512_ror_epi32
// OGCG: %[[CASTED_VAR:.*]] = bitcast <8 x i64> {{%.*}} to <16 x i32>
// OGCG: {{%.*}} = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %[[CASTED_VAR]], <16 x i32> %[[CASTED_VAR]], <16 x i32> splat (i32 5))
return _mm512_ror_epi32(__A, 5);
}

// Rotate each 64-bit lane of __A right by the immediate 5. The s32 immediate
// is cast u32 -> u64 before being splatted to match the element width.
__m512i test_mm512_ror_epi64(__m512i __A) {
// CIR-LABEL: test_mm512_ror_epi64
// CIR: {{%.*}} = cir.cast integral {{%.*}} : !s32i -> !u32i
// CIR: {{%.*}} = cir.cast integral {{%.*}} : !u32i -> !u64i
// CIR: {{%.*}} = cir.vec.splat {{%.*}} : !u64i, !cir.vector<8 x !u64i>
// CIR: {{%.*}} = cir.call_llvm_intrinsic "fshr" {{%.*}}: (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>

// LLVM-LABEL: test_mm512_ror_epi64
// LLVM: %[[VAR:.*]] = load <8 x i64>, ptr {{%.*}}, align 64
// LLVM: {{%.*}} = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %[[VAR]], <8 x i64> %[[VAR]], <8 x i64> splat (i64 5))

// OGCG-LABEL: test_mm512_ror_epi64
// OGCG: %[[VAR:.*]] = load <8 x i64>, ptr {{%.*}}, align 64
// OGCG: {{%.*}} = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %[[VAR]], <8 x i64> %[[VAR]], <8 x i64> splat (i64 5))
return _mm512_ror_epi64(__A, 5);
}
84 changes: 84 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/xop-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-cir -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-cir -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fclangir -emit-llvm -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -fclangir -emit-llvm -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-cir -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-cir -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fclangir -emit-llvm -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -fclangir -emit-llvm -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG

#include <x86intrin.h>

// This test mirrors clang/test/CodeGen/X86/xop-builtins.c, which ClangIR
// should eventually be able to support in full.

// XOP rotate-left of each 8-bit lane by 1; lowered to fshl with both data
// operands the same vector. The [us]8i regexes cover the -fno-signed-char runs.
__m128i test_mm_roti_epi8(__m128i a) {
// CIR-LABEL: test_mm_roti_epi8
// CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}8i, !cir.vector<16 x !{{[us]}}8i>
// CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<16 x !{{[su]}}8i>, !cir.vector<16 x !{{[su]}}8i>, !cir.vector<16 x !{{[su]}}8i>) -> !cir.vector<16 x !{{[su]}}8i>
// LLVM-LABEL: test_mm_roti_epi8
// LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <16 x i8>
// LLVM: {{%.*}} = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %[[CASTED_VAR]], <16 x i8> %[[CASTED_VAR]], <16 x i8> splat (i8 1))
// OGCG-LABEL: test_mm_roti_epi8
// OGCG: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <16 x i8>
// OGCG: {{%.*}} = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %[[CASTED_VAR]], <16 x i8> %[[CASTED_VAR]], <16 x i8> splat (i8 1))
return _mm_roti_epi8(a, 1);
}

// XOP rotate-left of each 16-bit lane by 50; the u8 immediate is widened to
// the element width (u16) before the splat.
__m128i test_mm_roti_epi16(__m128i a) {
// CIR-LABEL: test_mm_roti_epi16
// CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u16i
// CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}16i, !cir.vector<8 x !u16i>
// CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !u16i>) -> !cir.vector<8 x !{{[su]}}16i>
// LLVM-LABEL: test_mm_roti_epi16
// LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <8 x i16>
// LLVM: {{%.*}} = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %[[CASTED_VAR]], <8 x i16> %[[CASTED_VAR]], <8 x i16> splat (i16 50))
// OGCG-LABEL: test_mm_roti_epi16
// OGCG: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <8 x i16>
// OGCG: {{%.*}} = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %[[CASTED_VAR]], <8 x i16> %[[CASTED_VAR]], <8 x i16> splat (i16 50))
return _mm_roti_epi16(a, 50);
}

// XOP rotate-left of each 32-bit lane by -30; the immediate is truncated to
// imm8 first, so -30 becomes 226 in the splat below.
__m128i test_mm_roti_epi32(__m128i a) {
// CIR-LABEL: test_mm_roti_epi32
// CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u32i
// CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}32i, !cir.vector<4 x !u32i>
// CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !u32i>) -> !cir.vector<4 x !{{[su]}}32i>
// LLVM-LABEL: test_mm_roti_epi32
// LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <4 x i32>
// LLVM: {{%.*}} = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %[[CASTED_VAR]], <4 x i32> %[[CASTED_VAR]], <4 x i32> splat (i32 226))
// OGCG-LABEL: test_mm_roti_epi32
// OGCG: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <4 x i32>
// OGCG: {{%.*}} = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %[[CASTED_VAR]], <4 x i32> %[[CASTED_VAR]], <4 x i32> splat (i32 226))
return _mm_roti_epi32(a, -30);
}

// XOP rotate-left of each 64-bit lane by 100 (fshl with both data operands
// equal). The intrinsic result pattern uses the !{{[su]}}64i regex for
// consistency with the other roti tests in this file.
__m128i test_mm_roti_epi64(__m128i a) {
// CIR-LABEL: test_mm_roti_epi64
// CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u64i
// CIR: {{%.*}} = cir.vec.splat {{%.*}} : !u64i, !cir.vector<2 x !u64i>
// CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !u64i>) -> !cir.vector<2 x !{{[su]}}64i>
// LLVM-LABEL: test_mm_roti_epi64
// LLVM: %[[VAR:.*]] = load <2 x i64>, ptr {{%.*}}, align 16
// LLVM: {{%.*}} = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x i64> %[[VAR]], <2 x i64> splat (i64 100))
// OGCG-LABEL: test_mm_roti_epi64
// OGCG: %[[VAR:.*]] = load <2 x i64>, ptr {{%.*}}, align 16
// OGCG: {{%.*}} = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x i64> %[[VAR]], <2 x i64> splat (i64 100))
return _mm_roti_epi64(a, 100);
}