diff --git a/clang/lib/CIR/CodeGen/ABIInfo.h b/clang/lib/CIR/CodeGen/ABIInfo.h
index 4d03db38cabb9..ef9ade0a4a3ac 100644
--- a/clang/lib/CIR/CodeGen/ABIInfo.h
+++ b/clang/lib/CIR/CodeGen/ABIInfo.h
@@ -9,6 +9,9 @@
 #ifndef LLVM_CLANG_LIB_CIR_ABIINFO_H
 #define LLVM_CLANG_LIB_CIR_ABIINFO_H

+#include "clang/Basic/LangOptions.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
+
 namespace clang::CIRGen {

 class CIRGenFunctionInfo;
@@ -23,6 +26,12 @@ class ABIInfo {
   ABIInfo(CIRGenTypes &cgt) : cgt(cgt) {}

   virtual ~ABIInfo();
+
+  /// Returns the optimal vector memory type based on the given vector type.
+  /// For example, on certain targets, a vector with 3 elements might be
+  /// promoted to one with 4 elements to improve performance.
+  virtual cir::VectorType
+  getOptimalVectorMemoryType(cir::VectorType ty, const LangOptions &opt) const;
 };

 } // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index fa68ad931ba74..e40fa87adeec9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -314,21 +314,36 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr,
                                        bool isInit, bool isNontemporal) {
   assert(!cir::MissingFeatures::opLoadStoreThreadLocal());

+  mlir::Type srcTy = addr.getElementType();
   if (const auto *clangVecTy = ty->getAs<clang::VectorType>()) {
-    // Boolean vectors use `iN` as storage type.
-    if (clangVecTy->isExtVectorBoolType())
-      cgm.errorNYI(addr.getPointer().getLoc(),
-                   "emitStoreOfScalar ExtVectorBoolType");
+    if (auto vecTy = dyn_cast<cir::VectorType>(srcTy)) {
+      cir::VectorType newVecTy =
+          cgm.getTargetCIRGenInfo().getABIInfo().getOptimalVectorMemoryType(
+              vecTy, getLangOpts());
+
+      if (!clangVecTy->isPackedVectorBoolType(getContext()) &&
+          vecTy != newVecTy) {
+
+        const unsigned oldNumElements = vecTy.getSize();
+        const unsigned newNumElements = newVecTy.getSize();
+        SmallVector<mlir::Attribute> indices;
+        indices.reserve(newNumElements);
+        for (unsigned i = 0; i < newNumElements; ++i) {
+          int64_t idxValue = i < oldNumElements ? (int64_t)i : -1;
+          indices.push_back(cir::IntAttr::get(builder.getSInt64Ty(), idxValue));
+        }

-    // Handle vectors of size 3 like size 4 for better performance.
-    const mlir::Type elementType = addr.getElementType();
-    const auto vecTy = cast<cir::VectorType>(elementType);
+        cir::ConstantOp poison = builder.getConstant(
+            value.getLoc(), cir::PoisonAttr::get(value.getType()));
+        value =
+            cir::VecShuffleOp::create(builder, value.getLoc(), newVecTy, value,
+                                      poison, builder.getArrayAttr(indices));
+        srcTy = newVecTy;
+      }

-    // TODO(CIR): Use `ABIInfo::getOptimalVectorMemoryType` once it upstreamed
-    assert(!cir::MissingFeatures::cirgenABIInfo());
-    if (vecTy.getSize() == 3 && !getLangOpts().PreserveVec3Type)
-      cgm.errorNYI(addr.getPointer().getLoc(),
-                   "emitStoreOfScalar Vec3 & PreserveVec3Type disabled");
+      if (addr.getElementType() != srcTy)
+        addr = addr.withElementType(builder, srcTy);
+    }
   }

   value = emitToMemory(value, ty);
@@ -565,13 +580,31 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
       return nullptr;
     }

-    const auto vecTy = cast<cir::VectorType>(eltTy);
+    // Handle vector types whose memory type is wider than the value type,
+    // e.g. a 3-element vector that is loaded and stored as a 4-element one.
+    auto vecTy = cast<cir::VectorType>(eltTy);
+    cir::VectorType newVecTy =
+        cgm.getTargetCIRGenInfo().getABIInfo().getOptimalVectorMemoryType(
+            vecTy, getLangOpts());
+
+    if (vecTy != newVecTy) {
+      Address cast = addr.withElementType(builder, newVecTy);
+      mlir::Value value = builder.createLoad(cgm.getLoc(loc), cast, isVolatile);
+
+      unsigned oldNumElements = vecTy.getSize();
+      SmallVector<mlir::Attribute> indices;
+      indices.reserve(oldNumElements);
+      for (unsigned i = 0; i < oldNumElements; i++) {
+        indices.push_back(cir::IntAttr::get(builder.getSInt64Ty(), i));
+      }
+
+      cir::ConstantOp poison = builder.getConstant(
+          value.getLoc(), cir::PoisonAttr::get(value.getType()));
+      value = cir::VecShuffleOp::create(builder, cgm.getLoc(loc), vecTy, value,
+                                        poison, builder.getArrayAttr(indices));

-    // Handle vectors of size 3 like size 4 for better performance.
-    assert(!cir::MissingFeatures::cirgenABIInfo());
-    if (vecTy.getSize() == 3 && !getLangOpts().PreserveVec3Type)
-      cgm.errorNYI(addr.getPointer().getLoc(),
-                   "emitLoadOfScalar Vec3 & PreserveVec3Type disabled");
+      return value;
+    }
   }

   assert(!cir::MissingFeatures::opLoadStoreTbaa());
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index 62a8c59abe604..252d9e9ad48bc 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -60,6 +60,15 @@ clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) {

 ABIInfo::~ABIInfo() noexcept = default;

+cir::VectorType
+ABIInfo::getOptimalVectorMemoryType(cir::VectorType ty,
+                                    const LangOptions &opt) const {
+  // Handle vectors of size 3 like size 4 for better performance.
+  if (ty.getSize() == 3 && !opt.PreserveVec3Type)
+    return cir::VectorType::get(ty.getElementType(), 4);
+  return ty;
+}
+
 bool TargetCIRGenInfo::isNoProtoCallVariadic(
     const FunctionNoProtoType *fnType) const {
   // The following conventions are known to require this to be false:
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 58ef500446aa7..f4aab4fc6c5be 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -321,6 +321,12 @@ static LogicalResult checkConstantTypes(mlir::Operation *op, mlir::Type opType,
         "zero expects struct, array, vector, or complex type");
   }

+  if (isa<cir::PoisonAttr>(attrType)) {
+    if (!mlir::isa<cir::VoidType>(opType))
+      return success();
+    return op->emitOpError("poison expects non-void type");
+  }
+
   if (mlir::isa<cir::BoolAttr>(attrType)) {
     if (!mlir::isa<cir::BoolType>(opType))
       return op->emitOpError("result type (")
@@ -2123,23 +2129,25 @@ OpFoldResult cir::VecShuffleOp::fold(FoldAdaptor adaptor) {
   if (!vec1Attr || !vec2Attr)
     return {};

-  mlir::Type vec1ElemTy =
-      mlir::cast<cir::VectorType>(vec1Attr.getType()).getElementType();
+  mlir::ArrayAttr indicesElts = adaptor.getIndices();
+  auto indicesEltsRange = indicesElts.getAsRange<cir::IntAttr>();
+
+  // In MLIR, a DenseElementsAttr cannot contain a poison element, so we
+  // cannot fold the shuffle op to a ConstVector if any index is -1.
+  if (std::find_if(indicesEltsRange.begin(), indicesEltsRange.end(),
+                   [](cir::IntAttr idx) { return idx.getSInt() == -1; }) !=
+      indicesEltsRange.end()) {
+    return {};
+  }

   mlir::ArrayAttr vec1Elts = vec1Attr.getElts();
   mlir::ArrayAttr vec2Elts = vec2Attr.getElts();
-  mlir::ArrayAttr indicesElts = adaptor.getIndices();

   SmallVector<mlir::Attribute, 16> elements;
   elements.reserve(indicesElts.size());

   uint64_t vec1Size = vec1Elts.size();
-  for (const auto &idxAttr : indicesElts.getAsRange<cir::IntAttr>()) {
-    if (idxAttr.getSInt() == -1) {
-      elements.push_back(cir::UndefAttr::get(vec1ElemTy));
-      continue;
-    }
-
+  for (const auto &idxAttr : indicesEltsRange) {
     uint64_t idxValue = idxAttr.getUInt();
     elements.push_back(idxValue < vec1Size ? vec1Elts[idxValue]
                                            : vec2Elts[idxValue - vec1Size]);
diff --git a/clang/test/CIR/CodeGen/vector-with-size-3.cpp b/clang/test/CIR/CodeGen/vector-with-size-3.cpp
new file mode 100644
index 0000000000000..2108d3cefb8c5
--- /dev/null
+++ b/clang/test/CIR/CodeGen/vector-with-size-3.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+typedef int vi3 __attribute__((ext_vector_type(3)));
+
+void store_load() {
+  vi3 a;
+  vi3 b = a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr<!cir.vector<3 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr<!cir.vector<3 x !s32i>>, ["b", init]
+// CIR: %[[A_V4:.*]] = cir.cast(bitcast, %[[A_ADDR]] : !cir.ptr<!cir.vector<3 x !s32i>>), !cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: %[[TMP_A_V4:.*]] = cir.load{{.*}} %[[A_V4]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
+// CIR: %[[SHUFFLE_V4:.*]] = cir.vec.shuffle(%[[TMP_A_V4]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i] : !cir.vector<3 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<3 x !s32i>
+// CIR: %[[SHUFFLE_V3:.*]] = cir.vec.shuffle(%[[SHUFFLE_V4]], %[[POISON]] : !cir.vector<3 x !s32i>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<-1> : !s64i] : !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B_V4:.*]] = cir.cast(bitcast, %[[B_ADDR]] : !cir.ptr<!cir.vector<3 x !s32i>>), !cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: cir.store{{.*}} %[[SHUFFLE_V3]], %[[TMP_B_V4]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <3 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <3 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[SHUFFLE_V4:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// LLVM: %[[SHUFFLE_V3:.*]] = shufflevector <3 x i32> %[[SHUFFLE_V4]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// LLVM: store <4 x i32> %[[SHUFFLE_V3]], ptr %[[B_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <3 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <3 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[SHUFFLE_V4:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// OGCG: %[[SHUFFLE_V3:.*]] = shufflevector <3 x i32> %[[SHUFFLE_V4]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+// OGCG: store <4 x i32> %[[SHUFFLE_V3]], ptr %[[B_ADDR]], align 16
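
For illustration only (not part of the patch): the store path widens an
N-element value to the wider memory type with a mask whose tail lanes are -1
(poison), and the load path shuffles the leading N lanes back out of the wider
loaded value. A minimal standalone C++ sketch of that mask construction, with
hypothetical helper names:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Store path: widen an oldN-lane value to newN lanes; lanes past oldN are
  // -1, i.e. poison.
  static std::vector<int64_t> widenMask(unsigned oldN, unsigned newN) {
    std::vector<int64_t> mask;
    mask.reserve(newN);
    for (unsigned i = 0; i < newN; ++i)
      mask.push_back(i < oldN ? static_cast<int64_t>(i) : -1);
    return mask;
  }

  // Load path: shuffle the leading oldN lanes back out of the widened value.
  static std::vector<int64_t> truncateMask(unsigned oldN) {
    std::vector<int64_t> mask;
    mask.reserve(oldN);
    for (unsigned i = 0; i < oldN; ++i)
      mask.push_back(i);
    return mask;
  }

  int main() {
    assert((widenMask(3, 4) == std::vector<int64_t>{0, 1, 2, -1}));
    assert((truncateMask(3) == std::vector<int64_t>{0, 1, 2}));
  }

These are exactly the masks checked in the test above: [0, 1, 2, -1] for the
v3-to-v4 store and [0, 1, 2] for the v4-to-v3 load.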
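Similarly, a standalone sketch of the fold guard in VecShuffleOp::fold (the
canFoldShuffle helper is hypothetical): since a DenseElementsAttr cannot
represent a poison element, folding must be skipped as soon as any shuffle
index is -1, not when some index differs from -1:

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Foldable only when every lane names a real source element; a -1 lane is
  // poison and has no constant representation.
  static bool canFoldShuffle(const std::vector<int64_t> &indices) {
    return std::none_of(indices.begin(), indices.end(),
                        [](int64_t idx) { return idx == -1; });
  }

  int main() {
    assert(canFoldShuffle({0, 1, 2}));   // all lanes defined: fold
    assert(!canFoldShuffle({0, 1, -1})); // poison lane: bail out
  }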