9 changes: 9 additions & 0 deletions clang/lib/CIR/CodeGen/ABIInfo.h
@@ -9,6 +9,9 @@
#ifndef LLVM_CLANG_LIB_CIR_ABIINFO_H
#define LLVM_CLANG_LIB_CIR_ABIINFO_H

#include "clang/Basic/LangOptions.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"

namespace clang::CIRGen {

class CIRGenFunctionInfo;
@@ -23,6 +26,12 @@ class ABIInfo {
ABIInfo(CIRGenTypes &cgt) : cgt(cgt) {}

virtual ~ABIInfo();

/// Returns the optimal vector memory type based on the given vector type. For
/// example, on certain targets, a vector with 3 elements might be promoted to
/// one with 4 elements to improve performance.
virtual cir::VectorType
getOptimalVectorMemoryType(cir::VectorType ty, const LangOptions &opt) const;
};

} // namespace clang::CIRGen
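A minimal sketch of how a target-specific subclass could override this hook (the class name SomeTargetABIInfo is hypothetical and not part of this patch):

// Hypothetical target override, shown only to illustrate the hook's contract:
// the returned type is what loads and stores use in memory, and returning the
// input type means "no change".
class SomeTargetABIInfo : public ABIInfo {
public:
  SomeTargetABIInfo(CIRGenTypes &cgt) : ABIInfo(cgt) {}

  cir::VectorType
  getOptimalVectorMemoryType(cir::VectorType ty,
                             const LangOptions &) const override {
    // This imaginary target keeps every vector's natural memory type, so vec3
    // values are not widened regardless of -fpreserve-vec3-type.
    return ty;
  }
};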
69 changes: 51 additions & 18 deletions clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -314,21 +314,36 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr,
bool isInit, bool isNontemporal) {
assert(!cir::MissingFeatures::opLoadStoreThreadLocal());

mlir::Type srcTy = addr.getElementType();
if (const auto *clangVecTy = ty->getAs<clang::VectorType>()) {
// Boolean vectors use `iN` as storage type.
if (clangVecTy->isExtVectorBoolType())
cgm.errorNYI(addr.getPointer().getLoc(),
"emitStoreOfScalar ExtVectorBoolType");
if (auto vecTy = dyn_cast<cir::VectorType>(srcTy)) {
Contributor: At some point we may need to check that this is not a scalable vector type. Can you add a missing feature assert for scalable vectors here?

cir::VectorType newVecTy =
cgm.getTargetCIRGenInfo().getABIInfo().getOptimalVectorMemoryType(
vecTy, getLangOpts());

if (!clangVecTy->isPackedVectorBoolType(getContext()) &&
vecTy != newVecTy) {

const unsigned oldNumElements = vecTy.getSize();
const unsigned newNumElements = newVecTy.getSize();
SmallVector<mlir::Attribute, 8> indices;
indices.reserve(newNumElements);
for (unsigned i = 0; i < newNumElements; ++i) {
int64_t idxValue = i < oldNumElements ? (int64_t)i : -1;
indices.push_back(cir::IntAttr::get(builder.getSInt64Ty(), idxValue));
}

// Handle vectors of size 3 like size 4 for better performance.
const mlir::Type elementType = addr.getElementType();
const auto vecTy = cast<cir::VectorType>(elementType);
cir::ConstantOp poison = builder.getConstant(
value.getLoc(), cir::PoisonAttr::get(value.getType()));
value =
cir::VecShuffleOp::create(builder, value.getLoc(), newVecTy, value,
poison, builder.getArrayAttr(indices));
srcTy = newVecTy;
}

// TODO(CIR): Use `ABIInfo::getOptimalVectorMemoryType` once it upstreamed
assert(!cir::MissingFeatures::cirgenABIInfo());
if (vecTy.getSize() == 3 && !getLangOpts().PreserveVec3Type)
cgm.errorNYI(addr.getPointer().getLoc(),
"emitStoreOfScalar Vec3 & PreserveVec3Type disabled");
if (addr.getElementType() != srcTy)
addr = addr.withElementType(builder, srcTy);
}
}

value = emitToMemory(value, ty);
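A plain-C++ illustration of the index mask the widening store path above builds (the helper name makeWidenMask is ours, not from the patch):

#include <cassert>
#include <cstdint>
#include <vector>

// Lanes 0..oldN-1 keep their index; the extra lanes get -1, which the shuffle
// treats as "don't care" (lowered to poison), so nothing meaningful is written
// to the padding lane.
static std::vector<int64_t> makeWidenMask(unsigned oldN, unsigned newN) {
  assert(newN >= oldN && "only widening is expected here");
  std::vector<int64_t> mask;
  mask.reserve(newN);
  for (unsigned i = 0; i < newN; ++i)
    mask.push_back(i < oldN ? static_cast<int64_t>(i) : -1);
  return mask;
}

// makeWidenMask(3, 4) yields {0, 1, 2, -1}, matching the vec3 -> vec4 store.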
@@ -565,13 +580,31 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
return nullptr;
}

const auto vecTy = cast<cir::VectorType>(eltTy);
// Handle vectors whose size is likely to be expanded to a larger one to
// improve performance.
auto vecTy = cast<cir::VectorType>(eltTy);
cir::VectorType newVecTy =
cgm.getTargetCIRGenInfo().getABIInfo().getOptimalVectorMemoryType(
vecTy, getLangOpts());

if (vecTy != newVecTy) {
Address cast = addr.withElementType(builder, newVecTy);
mlir::Value value = builder.createLoad(cgm.getLoc(loc), cast, isVolatile);

unsigned oldNumElements = vecTy.getSize();
Contributor: Can you assert that newVecTy->getSize() > oldNumElements?

SmallVector<mlir::Attribute, 8> indices;
Contributor: Suggested change: replace `SmallVector<mlir::Attribute, 8> indices;` with `SmallVector<mlir::Attribute> indices;`.

indices.reserve(oldNumElements);
for (unsigned i = 0; i < oldNumElements; i++) {
Contributor: No need for braces here.

indices.push_back(cir::IntAttr::get(builder.getSInt64Ty(), i));
}

cir::ConstantOp poison = builder.getConstant(
value.getLoc(), cir::PoisonAttr::get(value.getType()));
value = builder.create<cir::VecShuffleOp>(
cgm.getLoc(loc), vecTy, value, poison, builder.getArrayAttr(indices));

// Handle vectors of size 3 like size 4 for better performance.
assert(!cir::MissingFeatures::cirgenABIInfo());
if (vecTy.getSize() == 3 && !getLangOpts().PreserveVec3Type)
cgm.errorNYI(addr.getPointer().getLoc(),
"emitLoadOfScalar Vec3 & PreserveVec3Type disabled");
return value;
}
}

assert(!cir::MissingFeatures::opLoadStoreTbaa());
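A scalar model of the store/load round trip above, as an illustration only (plain arrays stand in for CIR vectors; this is not the generated code path):

#include <array>
#include <cstdint>
#include <cstdio>

// The store widens a 3-element value to 4 lanes (the 4th lane is unspecified)
// and the load reads 4 lanes and keeps only the first 3.
int main() {
  std::array<int32_t, 4> slot{};          // stand-in for the widened memory slot
  const std::array<int32_t, 3> a{10, 20, 30};

  // Store path: widen, then one 4-lane store.
  std::array<int32_t, 4> widened{};       // lane 3 stays unspecified (here: 0)
  for (unsigned i = 0; i < 3; ++i)
    widened[i] = a[i];
  slot = widened;

  // Load path: one 4-lane load, then narrow back to 3 lanes.
  std::array<int32_t, 3> b{};
  for (unsigned i = 0; i < 3; ++i)
    b[i] = slot[i];

  std::printf("%d %d %d\n", b[0], b[1], b[2]); // prints: 10 20 30
}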
9 changes: 9 additions & 0 deletions clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -60,6 +60,15 @@ clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) {

ABIInfo::~ABIInfo() noexcept = default;

cir::VectorType
ABIInfo::getOptimalVectorMemoryType(cir::VectorType ty,
const LangOptions &opt) const {
if (ty.getSize() == 3 && !opt.PreserveVec3Type) {
Contributor: No need for braces here.

return cir::VectorType::get(ty.getElementType(), 4);
}
return ty;
}

bool TargetCIRGenInfo::isNoProtoCallVariadic(
const FunctionNoProtoType *fnType) const {
// The following conventions are known to require this to be false:
26 changes: 17 additions & 9 deletions clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -321,6 +321,12 @@ static LogicalResult checkConstantTypes(mlir::Operation *op, mlir::Type opType,
"zero expects struct, array, vector, or complex type");
}

if (isa<cir::PoisonAttr>(attrType)) {
if (!mlir::isa<cir::VoidType>(opType))
return success();
return op->emitOpError("poison expects non-void type");
}

if (mlir::isa<cir::BoolAttr>(attrType)) {
if (!mlir::isa<cir::BoolType>(opType))
return op->emitOpError("result type (")
@@ -2123,23 +2129,25 @@ OpFoldResult cir::VecShuffleOp::fold(FoldAdaptor adaptor) {
if (!vec1Attr || !vec2Attr)
return {};

mlir::Type vec1ElemTy =
mlir::cast<cir::VectorType>(vec1Attr.getType()).getElementType();
mlir::ArrayAttr indicesElts = adaptor.getIndices();
auto indicesEltsRange = indicesElts.getAsRange<cir::IntAttr>();

// In MLIR, a DenseElementsAttr can't contain an undef attr, so we can't fold
// the shuffle op to a ConstVector if its indices contain a -1 value.
if (std::find_if(indicesEltsRange.begin(), indicesEltsRange.end(),
[](cir::IntAttr idx) { return idx.getSInt() == -1; }) !=
indicesEltsRange.end()) {
return {};
}

mlir::ArrayAttr vec1Elts = vec1Attr.getElts();
mlir::ArrayAttr vec2Elts = vec2Attr.getElts();
mlir::ArrayAttr indicesElts = adaptor.getIndices();

SmallVector<mlir::Attribute, 16> elements;
elements.reserve(indicesElts.size());

uint64_t vec1Size = vec1Elts.size();
for (const auto &idxAttr : indicesElts.getAsRange<cir::IntAttr>()) {
if (idxAttr.getSInt() == -1) {
elements.push_back(cir::UndefAttr::get(vec1ElemTy));
continue;
}

for (const auto &idxAttr : indicesEltsRange) {
uint64_t idxValue = idxAttr.getUInt();
elements.push_back(idxValue < vec1Size ? vec1Elts[idxValue]
: vec2Elts[idxValue - vec1Size]);
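A plain-C++ model of the adjusted fold logic (the function name foldShuffle is ours, not from the patch): it produces the folded element list, or nothing when the mask contains -1, mirroring the bail-out above:

#include <cstdint>
#include <optional>
#include <vector>

// Select each result element from v1 or v2 by index; an index of -1 cannot be
// represented in the folded constant, so the fold is abandoned.
static std::optional<std::vector<int64_t>>
foldShuffle(const std::vector<int64_t> &v1, const std::vector<int64_t> &v2,
            const std::vector<int64_t> &mask) {
  std::vector<int64_t> result;
  result.reserve(mask.size());
  for (int64_t idx : mask) {
    if (idx == -1)
      return std::nullopt; // keep the cir.vec.shuffle op unfolded
    result.push_back(idx < static_cast<int64_t>(v1.size())
                         ? v1[idx]
                         : v2[idx - v1.size()]);
  }
  return result;
}

// foldShuffle({1, 2, 3, 4}, {5, 6, 7, 8}, {0, 5, 2, 7}) -> {1, 6, 3, 8}
// foldShuffle({1, 2, 3}, {4, 5, 6}, {0, 1, -1})         -> std::nullopt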
38 changes: 38 additions & 0 deletions clang/test/CIR/CodeGen/vector-with-size-3.cpp
@@ -0,0 +1,38 @@
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG

typedef int vi3 __attribute__((ext_vector_type(3)));

void store_load() {
vi3 a;
vi3 b = a;
}

// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr<!cir.vector<3 x !s32i>>, ["a"]
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr<!cir.vector<3 x !s32i>>, ["b", init]
// CIR: %[[A_V4:.*]] = cir.cast(bitcast, %[[A_ADDR]] : !cir.ptr<!cir.vector<3 x !s32i>>), !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: %[[TMP_A_V4:.*]] = cir.load{{.*}} %[[A_V4]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
Contributor (on lines +17 to +18): What guarantees that this isn't reading beyond the memory allocated for A_ADDR? In practice, it will be true, but it feels like static analysis should flag this as suspicious behavior. If we're loading 4xi32, it seems like we should also be reserving memory for that type.

// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
// CIR: %[[SHUFFLE_V4:.*]] = cir.vec.shuffle(%3, %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i] : !cir.vector<3 x !s32i>
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<3 x !s32i>
// CIR: %[[SHUFFLE_V3:.*]] = cir.vec.shuffle(%[[SHUFFLE_V4]], %[[POISON]] : !cir.vector<3 x !s32i>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<-1> : !s64i] : !cir.vector<4 x !s32i>
// CIR: %[[TMP_B_V4:.*]] = cir.cast(bitcast, %[[B_ADDR]] : !cir.ptr<!cir.vector<3 x !s32i>>), !cir.ptr<!cir.vector<4 x !s32i>>
// CIR: cir.store{{.*}} %[[SHUFFLE_V3]], %[[TMP_B_V4]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>

// LLVM: %[[A_ADDR:.*]] = alloca <3 x i32>, i64 1, align 16
// LLVM: %[[B_ADDR:.*]] = alloca <3 x i32>, i64 1, align 16
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// LLVM: %[[SHUFFLE_V4:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
// LLVM: %[[SHUFFLE_V3:.*]] = shufflevector <3 x i32> %[[SHUFFLE_V4]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// LLVM: store <4 x i32> %[[SHUFFLE_V3]], ptr %[[B_ADDR]], align 16

// OGCG: %[[A_ADDR:.*]] = alloca <3 x i32>, align 16
// OGCG: %[[B_ADDR:.*]] = alloca <3 x i32>, align 16
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// OGCG: %[[SHUFFLE_V4:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
// OGCG: %[[SHUFFLE_V3:.*]] = shufflevector <3 x i32> %[[SHUFFLE_V4]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// OGCG: store <4 x i32> %[[SHUFFLE_V3]], ptr %[[B_ADDR]], align 16