-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[CIR] Implement handling for VectorType with size 3 #161232
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -314,21 +314,36 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, | |||||
bool isInit, bool isNontemporal) { | ||||||
assert(!cir::MissingFeatures::opLoadStoreThreadLocal()); | ||||||
|
||||||
mlir::Type srcTy = addr.getElementType(); | ||||||
if (const auto *clangVecTy = ty->getAs<clang::VectorType>()) { | ||||||
// Boolean vectors use `iN` as storage type. | ||||||
if (clangVecTy->isExtVectorBoolType()) | ||||||
cgm.errorNYI(addr.getPointer().getLoc(), | ||||||
"emitStoreOfScalar ExtVectorBoolType"); | ||||||
if (auto vecTy = dyn_cast<cir::VectorType>(srcTy)) { | ||||||
cir::VectorType newVecTy = | ||||||
cgm.getTargetCIRGenInfo().getABIInfo().getOptimalVectorMemoryType( | ||||||
vecTy, getLangOpts()); | ||||||
|
||||||
if (!clangVecTy->isPackedVectorBoolType(getContext()) && | ||||||
vecTy != newVecTy) { | ||||||
|
||||||
const unsigned oldNumElements = vecTy.getSize(); | ||||||
const unsigned newNumElements = newVecTy.getSize(); | ||||||
SmallVector<mlir::Attribute, 8> indices; | ||||||
indices.reserve(newNumElements); | ||||||
for (unsigned i = 0; i < newNumElements; ++i) { | ||||||
int64_t value = i < oldNumElements ? (int64_t)i : -1; | ||||||
indices.push_back(cir::IntAttr::get(builder.getSInt64Ty(), value)); | ||||||
} | ||||||
|
||||||
// Handle vectors of size 3 like size 4 for better performance. | ||||||
const mlir::Type elementType = addr.getElementType(); | ||||||
const auto vecTy = cast<cir::VectorType>(elementType); | ||||||
cir::ConstantOp poison = builder.getConstant( | ||||||
value.getLoc(), cir::PoisonAttr::get(value.getType())); | ||||||
value = | ||||||
cir::VecShuffleOp::create(builder, value.getLoc(), newVecTy, value, | ||||||
poison, builder.getArrayAttr(indices)); | ||||||
srcTy = newVecTy; | ||||||
} | ||||||
|
||||||
// TODO(CIR): Use `ABIInfo::getOptimalVectorMemoryType` once it upstreamed | ||||||
assert(!cir::MissingFeatures::cirgenABIInfo()); | ||||||
if (vecTy.getSize() == 3 && !getLangOpts().PreserveVec3Type) | ||||||
cgm.errorNYI(addr.getPointer().getLoc(), | ||||||
"emitStoreOfScalar Vec3 & PreserveVec3Type disabled"); | ||||||
if (addr.getElementType() != srcTy) | ||||||
addr = addr.withElementType(builder, srcTy); | ||||||
} | ||||||
} | ||||||
|
||||||
value = emitToMemory(value, ty); | ||||||
|
@@ -565,13 +580,31 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, | |||||
return nullptr; | ||||||
} | ||||||
|
||||||
const auto vecTy = cast<cir::VectorType>(eltTy); | ||||||
// Handles vectors of sizes that are likely to be expanded to a larger size | ||||||
// to optimize performance. | ||||||
auto vecTy = cast<cir::VectorType>(eltTy); | ||||||
cir::VectorType newVecTy = | ||||||
cgm.getTargetCIRGenInfo().getABIInfo().getOptimalVectorMemoryType( | ||||||
vecTy, getLangOpts()); | ||||||
|
||||||
if (vecTy != newVecTy) { | ||||||
Address cast = addr.withElementType(builder, newVecTy); | ||||||
mlir::Value value = builder.createLoad(cgm.getLoc(loc), cast, isVolatile); | ||||||
|
||||||
unsigned oldNumElements = vecTy.getSize(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you assert that |
||||||
SmallVector<mlir::Attribute, 8> indices; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
indices.reserve(oldNumElements); | ||||||
for (unsigned i = 0; i < oldNumElements; i++) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for braces here. |
||||||
indices.push_back(cir::IntAttr::get(builder.getSInt64Ty(), i)); | ||||||
} | ||||||
|
||||||
cir::ConstantOp poison = builder.getConstant( | ||||||
value.getLoc(), cir::PoisonAttr::get(value.getType())); | ||||||
value = builder.create<cir::VecShuffleOp>( | ||||||
cgm.getLoc(loc), vecTy, value, poison, builder.getArrayAttr(indices)); | ||||||
|
||||||
// Handle vectors of size 3 like size 4 for better performance. | ||||||
assert(!cir::MissingFeatures::cirgenABIInfo()); | ||||||
if (vecTy.getSize() == 3 && !getLangOpts().PreserveVec3Type) | ||||||
cgm.errorNYI(addr.getPointer().getLoc(), | ||||||
"emitLoadOfScalar Vec3 & PreserveVec3Type disabled"); | ||||||
return value; | ||||||
} | ||||||
} | ||||||
|
||||||
assert(!cir::MissingFeatures::opLoadStoreTbaa()); | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,6 +60,15 @@ clang::CIRGen::createX8664TargetCIRGenInfo(CIRGenTypes &cgt) { | |
|
||
ABIInfo::~ABIInfo() noexcept = default; | ||
|
||
cir::VectorType | ||
ABIInfo::getOptimalVectorMemoryType(cir::VectorType ty, | ||
const LangOptions &opt) const { | ||
if (ty.getSize() == 3 && !opt.PreserveVec3Type) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for braces here |
||
return cir::VectorType::get(ty.getElementType(), 4); | ||
} | ||
return ty; | ||
} | ||
|
||
bool TargetCIRGenInfo::isNoProtoCallVariadic( | ||
const FunctionNoProtoType *fnType) const { | ||
// The following conventions are known to require this to be false: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir | ||
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR | ||
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll | ||
// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM | ||
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll | ||
// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG | ||
|
||
typedef int vi3 __attribute__((ext_vector_type(3))); | ||
|
||
void store_load() { | ||
vi3 a; | ||
vi3 b = a; | ||
} | ||
|
||
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr<!cir.vector<3 x !s32i>>, ["a"] | ||
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<3 x !s32i>, !cir.ptr<!cir.vector<3 x !s32i>>, ["b", init] | ||
// CIR: %[[A_V4:.*]] = cir.cast(bitcast, %[[A_ADDR]] : !cir.ptr<!cir.vector<3 x !s32i>>), !cir.ptr<!cir.vector<4 x !s32i>> | ||
// CIR: %[[TMP_A_V4:.*]] = cir.load{{.*}} %[[A_V4]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> | ||
Comment on lines
+17
to
+18
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What guarantees that this isn't reading beyond the memory allocated for A_ADDR? In practice, it will be true, but it feels like static analysis should flag this as suspicious behavior. If we're loading 4xi32, it seems like we should also be reserving memory for that type. |
||
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i> | ||
// CIR: %[[SHUFFLE_V4:.*]] = cir.vec.shuffle(%3, %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i] : !cir.vector<3 x !s32i> | ||
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<3 x !s32i> | ||
// CIR: %[[SHUFFLE_V3:.*]] = cir.vec.shuffle(%[[SHUFFLE_V4]], %[[POISON]] : !cir.vector<3 x !s32i>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<-1> : !s64i] : !cir.vector<4 x !s32i> | ||
// CIR: %[[TMP_B_V4:.*]] = cir.cast(bitcast, %[[B_ADDR]] : !cir.ptr<!cir.vector<3 x !s32i>>), !cir.ptr<!cir.vector<4 x !s32i>> | ||
// CIR: cir.store{{.*}} %[[SHUFFLE_V3]], %[[TMP_B_V4]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> | ||
|
||
// LLVM: %[[A_ADDR:.*]] = alloca <3 x i32>, i64 1, align 16 | ||
// LLVM: %[[B_ADDR:.*]] = alloca <3 x i32>, i64 1, align 16 | ||
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 | ||
// LLVM: %[[SHUFFLE_V4:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> | ||
// LLVM: %[[SHUFFLE_V3:.*]] = shufflevector <3 x i32> %[[SHUFFLE_V4]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> | ||
// LLVM: store <4 x i32> %[[SHUFFLE_V3]], ptr %[[B_ADDR]], align 16 | ||
|
||
// OGCG: %[[A_ADDR:.*]] = alloca <3 x i32>, align 16 | ||
// OGCG: %[[B_ADDR:.*]] = alloca <3 x i32>, align 16 | ||
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 | ||
// OGCG: %[[SHUFFLE_V4:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> | ||
// OGCG: %[[SHUFFLE_V3:.*]] = shufflevector <3 x i32> %[[SHUFFLE_V4]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> | ||
// OGCG: store <4 x i32> %[[SHUFFLE_V3]], ptr %[[B_ADDR]], align 16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At some point we may need to check that this is not a scalable vector type. Can you add a missing feature assert for scalable vectors here?