Skip to content

Commit 36692aa

Browse files
authored
[CIR] Implement Logical OR for VectorType (#158668)
This change adds support for the logical OR op for VectorType. Issue #136487
1 parent 50b9ca4 commit 36692aa

File tree

3 files changed

+95
-3
lines changed

3 files changed

+95
-3
lines changed

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,7 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
880880
if (ty.UseExcessPrecision(cgf.getContext())) {
881881
if (ty->getAs<VectorType>()) {
882882
assert(!cir::MissingFeatures::vectorType());
883-
cgf.cgm.errorNYI("promotion to vector type");
883+
cgf.cgm.errorNYI("getPromotionType: promotion to vector type");
884884
return QualType();
885885
}
886886
return cgf.getContext().FloatTy;
@@ -1095,8 +1095,20 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
10951095

10961096
mlir::Value VisitBinLOr(const clang::BinaryOperator *e) {
10971097
if (e->getType()->isVectorType()) {
1098-
assert(!cir::MissingFeatures::vectorType());
1099-
return {};
1098+
mlir::Location loc = cgf.getLoc(e->getExprLoc());
1099+
auto vecTy = mlir::cast<cir::VectorType>(cgf.convertType(e->getType()));
1100+
mlir::Value zeroValue = builder.getNullValue(vecTy.getElementType(), loc);
1101+
SmallVector<mlir::Value, 16> elements(vecTy.getSize(), zeroValue);
1102+
auto zeroVec = cir::VecCreateOp::create(builder, loc, vecTy, elements);
1103+
1104+
mlir::Value lhs = Visit(e->getLHS());
1105+
mlir::Value rhs = Visit(e->getRHS());
1106+
1107+
auto cmpOpKind = cir::CmpOpKind::ne;
1108+
lhs = cir::VecCmpOp::create(builder, loc, vecTy, cmpOpKind, lhs, zeroVec);
1109+
rhs = cir::VecCmpOp::create(builder, loc, vecTy, cmpOpKind, rhs, zeroVec);
1110+
mlir::Value vecOr = builder.createOr(loc, lhs, rhs);
1111+
return builder.createIntCast(vecOr, vecTy);
11001112
}
11011113

11021114
assert(!cir::MissingFeatures::instrumentation());

clang/test/CIR/CodeGen/vector-ext.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,3 +1176,43 @@ void foo21() {
11761176

11771177
// OGCG: %[[SIZE:.*]] = alloca i64, align 8
11781178
// OGCG: store i64 4, ptr %[[SIZE]], align 8
1179+
1180+
void foo22() {
1181+
vi4 a;
1182+
vi4 b;
1183+
vi4 c = a || b;
1184+
}
1185+
1186+
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
1187+
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
1188+
// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["c", init]
1189+
// CIR: %[[ZERO_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i>
1190+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
1191+
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
1192+
// CIR: %[[NE_A_ZERO:.*]] = cir.vec.cmp(ne, %[[TMP_A]], %[[ZERO_VEC]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
1193+
// CIR: %[[NE_B_ZERO:.*]] = cir.vec.cmp(ne, %[[TMP_B]], %[[ZERO_VEC]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
1194+
// CIR: %[[RESULT:.*]] = cir.binop(or, %[[NE_A_ZERO]], %[[NE_B_ZERO]]) : !cir.vector<4 x !s32i>
1195+
// CIR: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
1196+
1197+
// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
1198+
// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
1199+
// LLVM: %[[C_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
1200+
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
1201+
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
1202+
// LLVM: %[[NE_A_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_A]], zeroinitializer
1203+
// LLVM: %[[NE_A_ZERO_SEXT:.*]] = sext <4 x i1> %[[NE_A_ZERO]] to <4 x i32>
1204+
// LLVM: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
1205+
// LLVM: %[[NE_B_ZERO_SEXT:.*]] = sext <4 x i1> %[[NE_B_ZERO]] to <4 x i32>
1206+
// LLVM: %[[RESULT:.*]] = or <4 x i32> %[[NE_A_ZERO_SEXT]], %[[NE_B_ZERO_SEXT]]
1207+
// LLVM: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
1208+
1209+
// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
1210+
// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
1211+
// OGCG: %[[C_ADDR:.*]] = alloca <4 x i32>, align 16
1212+
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
1213+
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
1214+
// OGCG: %[[NE_A_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_A]], zeroinitializer
1215+
// OGCG: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
1216+
// OGCG: %[[VEC_OR:.*]] = or <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]]
1217+
// OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
1218+
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16

clang/test/CIR/CodeGen/vector.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,3 +1218,43 @@ void foo24() {
12181218

12191219
// OGCG: %[[SIZE:.*]] = alloca i64, align 8
12201220
// OGCG: store i64 4, ptr %[[SIZE]], align 8
1221+
1222+
void foo25() {
1223+
vi4 a;
1224+
vi4 b;
1225+
vi4 c = a || b;
1226+
}
1227+
1228+
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
1229+
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
1230+
// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["c", init]
1231+
// CIR: %[[ZERO_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i>
1232+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
1233+
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
1234+
// CIR: %[[NE_A_ZERO:.*]] = cir.vec.cmp(ne, %[[TMP_A]], %[[ZERO_VEC]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
1235+
// CIR: %[[NE_B_ZERO:.*]] = cir.vec.cmp(ne, %[[TMP_B]], %[[ZERO_VEC]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
1236+
// CIR: %[[RESULT:.*]] = cir.binop(or, %[[NE_A_ZERO]], %[[NE_B_ZERO]]) : !cir.vector<4 x !s32i>
1237+
// CIR: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
1238+
1239+
// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
1240+
// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
1241+
// LLVM: %[[C_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
1242+
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
1243+
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
1244+
// LLVM: %[[NE_A_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_A]], zeroinitializer
1245+
// LLVM: %[[NE_A_ZERO_SEXT:.*]] = sext <4 x i1> %[[NE_A_ZERO]] to <4 x i32>
1246+
// LLVM: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
1247+
// LLVM: %[[NE_B_ZERO_SEXT:.*]] = sext <4 x i1> %[[NE_B_ZERO]] to <4 x i32>
1248+
// LLVM: %[[RESULT:.*]] = or <4 x i32> %[[NE_A_ZERO_SEXT]], %[[NE_B_ZERO_SEXT]]
1249+
// LLVM: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
1250+
1251+
// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
1252+
// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
1253+
// OGCG: %[[C_ADDR:.*]] = alloca <4 x i32>, align 16
1254+
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
1255+
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
1256+
// OGCG: %[[NE_A_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_A]], zeroinitializer
1257+
// OGCG: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
1258+
// OGCG: %[[VEC_OR:.*]] = or <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]]
1259+
// OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
1260+
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16

0 commit comments

Comments
 (0)