Commit b28d2ea

[CIR] Implement Logical AND for VectorType (#158696)
This change adds support for the logical AND op for VectorType. Issue #136487
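
For context, `&&` on Clang vector-extension types is elementwise and does not short-circuit: each operand is compared against zero lane by lane, the resulting boolean lanes are ANDed, and the result is sign-extended back to the element type. A minimal standalone sketch of the source-level behavior being compiled here; the `vi4` typedef is an assumption mirroring what the tests below appear to use:

// Minimal sketch, assuming Clang's vector_size extension; `vi4` matches
// the 4 x i32 vector type exercised by the tests in this commit.
typedef int vi4 __attribute__((vector_size(16)));

vi4 vec_logical_and(vi4 a, vi4 b) {
  // For vector operands, && is elementwise with no short-circuiting:
  // each lane is -1 (all ones) where a[i] != 0 and b[i] != 0, else 0,
  // i.e. the same as ((a != 0) & (b != 0)) after sign extension.
  return a && b;
}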
1 parent: e229857

3 files changed (+94, −2 lines)
clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

Lines changed: 14 additions & 2 deletions
@@ -1063,8 +1063,20 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
 
   mlir::Value VisitBinLAnd(const clang::BinaryOperator *e) {
     if (e->getType()->isVectorType()) {
-      assert(!cir::MissingFeatures::vectorType());
-      return {};
+      mlir::Location loc = cgf.getLoc(e->getExprLoc());
+      auto vecTy = mlir::cast<cir::VectorType>(cgf.convertType(e->getType()));
+      mlir::Value zeroValue = builder.getNullValue(vecTy.getElementType(), loc);
+      SmallVector<mlir::Value, 16> elements(vecTy.getSize(), zeroValue);
+      auto zeroVec = cir::VecCreateOp::create(builder, loc, vecTy, elements);
+
+      mlir::Value lhs = Visit(e->getLHS());
+      mlir::Value rhs = Visit(e->getRHS());
+
+      auto cmpOpKind = cir::CmpOpKind::ne;
+      lhs = cir::VecCmpOp::create(builder, loc, vecTy, cmpOpKind, lhs, zeroVec);
+      rhs = cir::VecCmpOp::create(builder, loc, vecTy, cmpOpKind, rhs, zeroVec);
+      mlir::Value vecOr = builder.createAnd(loc, lhs, rhs);
+      return builder.createIntCast(vecOr, vecTy);
     }
 
     assert(!cir::MissingFeatures::instrumentation());

clang/test/CIR/CodeGen/vector-ext.cpp

Lines changed: 40 additions & 0 deletions
@@ -1216,3 +1216,43 @@ void foo22() {
 // OGCG: %[[VEC_OR:.*]] = or <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]]
 // OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
 // OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
+
+void foo23() {
+  vi4 a;
+  vi4 b;
+  vi4 c = a && b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["c", init]
+// CIR: %[[ZERO_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i>
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[NE_A_ZERO:.*]] = cir.vec.cmp(ne, %[[TMP_A]], %[[ZERO_VEC]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+// CIR: %[[NE_B_ZERO:.*]] = cir.vec.cmp(ne, %[[TMP_B]], %[[ZERO_VEC]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+// CIR: %[[RESULT:.*]] = cir.binop(and, %[[NE_A_ZERO]], %[[NE_B_ZERO]]) : !cir.vector<4 x !s32i>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[C_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[NE_A_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_A]], zeroinitializer
+// LLVM: %[[NE_A_ZERO_SEXT:.*]] = sext <4 x i1> %[[NE_A_ZERO]] to <4 x i32>
+// LLVM: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
+// LLVM: %[[NE_B_ZERO_SEXT:.*]] = sext <4 x i1> %[[NE_B_ZERO]] to <4 x i32>
+// LLVM: %[[RESULT:.*]] = and <4 x i32> %[[NE_A_ZERO_SEXT]], %[[NE_B_ZERO_SEXT]]
+// LLVM: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[C_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[NE_A_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_A]], zeroinitializer
+// OGCG: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
+// OGCG: %[[VEC_OR:.*]] = and <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]]
+// OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
+// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16

clang/test/CIR/CodeGen/vector.cpp

Lines changed: 40 additions & 0 deletions
@@ -1258,3 +1258,43 @@ void foo25() {
 // OGCG: %[[VEC_OR:.*]] = or <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]]
 // OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
 // OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
+
+void foo26() {
+  vi4 a;
+  vi4 b;
+  vi4 c = a && b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["c", init]
+// CIR: %[[ZERO_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<4 x !s32i>
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[NE_A_ZERO:.*]] = cir.vec.cmp(ne, %[[TMP_A]], %[[ZERO_VEC]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+// CIR: %[[NE_B_ZERO:.*]] = cir.vec.cmp(ne, %[[TMP_B]], %[[ZERO_VEC]]) : !cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>
+// CIR: %[[RESULT:.*]] = cir.binop(and, %[[NE_A_ZERO]], %[[NE_B_ZERO]]) : !cir.vector<4 x !s32i>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[C_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[NE_A_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_A]], zeroinitializer
+// LLVM: %[[NE_A_ZERO_SEXT:.*]] = sext <4 x i1> %[[NE_A_ZERO]] to <4 x i32>
+// LLVM: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
+// LLVM: %[[NE_B_ZERO_SEXT:.*]] = sext <4 x i1> %[[NE_B_ZERO]] to <4 x i32>
+// LLVM: %[[RESULT:.*]] = and <4 x i32> %[[NE_A_ZERO_SEXT]], %[[NE_B_ZERO_SEXT]]
+// LLVM: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[C_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[NE_A_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_A]], zeroinitializer
+// OGCG: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
+// OGCG: %[[VEC_OR:.*]] = and <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]]
+// OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
+// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
