Skip to content

Commit 56a5619

Browse files
authored
[CIR] Implement Type promotion for VectorType (#158715)
This change adds support for type promotion for VectorType. See issue #136487.
1 parent 6744919 commit 56a5619

File tree

3 files changed

+85
-6
lines changed

3 files changed

+85
-6
lines changed

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -878,10 +878,9 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
878878
}
879879

880880
if (ty.UseExcessPrecision(cgf.getContext())) {
881-
if (ty->getAs<VectorType>()) {
882-
assert(!cir::MissingFeatures::vectorType());
883-
cgf.cgm.errorNYI("getPromotionType: promotion to vector type");
884-
return QualType();
881+
if (auto *vt = ty->getAs<VectorType>()) {
882+
unsigned numElements = vt->getNumElements();
883+
return ctx.getVectorType(ctx.FloatTy, numElements, vt->getVectorKind());
885884
}
886885
return cgf.getContext().FloatTy;
887886
}
@@ -2356,4 +2355,4 @@ mlir::Value CIRGenFunction::emitScalarPrePostIncDec(const UnaryOperator *e,
23562355
bool isPre) {
23572356
return ScalarExprEmitter(*this, builder)
23582357
.emitScalarPrePostIncDec(e, lv, kind, isPre);
2359-
}
2358+
}

clang/test/CIR/CodeGen/vector-ext.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ typedef int vi3 __attribute__((ext_vector_type(3)));
1313
typedef int vi2 __attribute__((ext_vector_type(2)));
1414
typedef float vf4 __attribute__((ext_vector_type(4)));
1515
typedef double vd2 __attribute__((ext_vector_type(2)));
16+
typedef _Float16 vh4 __attribute__((ext_vector_type(4)));
1617

1718
vi4 vec_a;
1819
// CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<4 x !s32i>
@@ -1217,6 +1218,45 @@ void foo22() {
12171218
// OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
12181219
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
12191220

1221+
void foo24() {
1222+
vh4 a;
1223+
vh4 b;
1224+
vh4 c = a + b;
1225+
}
1226+
1227+
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["a"]
1228+
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"]
1229+
// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init]
1230+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
1231+
// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
1232+
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
1233+
// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
1234+
// CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float>
1235+
// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16>
1236+
// CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>
1237+
1238+
// LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
1239+
// LLVM: %[[B_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
1240+
// LLVM: %[[C_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
1241+
// LLVM: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
1242+
// LLVM: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
1243+
// LLVM: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
1244+
// LLVM: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
1245+
// LLVM: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
1246+
// LLVM: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
1247+
// LLVM: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
1248+
1249+
// OGCG: %[[A_ADDR:.*]] = alloca <4 x half>, align 8
1250+
// OGCG: %[[B_ADDR:.*]] = alloca <4 x half>, align 8
1251+
// OGCG: %[[C_ADDR:.*]] = alloca <4 x half>, align 8
1252+
// OGCG: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
1253+
// OGCG: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
1254+
// OGCG: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
1255+
// OGCG: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
1256+
// OGCG: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
1257+
// OGCG: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
1258+
// OGCG: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
1259+
12201260
void foo23() {
12211261
vi4 a;
12221262
vi4 b;

clang/test/CIR/CodeGen/vector.cpp

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ typedef unsigned int uvi4 __attribute__((vector_size(16)));
1212
typedef float vf4 __attribute__((vector_size(16)));
1313
typedef double vd2 __attribute__((vector_size(16)));
1414
typedef long long vll2 __attribute__((vector_size(16)));
15+
typedef _Float16 vh4 __attribute__((vector_size(8)));
1516

1617
vi4 vec_a;
1718
// CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<4 x !s32i>
@@ -1259,6 +1260,45 @@ void foo25() {
12591260
// OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
12601261
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
12611262

1263+
void foo27() {
1264+
vh4 a;
1265+
vh4 b;
1266+
vh4 c = a + b;
1267+
}
1268+
1269+
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["a"]
1270+
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"]
1271+
// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init]
1272+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
1273+
// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
1274+
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
1275+
// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
1276+
// CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float>
1277+
// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16>
1278+
// CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>
1279+
1280+
// LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
1281+
// LLVM: %[[B_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
1282+
// LLVM: %[[C_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
1283+
// LLVM: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
1284+
// LLVM: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
1285+
// LLVM: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
1286+
// LLVM: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
1287+
// LLVM: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
1288+
// LLVM: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
1289+
// LLVM: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
1290+
1291+
// OGCG: %[[A_ADDR:.*]] = alloca <4 x half>, align 8
1292+
// OGCG: %[[B_ADDR:.*]] = alloca <4 x half>, align 8
1293+
// OGCG: %[[C_ADDR:.*]] = alloca <4 x half>, align 8
1294+
// OGCG: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
1295+
// OGCG: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
1296+
// OGCG: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
1297+
// OGCG: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
1298+
// OGCG: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
1299+
// OGCG: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
1300+
// OGCG: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
1301+
12621302
void foo26() {
12631303
vi4 a;
12641304
vi4 b;
@@ -1297,4 +1337,4 @@ void foo26() {
12971337
// OGCG: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
12981338
// OGCG: %[[VEC_OR:.*]] = and <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]]
12991339
// OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
1300-
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
1340+
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16

0 commit comments

Comments
 (0)