llvm · AmrDeveloper · Sep 20, 2025 · Sep 15, 2025 · Sep 20, 2025
@@ -878,10 +878,9 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
     }
 
     if (ty.UseExcessPrecision(cgf.getContext())) {
-      if (ty->getAs<VectorType>()) {
-        assert(!cir::MissingFeatures::vectorType());
-        cgf.cgm.errorNYI("getPromotionType: promotion to vector type");
-        return QualType();
+      if (auto *vt = ty->getAs<VectorType>()) {
+        unsigned numElements = vt->getNumElements();
+        return ctx.getVectorType(ctx.FloatTy, numElements, vt->getVectorKind());
       }
       return cgf.getContext().FloatTy;
     }
@@ -2356,4 +2355,4 @@ mlir::Value CIRGenFunction::emitScalarPrePostIncDec(const UnaryOperator *e,
                                                     bool isPre) {
   return ScalarExprEmitter(*this, builder)
       .emitScalarPrePostIncDec(e, lv, kind, isPre);
-}
+}
diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp
@@ -13,6 +13,7 @@ typedef int vi3 __attribute__((ext_vector_type(3)));
 typedef int vi2 __attribute__((ext_vector_type(2)));
 typedef float vf4 __attribute__((ext_vector_type(4)));
 typedef double vd2 __attribute__((ext_vector_type(2)));
+typedef _Float16 vh4 __attribute__((ext_vector_type(4)));
 
 vi4 vec_a;
 // CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<4 x !s32i>
@@ -1217,6 +1218,45 @@ void foo22() {
 // OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
 // OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
 
+void foo24() {
+  vh4 a;
+  vh4 b;
+  vh4 c = a + b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
+// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
+// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float>
+// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16>
+// CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[C_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
+// LLVM: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
+// LLVM: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
+// LLVM: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
+// LLVM: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
+// LLVM: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
+// LLVM: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[C_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
+// OGCG: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
+// OGCG: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
+// OGCG: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
+// OGCG: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
+// OGCG: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
+// OGCG: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
+
 void foo23() {
   vi4 a;
   vi4 b;

diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp
@@ -12,6 +12,7 @@ typedef unsigned int uvi4 __attribute__((vector_size(16)));
 typedef float vf4 __attribute__((vector_size(16)));
 typedef double vd2 __attribute__((vector_size(16)));
 typedef long long vll2 __attribute__((vector_size(16)));
+typedef _Float16 vh4 __attribute__((vector_size(8)));
 
 vi4 vec_a;
 // CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<4 x !s32i>
@@ -1259,6 +1260,45 @@ void foo25() {
 // OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
 // OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
 
+void foo27() {
+  vh4 a;
+  vh4 b;
+  vh4 c = a + b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>, ["c", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
+// CIR: %[[TMP_A_F16:.*]] = cir.cast(floating, %[[TMP_A]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<4 x !cir.f16>>, !cir.vector<4 x !cir.f16>
+// CIR: %[[TMP_B_F16:.*]] = cir.cast(floating, %[[TMP_B]] : !cir.vector<4 x !cir.f16>), !cir.vector<4 x !cir.float>
+// CIR: %[[RESULT:.*]] = cir.binop(add, %[[TMP_A_F16]], %[[TMP_B_F16]]) : !cir.vector<4 x !cir.float>
+// CIR: %[[RESULT_VF16:.*]] = cir.cast(floating, %[[RESULT]] : !cir.vector<4 x !cir.float>), !cir.vector<4 x !cir.f16>
+// CIR: cir.store{{.*}} %[[RESULT_VF16]], %[[C_ADDR]] : !cir.vector<4 x !cir.f16>, !cir.ptr<!cir.vector<4 x !cir.f16>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[C_ADDR:.*]] = alloca <4 x half>, i64 1, align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
+// LLVM: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
+// LLVM: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
+// LLVM: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
+// LLVM: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
+// LLVM: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
+// LLVM: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[C_ADDR:.*]] = alloca <4 x half>, align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x half>, ptr %[[A_ADDR]], align 8
+// OGCG: %[[TMP_A_F16:.*]] = fpext <4 x half> %[[TMP_A]] to <4 x float>
+// OGCG: %[[TMP_B:.*]] = load <4 x half>, ptr %[[B_ADDR]], align 8
+// OGCG: %[[TMP_B_F16:.*]] = fpext <4 x half> %[[TMP_B]] to <4 x float>
+// OGCG: %[[RESULT:.*]] = fadd <4 x float> %[[TMP_A_F16]], %[[TMP_B_F16]]
+// OGCG: %[[RESULT_VF16:.*]] = fptrunc <4 x float> %[[RESULT]] to <4 x half>
+// OGCG: store <4 x half> %[[RESULT_VF16]], ptr %[[C_ADDR]], align 8
+
 void foo26() {
   vi4 a;
   vi4 b;
@@ -1297,4 +1337,4 @@ void foo26() {
 // OGCG: %[[NE_B_ZERO:.*]] = icmp ne <4 x i32> %[[TMP_B]], zeroinitializer
 // OGCG: %[[VEC_OR:.*]] = and <4 x i1> %[[NE_A_ZERO]], %[[NE_B_ZERO]]
 // OGCG: %[[RESULT:.*]] = sext <4 x i1> %[[VEC_OR]] to <4 x i32>
-// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16
+// OGCG: store <4 x i32> %[[RESULT]], ptr %[[C_ADDR]], align 16