[CIR] Implement __builtin_ia32_cmpnltps/cmpnltpd (#1908)

badumbatish · web-flow · commit 6ce3969b71b9 · 2025-09-25T09:59:01.000-07:00
This PR adds supports for __builtin_ia32_cmpnltps/cmpnltpd. Depends on #1893.
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1740,7 +1740,8 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
     llvm_unreachable("cmpneqps NYI");
   case X86::BI__builtin_ia32_cmpnltps:
   case X86::BI__builtin_ia32_cmpnltpd:
-    llvm_unreachable("cmpnltps NYI");
+    return getVectorFCmpIR(cir::CmpOpKind::lt, /*shouldInvert=*/true,
+                           /*isSignaling=*/true);
   case X86::BI__builtin_ia32_cmpnleps:
   case X86::BI__builtin_ia32_cmpnlepd:
     return getVectorFCmpIR(cir::CmpOpKind::le, /*shouldInvert=*/true,
diff --git a/clang/test/CIR/CodeGen/builtin-fcmp-sse.c b/clang/test/CIR/CodeGen/builtin-fcmp-sse.c
@@ -1,12 +1,11 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o -  | FileCheck %s --check-prefix=CIR
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OG
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OGCG
 
 typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
 typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
 
 __m128 test_cmpnleps(__m128 A, __m128 B) {
-
   // CIR-LABEL: @test_cmpnleps
   // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
   // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
@@ -21,17 +20,16 @@ __m128 test_cmpnleps(__m128 A, __m128 B) {
   // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
   // LLVM-NEXT: ret <4 x float> [[CAST]]
 
-  // OG-LABEL: test_cmpnleps
-  // OG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
-  // OG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
-  // OG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
-  // OG-NEXT: ret <4 x float> [[CAST]]
+  // OGCG-LABEL: test_cmpnleps
+  // OGCG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
   return __builtin_ia32_cmpnleps(A, B);
 }
 
 
 __m128d test_cmpnlepd(__m128d A, __m128d B) {
-
   // CIR-LABEL: @test_cmpnlepd
   // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) :  !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
   // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
@@ -46,10 +44,59 @@ __m128d test_cmpnlepd(__m128d A, __m128d B) {
   // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
   // LLVM-NEXT: ret <2 x double> [[CAST]]
 
-  // OG-LABEL: test_cmpnlepd
-  // OG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
-  // OG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
-  // OG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
-  // OG-NEXT: ret <2 x double> [[CAST]]
+  // OGCG-LABEL: test_cmpnlepd
+  // OGCG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
  return  __builtin_ia32_cmpnlepd(A, B);
 }
+
+
+__m128 test_cmpnltps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnltps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast(bitcast, [[NOTCMP:%.*]] : !cir.vector<!s32i x 4>), !cir.vector<!cir.float x 4>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] :  !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnltps
+  // LLVM: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltps
+  // OGCG: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnltps(A, B);
+}
+
+
+__m128d test_cmpnltpd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnltpd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) :  !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast(bitcast, [[NOTCMP]] :  !cir.vector<!s64i x 2>), !cir.vector<!cir.double x 2>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_cmpnltpd
+  // LLVM: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltpd
+  // OGCG: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+ return  __builtin_ia32_cmpnltpd(A, B);
+}
+