cast to unsigned properly

moar55 · moar55 · commit e556764cc428 · 2025-11-25T23:46:44.000+01:00
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -95,27 +95,35 @@ static mlir::Value getMaskVecValue(CIRGenFunction &cgf, const CallExpr *expr,
 static mlir::Value emitX86FunnelShift(CIRGenFunction &cgf, const CallExpr *e,
                                       mlir::Value &op0, mlir::Value &op1,
                                       mlir::Value &amt, bool isRight) {
-  auto ty = op0.getType();
+  auto &builder = cgf.getBuilder();
+  auto op0Ty = op0.getType();
 
   // Amount may be scalar immediate, in which case create a splat vector.
   // Funnel shifts amounts are treated as modulo and types are all power-of-2
   // so we only care about the lowest log2 bits anyway.
-  if (amt.getType() != ty) {
-    auto vecTy = mlir::cast<cir::VectorType>(ty);
-
+  if (amt.getType() != op0Ty) {
+    auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
     auto numElems = vecTy.getSize();
-    auto vecElemType = mlir::cast<cir::IntType>(vecTy.getElementType());
-    auto signlessType =
-        cir::IntType::get(&cgf.getMLIRContext(), vecElemType.getWidth(), false);
-    amt = cgf.getBuilder().createIntCast(amt, signlessType);
 
-    amt = cir::VecSplatOp::create(cgf.getBuilder(), cgf.getLoc(e->getExprLoc()),
-                                  cir::VectorType::get(signlessType, numElems),
-                                  amt);
+    auto amtTy = mlir::cast<cir::IntType>(amt.getType());
+    auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
+
+    // Cast to same width unsigned if not already unsigned.
+    if (amtTy.isSigned()) {
+      auto unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
+      amt = builder.createIntCast(amt,
+                                  builder.getUIntNTy(unsignedAmtTy.getWidth()));
+    }
+    // Cast the unsigned `amt` to operand element type's width unsigned.
+    auto unsingedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
+    amt = builder.createIntCast(amt, unsingedVecElemType);
+    amt = cir::VecSplatOp::create(
+        builder, cgf.getLoc(e->getExprLoc()),
+        cir::VectorType::get(unsingedVecElemType, numElems), amt);
   }
 
   const std::string intrinsicName = isRight ? "fshr" : "fshl";
-  return emitIntrinsicCallOp(cgf, e, intrinsicName, ty,
+  return emitIntrinsicCallOp(cgf, e, intrinsicName, op0Ty,
                              mlir::ValueRange{op0, op1, amt});
 }
 
diff --git a/clang/test/CIR/CodeGen/X86/xop-builtins.c b/clang/test/CIR/CodeGen/X86/xop-builtins.c
@@ -43,9 +43,9 @@ __m128i test_mm_roti_epi8(__m128i a) {
 
 __m128i test_mm_roti_epi16(__m128i a) {
   // CIR-LABEL: test_mm_roti_epi16
-  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u16i
-  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}16i, !cir.vector<8 x !{{[us]}}16i> 
-  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>) -> !cir.vector<8 x !{{[su]}}16i> 
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}16i, !cir.vector<8 x !u16i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !u16i>) -> !cir.vector<8 x !{{[su]}}16i> 
   // LLVM-LABEL: test_mm_roti_epi16
   // LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <8 x i16>
   // LLVM: {{%.*}} = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %[[CASTED_VAR]], <8 x i16> %[[CASTED_VAR]], <8 x i16> splat (i16 50))
@@ -58,17 +58,23 @@ __m128i test_mm_roti_epi16(__m128i a) {
 //NOTE: This only works as I expect for CIR but not for LLVMIR
 __m128i test_mm_roti_epi32(__m128i a) {
   // CIR-LABEL: test_mm_roti_epi32
-  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u32i
-  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}32i, !cir.vector<4 x !{{[us]}}32i> 
-  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>) -> !cir.vector<4 x !{{[su]}}32i> 
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}32i, !cir.vector<4 x !u32i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !u32i>) -> !cir.vector<4 x !{{[su]}}32i> 
+  // LLVM-LABEL: test_mm_roti_epi32
+  // LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <4 x i32>
+  // LLVM: {{%.*}} = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %[[CASTED_VAR]], <4 x i32> %[[CASTED_VAR]], <4 x i32> splat (i32 226))
+  // OGCG-LABEL: test_mm_roti_epi32
+  // OGCG: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <4 x i32>
+  // OGCG: {{%.*}} = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %[[CASTED_VAR]], <4 x i32> %[[CASTED_VAR]], <4 x i32> splat (i32 226))
   return _mm_roti_epi32(a, -30);
  }
 
 __m128i test_mm_roti_epi64(__m128i a) {
   // CIR-LABEL: test_mm_roti_epi64
-  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u64i
-  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{.}}64i, !cir.vector<2 x !{{[us]}}64i> 
-  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !u64i>) -> !cir.vector<2 x !{{[su]}}64i> 
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !u64i, !cir.vector<2 x !u64i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !u64i>) -> !cir.vector<2 x !s64i> 
   // LLVM-LABEL: test_mm_roti_epi64
   // LLVM: %[[VAR:.*]] = load <2 x i64>, ptr {{%.*}}, align 16
   // LLVM: {{%.*}} = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x i64> %[[VAR]], <2 x i64> splat (i64 100))