@@ -269,6 +269,40 @@ static mlir::Value emitX86FunnelShift(CIRGenBuilderTy &builder,
269269 mlir::ValueRange{op0, op1, amt});
270270}
271271
272+ static mlir::Value emitX86Muldq (CIRGenBuilderTy &builder, mlir::Location loc,
273+ bool isSigned,
274+ SmallVectorImpl<mlir::Value> &ops,
275+ unsigned opTypePrimitiveSizeInBits) {
276+ mlir::Type ty = cir::VectorType::get (builder.getSInt64Ty (),
277+ opTypePrimitiveSizeInBits / 64 );
278+ mlir::Value lhs = builder.createBitcast (loc, ops[0 ], ty);
279+ mlir::Value rhs = builder.createBitcast (loc, ops[1 ], ty);
280+ if (isSigned) {
281+ cir::ConstantOp shiftAmt =
282+ builder.getConstant (loc, cir::IntAttr::get (builder.getSInt64Ty (), 32 ));
283+ cir::VecSplatOp shiftSplatVecOp =
284+ cir::VecSplatOp::create (builder, loc, ty, shiftAmt.getResult ());
285+ mlir::Value shiftSplatValue = shiftSplatVecOp.getResult ();
286+ // In CIR, right-shift operations are automatically lowered to either an
287+ // arithmetic or logical shift depending on the operand type. The purpose
288+ // of the shifts here is to propagate the sign bit of the 32-bit input
289+ // into the upper bits of each vector lane.
290+ lhs = builder.createShift (loc, lhs, shiftSplatValue, true );
291+ lhs = builder.createShift (loc, lhs, shiftSplatValue, false );
292+ rhs = builder.createShift (loc, rhs, shiftSplatValue, true );
293+ rhs = builder.createShift (loc, rhs, shiftSplatValue, false );
294+ } else {
295+ cir::ConstantOp maskScalar = builder.getConstant (
296+ loc, cir::IntAttr::get (builder.getSInt64Ty (), 0xffffffff ));
297+ cir::VecSplatOp mask =
298+ cir::VecSplatOp::create (builder, loc, ty, maskScalar.getResult ());
299+ // Clear the upper bits
300+ lhs = builder.createAnd (loc, lhs, mask);
301+ rhs = builder.createAnd (loc, rhs, mask);
302+ }
303+ return builder.createMul (loc, lhs, rhs);
304+ }
305+
272306mlir::Value CIRGenFunction::emitX86BuiltinExpr (unsigned builtinID,
273307 const CallExpr *expr) {
274308 if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -1212,12 +1246,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
12121246 case X86::BI__builtin_ia32_sqrtph512:
12131247 case X86::BI__builtin_ia32_sqrtps512:
12141248 case X86::BI__builtin_ia32_sqrtpd512:
1249+ cgm.errorNYI (expr->getSourceRange (),
1250+ std::string (" unimplemented X86 builtin call: " ) +
1251+ getContext ().BuiltinInfo .getName (builtinID));
1252+ return {};
12151253 case X86::BI__builtin_ia32_pmuludq128:
12161254 case X86::BI__builtin_ia32_pmuludq256:
1217- case X86::BI__builtin_ia32_pmuludq512:
1255+ case X86::BI__builtin_ia32_pmuludq512: {
1256+ unsigned opTypePrimitiveSizeInBits =
1257+ cgm.getDataLayout ().getTypeSizeInBits (ops[0 ].getType ());
1258+ return emitX86Muldq (builder, getLoc (expr->getExprLoc ()), /* isSigned*/ false ,
1259+ ops, opTypePrimitiveSizeInBits);
1260+ }
12181261 case X86::BI__builtin_ia32_pmuldq128:
12191262 case X86::BI__builtin_ia32_pmuldq256:
1220- case X86::BI__builtin_ia32_pmuldq512:
1263+ case X86::BI__builtin_ia32_pmuldq512: {
1264+ unsigned opTypePrimitiveSizeInBits =
1265+ cgm.getDataLayout ().getTypeSizeInBits (ops[0 ].getType ());
1266+ return emitX86Muldq (builder, getLoc (expr->getExprLoc ()), /* isSigned*/ true ,
1267+ ops, opTypePrimitiveSizeInBits);
1268+ }
12211269 case X86::BI__builtin_ia32_pternlogd512_mask:
12221270 case X86::BI__builtin_ia32_pternlogq512_mask:
12231271 case X86::BI__builtin_ia32_pternlogd128_mask:
0 commit comments