Skip to content

Commit 41fa386

Browse files
committed
[CIR][X86] Implement lowering for pmuldq / pmuludq builtins
This patch adds CIR codegen support for X86 pmuldq and pmuludq operations, covering the signed and unsigned variants across all supported vector widths. The builtins now lower to the expected CIR representation matching the semantics of the corresponding LLVM intrinsics.
1 parent 036279a commit 41fa386

File tree

5 files changed

+238
-0
lines changed

5 files changed

+238
-0
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,46 @@ static mlir::Value emitX86MaskLogic(CIRGenBuilderTy &builder,
113113
lhs = builder.createNot(lhs);
114114
return builder.createBitcast(builder.createBinop(loc, lhs, binOpKind, rhs),
115115
ops[0].getType());
116+
117+
static mlir::Value emitX86Muldq(CIRGenFunction &cgf, const CallExpr *expr,
118+
bool isSigned,
119+
SmallVectorImpl<mlir::Value> &ops) {
120+
CIRGenBuilderTy &builder = cgf.getBuilder();
121+
mlir::Location loc = cgf.getLoc(expr->getExprLoc());
122+
mlir::Type ty = ops[0].getType();
123+
unsigned tyPrimitiveSizeInBits =
124+
cgf.cgm.getDataLayout().getTypeSizeInBits(ty);
125+
mlir::Value lhs, rhs;
126+
// in cir, if a shiftOperation is shift right,it will be translated into Ashr
127+
// or lShr automatically in match and rewrite stage according to its operand's
128+
// type
129+
if (isSigned) {
130+
ty =
131+
cir::VectorType::get(builder.getSInt64Ty(), tyPrimitiveSizeInBits / 64);
132+
cir::ConstantOp shiftAmt =
133+
builder.getConstant(loc, cir::IntAttr::get(builder.getSInt64Ty(), 32));
134+
cir::VecSplatOp shiftSplatVecOp =
135+
cir::VecSplatOp::create(builder, loc, ty, shiftAmt.getResult());
136+
mlir::Value shiftSplatValue = shiftSplatVecOp.getResult();
137+
lhs = builder.createBitcast(loc, ops[0], ty);
138+
rhs = builder.createBitcast(loc, ops[1], ty);
139+
lhs = builder.createShift(loc, lhs, shiftSplatValue, true);
140+
lhs = builder.createShift(loc, lhs, shiftSplatValue, false);
141+
rhs = builder.createShift(loc, rhs, shiftSplatValue, true);
142+
rhs = builder.createShift(loc, rhs, shiftSplatValue, false);
143+
} else {
144+
ty =
145+
cir::VectorType::get(builder.getSInt64Ty(), tyPrimitiveSizeInBits / 64);
146+
cir::ConstantOp maskScalar = builder.getConstant(
147+
loc, cir::IntAttr::get(builder.getSInt64Ty(), 0xffffffff));
148+
cir::VecSplatOp mask =
149+
cir::VecSplatOp::create(builder, loc, ty, maskScalar.getResult());
150+
lhs = builder.createBitcast(loc, ops[0], ty);
151+
rhs = builder.createBitcast(loc, ops[1], ty);
152+
lhs = builder.createAnd(loc, lhs, mask);
153+
rhs = builder.createAnd(loc, rhs, mask);
154+
}
155+
return builder.createMul(loc, lhs, rhs);
116156
}
117157

118158
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
@@ -851,12 +891,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
851891
case X86::BI__builtin_ia32_sqrtph512:
852892
case X86::BI__builtin_ia32_sqrtps512:
853893
case X86::BI__builtin_ia32_sqrtpd512:
894+
cgm.errorNYI(expr->getSourceRange(),
895+
std::string("unimplemented X86 builtin call: ") +
896+
getContext().BuiltinInfo.getName(builtinID));
897+
return {};
854898
case X86::BI__builtin_ia32_pmuludq128:
855899
case X86::BI__builtin_ia32_pmuludq256:
856900
case X86::BI__builtin_ia32_pmuludq512:
901+
return emitX86Muldq(*this, expr, /*IsSigned*/ false, ops);
857902
case X86::BI__builtin_ia32_pmuldq128:
858903
case X86::BI__builtin_ia32_pmuldq256:
859904
case X86::BI__builtin_ia32_pmuldq512:
905+
return emitX86Muldq(*this, expr, /*IsSigned*/ true, ops);
860906
case X86::BI__builtin_ia32_pternlogd512_mask:
861907
case X86::BI__builtin_ia32_pternlogq512_mask:
862908
case X86::BI__builtin_ia32_pternlogd128_mask:
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
7+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
8+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
12+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
13+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
14+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx2 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
15+
16+
#include <immintrin.h>
17+
18+
__m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
19+
// CIR-LABEL: _mm256_mul_epu32
20+
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
21+
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<4 x !s64i>
22+
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
23+
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
24+
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
25+
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
26+
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])
27+
28+
// LLVM-LABEL: _mm256_mul_epu32
29+
// LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
30+
// LLVM: and <4 x i64> %{{.*}}, splat (i64 4294967295)
31+
// LLVM: mul <4 x i64> %{{.*}}, %{{.*}}
32+
33+
// OGCG-LABEL: _mm256_mul_epu32
34+
// OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
35+
// OGCG: and <4 x i64> %{{.*}}, splat (i64 4294967295)
36+
// OGCG: mul <4 x i64> %{{.*}}, %{{.*}}
37+
38+
return _mm256_mul_epu32(a, b);
39+
}
40+
41+
__m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
42+
// CIR-LABEL: _mm256_mul_epi32
43+
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
44+
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<4 x !s64i>
45+
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
46+
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<4 x !s64i>
47+
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
48+
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
49+
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
50+
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<4 x !s64i>, [[SV]] : !cir.vector<4 x !s64i>)
51+
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
52+
53+
// LLVM-LABEL: _mm256_mul_epi32
54+
// LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
55+
// LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
56+
// LLVM: shl <4 x i64> %{{.*}}, splat (i64 32)
57+
// LLVM: ashr <4 x i64> %{{.*}}, splat (i64 32)
58+
// LLVM: mul <4 x i64> %{{.*}}, %{{.*}}
59+
60+
// OGCG-LABEL: _mm256_mul_epi32
61+
// OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
62+
// OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
63+
// OGCG: shl <4 x i64> %{{.*}}, splat (i64 32)
64+
// OGCG: ashr <4 x i64> %{{.*}}, splat (i64 32)
65+
// OGCG: mul <4 x i64> %{{.*}}, %{{.*}}
66+
67+
return _mm256_mul_epi32(a, b);
68+
}

clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c

100644100755
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ __m512i test_mm512_undefined_epi32(void) {
7878
return _mm512_undefined_epi32();
7979
}
8080

81+
<<<<<<< HEAD
8182
__mmask16 test_mm512_kand(__mmask16 A, __mmask16 B) {
8283
// CIR-LABEL: _mm512_kand
8384
// CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
@@ -228,3 +229,58 @@ __mmask16 test_kmov_w(__mmask16 A) {
228229
// OGCG: bitcast <16 x i1> {{.*}} to i16
229230
return __builtin_ia32_kmovw(A);
230231
}
232+
233+
234+
__m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
235+
// CIR-LABEL: _mm512_mul_epi32
236+
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
237+
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<8 x !s64i>
238+
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
239+
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s32i> -> !cir.vector<8 x !s64i>
240+
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
241+
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
242+
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
243+
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<8 x !s64i>, [[SV]] : !cir.vector<8 x !s64i>)
244+
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
245+
246+
// LLVM-LABEL: _mm512_mul_epi32
247+
// LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
248+
// LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
249+
// LLVM: shl <8 x i64> %{{.*}}, splat (i64 32)
250+
// LLVM: ashr <8 x i64> %{{.*}}, splat (i64 32)
251+
// LLVM: mul <8 x i64> %{{.*}}, %{{.*}}
252+
253+
// OGCG-LABEL: _mm512_mul_epi32
254+
// OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
255+
// OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
256+
// OGCG: shl <8 x i64> %{{.*}}, splat (i64 32)
257+
// OGCG: ashr <8 x i64> %{{.*}}, splat (i64 32)
258+
// OGCG: mul <8 x i64> %{{.*}}, %{{.*}}
259+
260+
return _mm512_mul_epi32(__A, __B);
261+
}
262+
263+
264+
__m512i test_mm512_mul_epu32(__m512i __A, __m512i __B) {
265+
// CIR-LABEL: _mm512_mul_epu32
266+
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
267+
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<8 x !s64i>
268+
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x !s64i>
269+
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<8 x !s64i>
270+
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
271+
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
272+
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])
273+
274+
// LLVM-LABEL: _mm512_mul_epu32
275+
// LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
276+
// LLVM: and <8 x i64> %{{.*}}, splat (i64 4294967295)
277+
// LLVM: mul <8 x i64> %{{.*}}, %{{.*}}
278+
279+
// OGCG-LABEL: _mm512_mul_epu32
280+
// OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
281+
// OGCG: and <8 x i64> %{{.*}}, splat (i64 4294967295)
282+
// OGCG: mul <8 x i64> %{{.*}}, %{{.*}}
283+
284+
return _mm512_mul_epu32(__A, __B);
285+
}
286+

clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,26 @@ void test_mm_pause(void) {
108108
// LLVM: call void @llvm.x86.sse2.pause()
109109
// OGCG: call void @llvm.x86.sse2.pause()
110110
}
111+
112+
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
113+
// CIR-LABEL: _mm_mul_epu32
114+
// CIR: [[MASK_SCALAR:%.*]] = cir.const #cir.int<4294967295> : !s64i
115+
// CIR: [[MASK_VEC:%.*]] = cir.vec.splat [[MASK_SCALAR]] : !s64i, !cir.vector<2 x !s64i>
116+
// CIR: [[BC_A:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
117+
// CIR: [[BC_B:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
118+
// CIR: [[AND_A:%.*]] = cir.binop(and, [[BC_A]], [[MASK_VEC]])
119+
// CIR: [[AND_B:%.*]] = cir.binop(and, [[BC_B]], [[MASK_VEC]])
120+
// CIR: [[MUL:%.*]] = cir.binop(mul, [[AND_A]], [[AND_B]])
121+
122+
// LLVM-LABEL: _mm_mul_epu32
123+
// LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
124+
// LLVM: and <2 x i64> %{{.*}}, splat (i64 4294967295)
125+
// LLVM: mul <2 x i64> %{{.*}}, %{{.*}}
126+
127+
// OGCG-LABEL: _mm_mul_epu32
128+
// OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
129+
// OGCG: and <2 x i64> %{{.*}}, splat (i64 4294967295)
130+
// OGCG: mul <2 x i64> %{{.*}}, %{{.*}}
131+
132+
return _mm_mul_epu32(A, B);
133+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
4+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
5+
6+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
7+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
8+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
9+
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
10+
11+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
12+
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
13+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
14+
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
15+
16+
#include <immintrin.h>
17+
18+
__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
19+
// CIR-LABEL: _mm_mul_epi32
20+
// CIR: [[SC:%.*]] = cir.const #cir.int<32> : !s64i
21+
// CIR: [[SV:%.*]] = cir.vec.splat [[SC]] : !s64i, !cir.vector<2 x !s64i>
22+
// CIR: [[A64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
23+
// CIR: [[B64:%.*]] = cir.cast bitcast %{{.*}} : {{.*}} -> !cir.vector<2 x !s64i>
24+
// CIR: [[SHL_A:%.*]] = cir.shift(left, [[A64]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
25+
// CIR: [[ASHR_A:%.*]] = cir.shift(right, [[SHL_A]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
26+
// CIR: [[SHL_B:%.*]] = cir.shift(left, [[B64]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
27+
// CIR: [[ASHR_B:%.*]] = cir.shift(right, [[SHL_B]] : !cir.vector<2 x !s64i>, [[SV]] : !cir.vector<2 x !s64i>)
28+
// CIR: [[MUL:%.*]] = cir.binop(mul, [[ASHR_A]], [[ASHR_B]])
29+
30+
// LLVM-LABEL: _mm_mul_epi32
31+
// LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
32+
// LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
33+
// LLVM: shl <2 x i64> %{{.*}}, splat (i64 32)
34+
// LLVM: ashr <2 x i64> %{{.*}}, splat (i64 32)
35+
// LLVM: mul <2 x i64> %{{.*}}, %{{.*}}
36+
37+
// OGCG-LABEL: _mm_mul_epi32
38+
// OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
39+
// OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
40+
// OGCG: shl <2 x i64> %{{.*}}, splat (i64 32)
41+
// OGCG: ashr <2 x i64> %{{.*}}, splat (i64 32)
42+
// OGCG: mul <2 x i64> %{{.*}}, %{{.*}}
43+
44+
return _mm_mul_epi32(x, y);
45+
}

0 commit comments

Comments
 (0)