Skip to content

Commit 3ab6d12

Browse files
authored
[WebAssembly] Implement f16x8 madd and nmadd instructions. (#95151)
Implemented with intrinsics and builtins. Specified at: https://github.com/WebAssembly/half-precision/blob/main/proposals/half-precision/Overview.md
1 parent: 511a195 · commit: 3ab6d12

File tree

5 files changed: 40 additions and 13 deletions

clang/include/clang/Basic/BuiltinsWebAssembly.def

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,8 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_madd_f32x4, "V4fV4fV4fV4f", "nc", "relaxed
170170
TARGET_BUILTIN(__builtin_wasm_relaxed_nmadd_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
171171
TARGET_BUILTIN(__builtin_wasm_relaxed_madd_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd")
172172
TARGET_BUILTIN(__builtin_wasm_relaxed_nmadd_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd")
173+
TARGET_BUILTIN(__builtin_wasm_relaxed_madd_f16x8, "V8hV8hV8hV8h", "nc", "half-precision")
174+
TARGET_BUILTIN(__builtin_wasm_relaxed_nmadd_f16x8, "V8hV8hV8hV8h", "nc", "half-precision")
173175

174176
TARGET_BUILTIN(__builtin_wasm_relaxed_laneselect_i8x16, "V16ScV16ScV16ScV16Sc", "nc", "relaxed-simd")
175177
TARGET_BUILTIN(__builtin_wasm_relaxed_laneselect_i16x8, "V8sV8sV8sV8s", "nc", "relaxed-simd")

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21149,6 +21149,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2114921149
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
2115021150
return Builder.CreateCall(Callee, Ops);
2115121151
}
21152+
case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21153+
case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
2115221154
case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
2115321155
case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
2115421156
case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
@@ -21158,10 +21160,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
2115821160
Value *C = EmitScalarExpr(E->getArg(2));
2115921161
unsigned IntNo;
2116021162
switch (BuiltinID) {
21163+
case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
2116121164
case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
2116221165
case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
2116321166
IntNo = Intrinsic::wasm_relaxed_madd;
2116421167
break;
21168+
case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
2116521169
case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
2116621170
case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
2116721171
IntNo = Intrinsic::wasm_relaxed_nmadd;

clang/test/CodeGen/builtins-wasm.c

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -690,6 +690,20 @@ f64x2 nmadd_f64x2(f64x2 a, f64x2 b, f64x2 c) {
690690
// WEBASSEMBLY-NEXT: ret
691691
}
692692

693+
f16x8 madd_f16x8(f16x8 a, f16x8 b, f16x8 c) {
694+
return __builtin_wasm_relaxed_madd_f16x8(a, b, c);
695+
// WEBASSEMBLY: call <8 x half> @llvm.wasm.relaxed.madd.v8f16(
696+
// WEBASSEMBLY-SAME: <8 x half> %a, <8 x half> %b, <8 x half> %c)
697+
// WEBASSEMBLY-NEXT: ret
698+
}
699+
700+
f16x8 nmadd_f16x8(f16x8 a, f16x8 b, f16x8 c) {
701+
return __builtin_wasm_relaxed_nmadd_f16x8(a, b, c);
702+
// WEBASSEMBLY: call <8 x half> @llvm.wasm.relaxed.nmadd.v8f16(
703+
// WEBASSEMBLY-SAME: <8 x half> %a, <8 x half> %b, <8 x half> %c)
704+
// WEBASSEMBLY-NEXT: ret
705+
}
706+
693707
i8x16 laneselect_i8x16(i8x16 a, i8x16 b, i8x16 c) {
694708
return __builtin_wasm_relaxed_laneselect_i8x16(a, b, c);
695709
// WEBASSEMBLY: call <16 x i8> @llvm.wasm.relaxed.laneselect.v16i8(

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1480,23 +1480,24 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
14801480
// Relaxed (Negative) Multiply-Add (madd/nmadd)
14811481
//===----------------------------------------------------------------------===//
14821482

1483-
multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS> {
1483+
multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> {
14841484
defm MADD_#vec :
1485-
RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
1486-
[(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
1487-
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
1488-
vec.prefix#".relaxed_madd\t$dst, $a, $b, $c",
1489-
vec.prefix#".relaxed_madd", simdopA>;
1485+
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
1486+
[(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
1487+
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
1488+
vec.prefix#".relaxed_madd\t$dst, $a, $b, $c",
1489+
vec.prefix#".relaxed_madd", simdopA, reqs>;
14901490
defm NMADD_#vec :
1491-
RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
1492-
[(set (vec.vt V128:$dst), (int_wasm_relaxed_nmadd
1493-
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
1494-
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
1495-
vec.prefix#".relaxed_nmadd", simdopS>;
1491+
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
1492+
[(set (vec.vt V128:$dst), (int_wasm_relaxed_nmadd
1493+
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
1494+
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
1495+
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
14961496
}
14971497

1498-
defm "" : SIMDMADD<F32x4, 0x105, 0x106>;
1499-
defm "" : SIMDMADD<F64x2, 0x107, 0x108>;
1498+
defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
1499+
defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
1500+
defm "" : SIMDMADD<F16x8, 0x146, 0x147, [HasHalfPrecision]>;
15001501

15011502
//===----------------------------------------------------------------------===//
15021503
// Laneselect

llvm/test/MC/WebAssembly/simd-encodings.s

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -914,4 +914,10 @@ main:
914914
# CHECK: f16x8.nearest # encoding: [0xfd,0xbf,0x02]
915915
f16x8.nearest
916916

917+
# CHECK: f16x8.relaxed_madd # encoding: [0xfd,0xc6,0x02]
918+
f16x8.relaxed_madd
919+
920+
# CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xc7,0x02]
921+
f16x8.relaxed_nmadd
922+
917923
end_function

0 commit comments

Comments
 (0)