Skip to content

Commit 0c1a4c1

Browse files
badumbatish authored and mahesh-attarde committed
[WebAssembly] Added vectorized version of fexp10 to the supported list (llvm#150564)
Fixes llvm#117200. By default, TargetLoweringBase supports fexp10 only for scalar floats, not for the vectorized version. This PR adds `ISD::FEXP10` to the list of float operations that are expanded for the SIMD vector types (`v4f32` and `v2f64`).
1 parent 720020a commit 0c1a4c1

File tree

2 files changed

+34
-1
lines changed

2 files changed

+34
-1
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
288288

289289
// Expand float operations supported for scalars but not SIMD
290290
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
291-
ISD::FEXP, ISD::FEXP2})
291+
ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
292292
for (auto T : {MVT::v4f32, MVT::v2f64})
293293
setOperationAction(Op, T, Expand);
294294

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers -mattr=+simd128 | FileCheck %s
4+
5+
target triple = "wasm32-unknown-unknown"
6+
7+
declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
8+
9+
define <4 x float> @exp10_f32v4(<4 x float> %v) {
10+
; CHECK-LABEL: exp10_f32v4:
11+
; CHECK: .functype exp10_f32v4 (v128) -> (v128)
12+
; CHECK-NEXT: # %bb.0: # %entry
13+
; CHECK-NEXT: local.get $push12=, 0
14+
; CHECK-NEXT: f32x4.extract_lane $push0=, $pop12, 0
15+
; CHECK-NEXT: call $push1=, exp10f, $pop0
16+
; CHECK-NEXT: f32x4.splat $push2=, $pop1
17+
; CHECK-NEXT: local.get $push13=, 0
18+
; CHECK-NEXT: f32x4.extract_lane $push3=, $pop13, 1
19+
; CHECK-NEXT: call $push4=, exp10f, $pop3
20+
; CHECK-NEXT: f32x4.replace_lane $push5=, $pop2, 1, $pop4
21+
; CHECK-NEXT: local.get $push14=, 0
22+
; CHECK-NEXT: f32x4.extract_lane $push6=, $pop14, 2
23+
; CHECK-NEXT: call $push7=, exp10f, $pop6
24+
; CHECK-NEXT: f32x4.replace_lane $push8=, $pop5, 2, $pop7
25+
; CHECK-NEXT: local.get $push15=, 0
26+
; CHECK-NEXT: f32x4.extract_lane $push9=, $pop15, 3
27+
; CHECK-NEXT: call $push10=, exp10f, $pop9
28+
; CHECK-NEXT: f32x4.replace_lane $push11=, $pop8, 3, $pop10
29+
; CHECK-NEXT: return $pop11
30+
entry:
31+
%r = call <4 x float> @llvm.exp10.v4f32(<4 x float> %v)
32+
ret <4 x float> %r
33+
}

0 commit comments

Comments
 (0)