Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -1662,6 +1662,13 @@ multiclass SIMDLANESELECT<Vec vec, bits<32> op> {
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
vec.prefix#".relaxed_laneselect\t$dst, $a, $b, $c",
vec.prefix#".relaxed_laneselect", op>;

let AddedComplexity = 1 in {
def : Pat<(vec.vt (int_wasm_bitselect
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should there not just be a relaxed form of the intrinsic, just like laneselect above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, can you clarify that a bit? I think I'm a bit lost.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I think Sam means is that there should probably be a separate intrinsic for relaxed laneselect rather than selecting the bitselect intrinsic to laneselect. (At least, that's what my feedback would be :) )
The reason is that our current convention is that the @llvm.wasm.foo intrinsics each correspond directly to a particular wasm instruction, so if a user generates a bitselect intrinsic, they probably want its particular semantics. Or, put another way, the semantics of the existing @llvm.wasm.bitselect intrinsic currently correspond to the bitselect instruction, which has stricter semantics than the relaxed laneselect instruction; so implementing the intrinsic with the laneselect instruction would be incorrect.

The conventions of the intrinsics in the @llvm.wasm space are something that could in principle be changed, but I don't think we'd want to do that in this case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think there is already an intrinsic for it, int_wasm_relaxed_laneselect, in llvm/include/llvm/IR/IntrinsicsWebAssembly.td. In this PR, though, I was hoping to convert the bitselect to the laneselect when the relaxed flag is present. Should I be doing something else here?

(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c))),
(!cast<Instruction>("LANESELECT_"#vec) V128:$a, V128:$b, V128:$c)>,
Requires<[HasRelaxedSIMD]>;
}
}

defm "" : SIMDLANESELECT<I8x16, 0x109>;
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ define i32 @bitmask_v16i8(<16 x i8> %x) {

; CHECK-LABEL: bitselect_v16i8:
; CHECK-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
; CHECK-NEXT: i8x16.relaxed_laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
declare <16 x i8> @llvm.wasm.bitselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
define <16 x i8> @bitselect_v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %c) {
Expand Down Expand Up @@ -309,7 +309,7 @@ define i32 @bitmask_v8i16(<8 x i16> %x) {

; CHECK-LABEL: bitselect_v8i16:
; CHECK-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
; CHECK-NEXT: i16x8.relaxed_laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
declare <8 x i16> @llvm.wasm.bitselect.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
define <8 x i16> @bitselect_v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %c) {
Expand Down Expand Up @@ -445,7 +445,7 @@ define i32 @bitmask_v4i32(<4 x i32> %x) {

; CHECK-LABEL: bitselect_v4i32:
; CHECK-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
; CHECK-NEXT: i32x4.relaxed_laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
declare <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
define <4 x i32> @bitselect_v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %c) {
Expand Down Expand Up @@ -628,7 +628,7 @@ define i32 @bitmask_v2i64(<2 x i64> %x) {

; CHECK-LABEL: bitselect_v2i64:
; CHECK-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
; CHECK-NEXT: i64x2.relaxed_laneselect $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
declare <2 x i64> @llvm.wasm.bitselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
define <2 x i64> @bitselect_v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %c) {
Expand Down
79 changes: 79 additions & 0 deletions llvm/test/CodeGen/WebAssembly/simd-relaxed-laneselect.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

; RUN: llc < %s -verify-machineinstrs -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
; RUN: llc < %s -verify-machineinstrs -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=SIMD

; Test that bitselect intrinsic lowers to relaxed_laneselect when relaxed-simd is enabled

target triple = "wasm32-unknown-unknown"

; Calls the v16i8 form of @llvm.wasm.bitselect on the three vector arguments
; and returns the result. The checks under the RELAXED prefix (run with
; +simd128,+relaxed-simd) expect i8x16.relaxed_laneselect; the checks under
; the SIMD prefix (run with +simd128 only) expect v128.bitselect.
; NOTE(review): review feedback on this change states that bitselect has
; stricter semantics than relaxed_laneselect - confirm this lowering is
; intended before relying on the relaxed-simd expectations.
define <16 x i8> @bitselect_to_laneselect_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; RELAXED-LABEL: bitselect_to_laneselect_v16i8:
; RELAXED: .functype bitselect_to_laneselect_v16i8 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
; RELAXED-NEXT: i8x16.relaxed_laneselect $push0=, $0, $1, $2
; RELAXED-NEXT: # fallthrough-return
;
; SIMD-LABEL: bitselect_to_laneselect_v16i8:
; SIMD: .functype bitselect_to_laneselect_v16i8 (v128, v128, v128) -> (v128)
; SIMD-NEXT: # %bb.0:
; SIMD-NEXT: v128.bitselect $push0=, $0, $1, $2
; SIMD-NEXT: # fallthrough-return
%res = call <16 x i8> @llvm.wasm.bitselect.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
ret <16 x i8> %res
}

; Calls the v8i16 form of @llvm.wasm.bitselect on the three vector arguments
; and returns the result. The checks under the RELAXED prefix (run with
; +simd128,+relaxed-simd) expect i16x8.relaxed_laneselect; the checks under
; the SIMD prefix (run with +simd128 only) expect v128.bitselect.
define <8 x i16> @bitselect_to_laneselect_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; RELAXED-LABEL: bitselect_to_laneselect_v8i16:
; RELAXED: .functype bitselect_to_laneselect_v8i16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
; RELAXED-NEXT: i16x8.relaxed_laneselect $push0=, $0, $1, $2
; RELAXED-NEXT: # fallthrough-return
;
; SIMD-LABEL: bitselect_to_laneselect_v8i16:
; SIMD: .functype bitselect_to_laneselect_v8i16 (v128, v128, v128) -> (v128)
; SIMD-NEXT: # %bb.0:
; SIMD-NEXT: v128.bitselect $push0=, $0, $1, $2
; SIMD-NEXT: # fallthrough-return
%res = call <8 x i16> @llvm.wasm.bitselect.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
ret <8 x i16> %res
}

; Calls the v4i32 form of @llvm.wasm.bitselect on the three vector arguments
; and returns the result. The checks under the RELAXED prefix (run with
; +simd128,+relaxed-simd) expect i32x4.relaxed_laneselect; the checks under
; the SIMD prefix (run with +simd128 only) expect v128.bitselect.
define <4 x i32> @bitselect_to_laneselect_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; RELAXED-LABEL: bitselect_to_laneselect_v4i32:
; RELAXED: .functype bitselect_to_laneselect_v4i32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
; RELAXED-NEXT: i32x4.relaxed_laneselect $push0=, $0, $1, $2
; RELAXED-NEXT: # fallthrough-return
;
; SIMD-LABEL: bitselect_to_laneselect_v4i32:
; SIMD: .functype bitselect_to_laneselect_v4i32 (v128, v128, v128) -> (v128)
; SIMD-NEXT: # %bb.0:
; SIMD-NEXT: v128.bitselect $push0=, $0, $1, $2
; SIMD-NEXT: # fallthrough-return
%res = call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
ret <4 x i32> %res
}

; Calls the v2i64 form of @llvm.wasm.bitselect on the three vector arguments
; and returns the result. The checks under the RELAXED prefix (run with
; +simd128,+relaxed-simd) expect i64x2.relaxed_laneselect; the checks under
; the SIMD prefix (run with +simd128 only) expect v128.bitselect.
define <2 x i64> @bitselect_to_laneselect_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; RELAXED-LABEL: bitselect_to_laneselect_v2i64:
; RELAXED: .functype bitselect_to_laneselect_v2i64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
; RELAXED-NEXT: i64x2.relaxed_laneselect $push0=, $0, $1, $2
; RELAXED-NEXT: # fallthrough-return
;
; SIMD-LABEL: bitselect_to_laneselect_v2i64:
; SIMD: .functype bitselect_to_laneselect_v2i64 (v128, v128, v128) -> (v128)
; SIMD-NEXT: # %bb.0:
; SIMD-NEXT: v128.bitselect $push0=, $0, $1, $2
; SIMD-NEXT: # fallthrough-return
%res = call <2 x i64> @llvm.wasm.bitselect.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
ret <2 x i64> %res
}



declare <16 x i8> @llvm.wasm.bitselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.wasm.bitselect.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.wasm.bitselect.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)