Skip to content

Commit ffc2951

Browse files
committed
[WebAssembly] Fold constant i8x16.swizzle to shufflevector
1 parent 9746078 commit ffc2951

File tree

5 files changed

+119
-18
lines changed

5 files changed

+119
-18
lines changed

llvm/lib/Target/WebAssembly/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ add_llvm_target(WebAssemblyCodeGen
3232
WebAssemblyFixIrreducibleControlFlow.cpp
3333
WebAssemblyFixFunctionBitcasts.cpp
3434
WebAssemblyFrameLowering.cpp
35+
WebAssemblyInstCombineIntrinsic.cpp
3536
WebAssemblyISelDAGToDAG.cpp
3637
WebAssemblyISelLowering.cpp
3738
WebAssemblyInstrInfo.cpp
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
//=== WebAssemblyInstCombineIntrinsic.cpp -
2+
// WebAssembly specific InstCombine pass ---===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
/// \file
10+
/// This file implements the WebAssembly-specific InstCombine hook,
11+
/// WebAssemblyTTIImpl::instCombineIntrinsic. It folds calls to WebAssembly
12+
/// intrinsics (currently [relaxed_]swizzle with a constant mask) into
13+
/// equivalent target-independent IR such as shufflevector.
14+
///
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "WebAssemblyTargetTransformInfo.h"
18+
#include "llvm/IR/IntrinsicInst.h"
19+
#include "llvm/IR/IntrinsicsWebAssembly.h"
20+
#include "llvm/Transforms/InstCombine/InstCombiner.h"
21+
#include <optional>
22+
23+
using namespace llvm;
24+
using namespace llvm::PatternMatch;
25+
26+
/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is
27+
/// constant.
28+
static Value *simplifyWasmSwizzle(const IntrinsicInst &II,
29+
InstCombiner::BuilderTy &Builder,
30+
bool IsRelaxed) {
31+
auto *V = dyn_cast<Constant>(II.getArgOperand(1));
32+
if (!V)
33+
return nullptr;
34+
35+
auto *VecTy = cast<FixedVectorType>(II.getType());
36+
unsigned NumElts = VecTy->getNumElements();
37+
assert(NumElts == 16);
38+
39+
// Construct a shuffle mask from constant integers or UNDEFs.
40+
int Indexes[16];
41+
bool AnyOutOfBounds = false;
42+
43+
for (unsigned I = 0; I < NumElts; ++I) {
44+
Constant *COp = V->getAggregateElement(I);
45+
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
46+
return nullptr;
47+
48+
if (isa<UndefValue>(COp)) {
49+
Indexes[I] = -1;
50+
continue;
51+
}
52+
53+
int64_t Index = cast<ConstantInt>(COp)->getSExtValue();
54+
55+
if (Index >= NumElts && IsRelaxed) {
56+
// For lane indices above 15, the relaxed_swizzle operation can choose
57+
// between returning 0 or the lane at `Index % 16`. However, the choice
58+
// must be made consistently. As the WebAssembly spec states:
59+
//
60+
// "The result of relaxed operators are implementation-dependent, because
61+
// the set of possible results may depend on properties of the host
62+
// environment, such as its hardware. Technically, their behaviour is
63+
// controlled by a set of global parameters to the semantics that an
64+
// implementation can instantiate in different ways. These choices are
65+
// fixed, that is, parameters are constant during the execution of any
66+
// given program."
67+
//
68+
// The WebAssembly runtime may choose differently from us, so we can't
69+
// optimize a relaxed swizzle with lane indices above 15.
70+
return nullptr;
71+
}
72+
73+
if (Index >= NumElts || Index < 0) {
74+
AnyOutOfBounds = true;
75+
// If there are out-of-bounds indices, the swizzle instruction returns
76+
// zeroes in those lanes. We'll provide an all-zeroes vector as the
77+
// second argument to shufflevector and read the first element from it.
78+
Indexes[I] = NumElts;
79+
continue;
80+
}
81+
82+
Indexes[I] = Index;
83+
}
84+
85+
auto *V1 = II.getArgOperand(0);
86+
auto *V2 =
87+
AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy);
88+
89+
return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
90+
}
91+
92+
std::optional<Instruction *>
93+
WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC,
94+
IntrinsicInst &II) const {
95+
Intrinsic::ID IID = II.getIntrinsicID();
96+
switch (IID) {
97+
case Intrinsic::wasm_swizzle:
98+
case Intrinsic::wasm_relaxed_swizzle:
99+
if (Value *V = simplifyWasmSwizzle(
100+
II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) {
101+
return IC.replaceInstUsesWith(II, V);
102+
}
103+
break;
104+
}
105+
106+
return std::nullopt;
107+
}

llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
9090
TTI::TargetCostKind CostKind,
9191
unsigned Index, const Value *Op0,
9292
const Value *Op1) const override;
93+
std::optional<Instruction *>
94+
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
9395
InstructionCost getPartialReductionCost(
9496
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
9597
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,

llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,31 @@ declare <16 x i8> @llvm.wasm.swizzle(<16 x i8>, <16 x i8>)
77
declare <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8>, <16 x i8>)
88

99
; Identity swizzle pattern
10-
; TODO: Should simplify to nothing.
1110
define <16 x i8> @swizzle_identity(<16 x i8> %v) {
1211
; CHECK-LABEL: define <16 x i8> @swizzle_identity(
1312
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
14-
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
15-
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
13+
; CHECK-NEXT: ret <16 x i8> [[V]]
1614
;
1715
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
1816
ret <16 x i8> %result
1917
}
2018

2119
; Reverse swizzle pattern
22-
; TODO: Should simplify to shufflevector.
2320
define <16 x i8> @swizzle_reverse(<16 x i8> %v) {
2421
; CHECK-LABEL: define <16 x i8> @swizzle_reverse(
2522
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
26-
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
23+
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2724
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
2825
;
2926
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
3027
ret <16 x i8> %result
3128
}
3229

3330
; undef elements
34-
; TODO: Should simplify to shufflevector.
3531
define <16 x i8> @swizzle_with_undef(<16 x i8> %v) {
3632
; CHECK-LABEL: define <16 x i8> @swizzle_with_undef(
3733
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
38-
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> <i8 0, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
34+
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3935
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
4036
;
4137
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 0, i8 undef, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
@@ -54,59 +50,53 @@ define <16 x i8> @swizzle_non_constant(<16 x i8> %v, <16 x i8> %mask) {
5450
}
5551

5652
; Out-of-bounds index, otherwise identity pattern
57-
; TODO: Should simplify to insertelement.
5853
define <16 x i8> @swizzle_out_of_bounds_1(<16 x i8> %v) {
5954
; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_1(
6055
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
61-
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 16>)
56+
; CHECK-NEXT: [[RESULT:%.*]] = insertelement <16 x i8> [[V]], i8 0, i64 15
6257
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
6358
;
6459
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 16>)
6560
ret <16 x i8> %result
6661
}
6762

6863
; Out-of-bounds indices, both negative and positive
69-
; TODO: Should simplify to shufflevector.
7064
define <16 x i8> @swizzle_out_of_bounds_2(<16 x i8> %v) {
7165
; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_2(
7266
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
73-
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> <i8 99, i8 -1, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
67+
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 16, i32 16, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
7468
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
7569
;
7670
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 99, i8 -1, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
7771
ret <16 x i8> %result
7872
}
7973

8074
; Identity swizzle pattern (relaxed_swizzle)
81-
; TODO: Should simplify to nothing.
8275
define <16 x i8> @relaxed_swizzle_identity(<16 x i8> %v) {
8376
; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_identity(
8477
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
85-
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
86-
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
78+
; CHECK-NEXT: ret <16 x i8> [[V]]
8779
;
8880
%result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
8981
ret <16 x i8> %result
9082
}
9183

9284
; Reverse swizzle pattern (relaxed_swizzle)
93-
; TODO: Should simplify to shufflevector.
9485
define <16 x i8> @relaxed_swizzle_reverse(<16 x i8> %v) {
9586
; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_reverse(
9687
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
97-
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
88+
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
9889
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
9990
;
10091
%result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
10192
ret <16 x i8> %result
10293
}
10394

10495
; Out-of-bounds index, only negative (relaxed_swizzle)
105-
; TODO: Should simplify to shufflevector.
10696
define <16 x i8> @relaxed_swizzle_out_of_bounds(<16 x i8> %v) {
10797
; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_out_of_bounds(
10898
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
109-
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> <i8 -99, i8 -1, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
99+
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 16, i32 16, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
110100
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
111101
;
112102
%result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> <i8 -99, i8 -1, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)

llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ static_library("LLVMWebAssemblyCodeGen") {
5454
"WebAssemblyFixFunctionBitcasts.cpp",
5555
"WebAssemblyFixIrreducibleControlFlow.cpp",
5656
"WebAssemblyFrameLowering.cpp",
57+
"WebAssemblyInstCombineIntrinsic.cpp",
5758
"WebAssemblyISelDAGToDAG.cpp",
5859
"WebAssemblyISelLowering.cpp",
5960
"WebAssemblyInstrInfo.cpp",

0 commit comments

Comments
 (0)