Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/WebAssembly/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ add_llvm_target(WebAssemblyCodeGen
WebAssemblyFixIrreducibleControlFlow.cpp
WebAssemblyFixFunctionBitcasts.cpp
WebAssemblyFrameLowering.cpp
WebAssemblyInstCombineIntrinsic.cpp
WebAssemblyISelDAGToDAG.cpp
WebAssemblyISelLowering.cpp
WebAssemblyInstrInfo.cpp
Expand Down
107 changes: 107 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
//=== WebAssemblyInstCombineIntrinsic.cpp -
// WebAssembly specific InstCombine pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the WebAssembly-specific InstCombine hook of the
/// target's TargetTransformInfo implementation
/// (WebAssemblyTTIImpl::instCombineIntrinsic). It uses the target's detailed
/// information to simplify WebAssembly intrinsics, such as [relaxed_]swizzle
/// with a constant mask, while letting the target independent and default
/// implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is
/// constant.
/// Try to replace a wasm [relaxed_]swizzle call with an equivalent
/// shufflevector when its lane-index operand is a constant.
///
/// \param II        The swizzle call; operand 0 is the vector being permuted
///                  and operand 1 holds the lane indices.
/// \param Builder   Builder used to create the replacement shufflevector.
/// \param IsRelaxed Whether \p II is the relaxed_swizzle variant.
/// \returns The new shufflevector, or nullptr if the call is left untouched.
static Value *simplifyWasmSwizzle(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder,
                                  bool IsRelaxed) {
  // Only a constant lane-index operand can be turned into a shuffle mask.
  auto *MaskC = dyn_cast<Constant>(II.getArgOperand(1));
  if (!MaskC)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert(NumElts == 16);

  // Shuffle mask under construction: one entry per result lane, built from
  // constant integers or UNDEFs.
  int ShuffleMask[16];
  // Set once any lane has to read a zero instead of an input element.
  bool NeedsZeroVector = false;

  for (unsigned Lane = 0; Lane != NumElts; ++Lane) {
    Constant *Elt = MaskC->getAggregateElement(Lane);
    if (!Elt)
      return nullptr;

    // An undefined lane index becomes an undefined (-1) shuffle mask entry.
    if (isa<UndefValue>(Elt)) {
      ShuffleMask[Lane] = -1;
      continue;
    }

    // Anything other than a plain integer constant is not handled.
    auto *LaneIdx = dyn_cast<ConstantInt>(Elt);
    if (!LaneIdx)
      return nullptr;

    int64_t Idx = LaneIdx->getSExtValue();

    // Common case: the index selects one of the input lanes directly.
    if (Idx >= 0 && Idx < NumElts) {
      ShuffleMask[Lane] = Idx;
      continue;
    }

    // From here on the index is out of bounds.
    //
    // For lane indices above 15, relaxed_swizzle is allowed to produce either
    // 0 or the lane at `Index % 16`, but whichever it picks must be picked
    // consistently. Quoting the WebAssembly spec:
    //
    // "The result of relaxed operators are implementation-dependent, because
    // the set of possible results may depend on properties of the host
    // environment, such as its hardware. Technically, their behaviour is
    // controlled by a set of global parameters to the semantics that an
    // implementation can instantiate in different ways. These choices are
    // fixed, that is, parameters are constant during the execution of any
    // given program."
    //
    // We cannot know which option the WebAssembly runtime will take, so a
    // relaxed swizzle with a lane index above 15 is left alone.
    if (IsRelaxed && Idx >= NumElts)
      return nullptr;

    // The swizzle instruction yields zero for out-of-bounds lanes. Model that
    // by pointing the lane at the first element of the second shuffle
    // operand, which will be an all-zeroes vector.
    NeedsZeroVector = true;
    ShuffleMask[Lane] = NumElts;
  }

  Value *Src = II.getArgOperand(0);
  // The second shuffle operand only matters when some lane reads a zero.
  Value *Filler;
  if (NeedsZeroVector)
    Filler = Constant::getNullValue(VecTy);
  else
    Filler = PoisonValue::get(VecTy);

  return Builder.CreateShuffleVector(Src, Filler,
                                     ArrayRef(ShuffleMask, NumElts));
}

/// WebAssembly-specific intrinsic simplification hook.
///
/// Handles wasm_swizzle and wasm_relaxed_swizzle by delegating to
/// simplifyWasmSwizzle; when that produces a value, all uses of \p II are
/// replaced with it.
///
/// \param IC The InstCombiner whose builder is used for any new instructions.
/// \param II The intrinsic call being visited.
/// \returns The result of replaceInstUsesWith when a simplification was
///          applied, or std::nullopt otherwise.
std::optional<Instruction *>
WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                         IntrinsicInst &II) const {
  const Intrinsic::ID IID = II.getIntrinsicID();
  const bool IsRelaxedSwizzle = IID == Intrinsic::wasm_relaxed_swizzle;

  if (IsRelaxedSwizzle || IID == Intrinsic::wasm_swizzle) {
    Value *Replacement = simplifyWasmSwizzle(II, IC.Builder, IsRelaxedSwizzle);
    if (Replacement)
      return IC.replaceInstUsesWith(II, Replacement);
  }

  return std::nullopt;
}
2 changes: 2 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
TTI::TargetCostKind CostKind,
unsigned Index, const Value *Op0,
const Value *Op1) const override;
std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
InstructionCost getPartialReductionCost(
unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
Expand Down
116 changes: 116 additions & 0 deletions llvm/test/Transforms/InstCombine/WebAssembly/fold-swizzle.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=instcombine -mtriple=wasm32-unknown-unknown -S | FileCheck %s

; swizzle and relaxed_swizzle with a constant mask operand should be optimized to a shufflevector.

declare <16 x i8> @llvm.wasm.swizzle(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8>, <16 x i8>)

; Identity swizzle pattern
define <16 x i8> @swizzle_identity(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @swizzle_identity(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: ret <16 x i8> [[V]]
;
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %result
}

; Reverse swizzle pattern
define <16 x i8> @swizzle_reverse(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @swizzle_reverse(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
;
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <16 x i8> %result
}

; Poison mask elements should become poison indices in the shufflevector mask
define <16 x i8> @swizzle_with_poison(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @swizzle_with_poison(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
;
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 0, i8 poison, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %result
}

; Negative test: non-constant operand
define <16 x i8> @swizzle_non_constant(<16 x i8> %v, <16 x i8> %mask) {
; CHECK-LABEL: define <16 x i8> @swizzle_non_constant(
; CHECK-SAME: <16 x i8> [[V:%.*]], <16 x i8> [[MASK:%.*]]) {
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[V]], <16 x i8> [[MASK]])
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
;
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> %mask)
ret <16 x i8> %result
}

; Out-of-bounds index, otherwise identity pattern
define <16 x i8> @swizzle_out_of_bounds_1(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_1(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[RESULT:%.*]] = insertelement <16 x i8> [[V]], i8 0, i64 15
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
;
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 16>)
ret <16 x i8> %result
}

; Out-of-bounds indices, both negative and positive
define <16 x i8> @swizzle_out_of_bounds_2(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @swizzle_out_of_bounds_2(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 16, i32 16, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
;
%result = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %v, <16 x i8> <i8 99, i8 -1, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <16 x i8> %result
}

; Identity swizzle pattern (relaxed_swizzle)
define <16 x i8> @relaxed_swizzle_identity(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_identity(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: ret <16 x i8> [[V]]
;
%result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %result
}

; Reverse swizzle pattern (relaxed_swizzle)
define <16 x i8> @relaxed_swizzle_reverse(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_reverse(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
;
%result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <16 x i8> %result
}

; Out-of-bounds indices, only negative (relaxed_swizzle): these lanes are zeroed, so the fold still applies
define <16 x i8> @relaxed_swizzle_out_of_bounds(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_out_of_bounds(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 16, i32 16, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
;
%result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> <i8 -99, i8 -1, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <16 x i8> %result
}

; Negative test: out-of-bounds index, both positive and negative (relaxed_swizzle)
; The choice between different relaxed semantics can only be made at runtime, since it must be consistent.
define <16 x i8> @relaxed_swizzle_out_of_bounds_positive(<16 x i8> %v) {
; CHECK-LABEL: define <16 x i8> @relaxed_swizzle_out_of_bounds_positive(
; CHECK-SAME: <16 x i8> [[V:%.*]]) {
; CHECK-NEXT: [[RESULT:%.*]] = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> [[V]], <16 x i8> <i8 99, i8 -1, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; CHECK-NEXT: ret <16 x i8> [[RESULT]]
;
%result = tail call <16 x i8> @llvm.wasm.relaxed.swizzle(<16 x i8> %v, <16 x i8> <i8 99, i8 -1, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
ret <16 x i8> %result
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ static_library("LLVMWebAssemblyCodeGen") {
"WebAssemblyFixFunctionBitcasts.cpp",
"WebAssemblyFixIrreducibleControlFlow.cpp",
"WebAssemblyFrameLowering.cpp",
"WebAssemblyInstCombineIntrinsic.cpp",
"WebAssemblyISelDAGToDAG.cpp",
"WebAssemblyISelLowering.cpp",
"WebAssemblyInstrInfo.cpp",
Expand Down