Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 54 additions & 7 deletions llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DiagnosticInfo.h"
Expand Down Expand Up @@ -3214,20 +3215,23 @@ static SDValue performTruncateCombine(SDNode *N,

static SDValue performBitcastCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
using namespace llvm::SDPatternMatch;
auto &DAG = DCI.DAG;
SDLoc DL(N);
SDValue Src = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();

// bitcast <N x i1> to iN
if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
return SDValue();

unsigned NumElts = SrcVT.getVectorNumElements();
EVT Width = MVT::getIntegerVT(128 / NumElts);

// bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
// ==> bitmask
if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1) {
unsigned NumElts = SrcVT.getVectorNumElements();
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return SDValue();
EVT Width = MVT::getIntegerVT(128 / NumElts);
if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
return DAG.getZExtOrTrunc(
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
{DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
Expand All @@ -3236,6 +3240,49 @@ static SDValue performBitcastCombine(SDNode *N,
DL, VT);
}

// bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
if (NumElts == 32 || NumElts == 64) {
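// Strategy: perform the setcc separately on each 128-bit chunk
// (v16i8 -> v16i1), bitcast each v16i1 result to i16, and extend it to
// either i32 or i64. Then add the pieces together one by one, shifting
// left by 16 bits at each step.
// NOTE(review): ISD::SHL expects (value, shift amount); confirm the operand
// order passed to the SHL node below.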
SDValue Concat, SetCCVector;
ISD::CondCode SetCond;

if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
m_CondCode(SetCond)))))
return SDValue();
if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
return SDValue();

uint64_t ElementWidth =
SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();

SmallVector<SDValue> VectorsToShuffle;
for (size_t I = 0; I < Concat->ops().size(); I++) {
VectorsToShuffle.push_back(DAG.getBitcast(
MVT::i16,
DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
extractSubVector(SetCCVector, I * (128 / ElementWidth),
DAG, DL, 128),
SetCond)));
}

MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);

for (SDValue V : VectorsToShuffle) {
ReturningInteger = DAG.getNode(
ISD::SHL, DL, ReturnType,
{DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});

SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
ReturningInteger =
DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
}

return ReturningInteger;
}

return SDValue();
}

Expand Down
226 changes: 5 additions & 221 deletions llvm/test/CodeGen/WebAssembly/simd-bitmask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -177,236 +177,20 @@ define i32 @bitmask_v32i8(<32 x i8> %v) {
; CHECK: .functype bitmask_v32i8 (v128, v128) -> (i32)
; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get __stack_pointer
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.sub
; CHECK-NEXT: drop
; CHECK-NEXT: local.get 0
; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
; CHECK-NEXT: local.tee 2
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 2
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 2
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 3
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 4
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 4
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 5
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 6
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 6
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 7
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 7
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 8
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 9
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 9
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 10
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 10
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 11
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 11
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 12
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 12
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 13
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 13
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 14
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 14
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 15
; CHECK-NEXT: i32.const 15
; CHECK-NEXT: i8x16.bitmask
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.add
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.const 65535
; CHECK-NEXT: i32.and
; CHECK-NEXT: local.get 1
; CHECK-NEXT: local.get 2
; CHECK-NEXT: i8x16.eq
; CHECK-NEXT: local.tee 0
; CHECK-NEXT: i8x16.extract_lane_u 15
; CHECK-NEXT: i32.const 31
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 14
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 30
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 13
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 29
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 12
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 28
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 11
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 27
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 10
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 26
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 9
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 25
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 8
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 24
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 7
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 23
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 6
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 22
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 5
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 21
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 4
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 20
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 3
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 19
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 2
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 18
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 1
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 17
; CHECK-NEXT: i32.shl
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.extract_lane_u 0
; CHECK-NEXT: i32.const 1
; CHECK-NEXT: i32.and
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32.shl
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i32.or
; CHECK-NEXT: i8x16.bitmask
; CHECK-NEXT: i32.add
; CHECK-NEXT: # fallthrough-return
%cmp = icmp eq <32 x i8> %v, zeroinitializer
%bitmask = bitcast <32 x i1> %cmp to i32
Expand Down
Loading
Loading