Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

// When 128-byte HVX operations are in use, request Custom handling for
// BITCAST with MVT::v32i1.
// NOTE(review): presumably this is what routes such bitcasts to
// LowerHvxBitcast -- confirm against the Custom-lowering dispatch.
if (Subtarget.useHVX128BOps())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
Subtarget.useHVXFloatingPoint()) {

Expand Down Expand Up @@ -2001,6 +2003,28 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {

return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
}

// Lower a bitcast of a 32-bit value (i32, v2i16 or v4i8) to v32i1 when
// 128-byte HVX operations are available.
// The source bits are broadcast to every lane of a 32 x i32 vector and lane N
// is then ANDed with (1 << N), so each lane retains only source bit N. The
// masked vector is finally handed to HexagonISD::V2Q to produce the result.
// NOTE(review): V2Q is assumed to map non-zero lanes to "true" -- confirm.
if (Subtarget.useHVX128BOps() && ResTy == MVT::v32i1 &&
    (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8)) {
  constexpr unsigned NumLanes = 32;
  const MVT WordVecTy = MVT::getVectorVT(MVT::i32, NumLanes);

  // The two short-vector inputs are first reinterpreted as a plain i32;
  // an i32 input is used as-is.
  const SDValue Scalar =
      (ValTy == MVT::i32) ? Val
                          : DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
  const SDValue Broadcast =
      DAG.getNode(ISD::SPLAT_VECTOR, dl, WordVecTy, Scalar);

  // Build the constant vector <1, 2, 4, ..., 1 << 31>.
  SmallVector<SDValue, NumLanes> LaneBits;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
    LaneBits.push_back(DAG.getConstant(1u << Lane, dl, MVT::i32));
  const SDValue BitSelect = DAG.getBuildVector(WordVecTy, dl, LaneBits);

  const SDValue Isolated =
      DAG.getNode(ISD::AND, dl, WordVecTy, Broadcast, BitSelect);
  return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Isolated);
}

if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
// Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
unsigned BitWidth = ValTy.getSizeInBits();
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/Hexagon/bitcast-i32-to-v32i1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s

; Verify the code generated for a bitcast from i32 to <32 x i1> with 128-byte
; HVX enabled. The expected sequence is: splat the scalar into a vector
; register, AND that vector with a second vector, then form a predicate
; register from the result with a vector/scalar vand.
; Every register pattern requires at least one digit after the register
; letter, so a malformed register name cannot satisfy the match.
; CHECK: [[VREG1:v([0-9]+)]] = vsplat(r{{[0-9]+}})
; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})

; Loads an i32 from %in, reinterprets it as <32 x i1>, and stores lanes 0 and 1
; (each zero-extended to i8) to %out[0] and %out[1].
define void @bitcast_i32_to_v32i1_full(ptr %in, ptr %out) {
entry:
  %load = load i32, ptr %in, align 4
  %bitcast = bitcast i32 %load to <32 x i1>
  %e0 = extractelement <32 x i1> %bitcast, i32 0
  %e1 = extractelement <32 x i1> %bitcast, i32 1
  %z0 = zext i1 %e0 to i8
  %z1 = zext i1 %e1 to i8
  %ptr0 = getelementptr i8, ptr %out, i32 0
  %ptr1 = getelementptr i8, ptr %out, i32 1
  store i8 %z0, ptr %ptr0, align 1
  store i8 %z1, ptr %ptr1, align 1
  ret void
}
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/Hexagon/bitcast-v2i16-to-v32i1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s

; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0)
; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]])
; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})

; Loads a <2 x i16> from %in, reinterprets its 32 bits as <32 x i1>, and
; stores lane 0 (zero-extended to i8) through %out.
define void @bitcast_v2i16_to_v32i1(ptr %in, ptr %out) {
entry:
  %halves = load <2 x i16>, ptr %in, align 4
  %pred = bitcast <2 x i16> %halves to <32 x i1>
  %lane0 = extractelement <32 x i1> %pred, i32 0
  %byte = zext i1 %lane0 to i8
  store i8 %byte, ptr %out, align 1
  ret void
}
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/Hexagon/bitcast-v4i8-to-v32i1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s

; CHECK: [[REG0:r[0-9]+]] = memw(r{{[0-9]+}}+#0)
; CHECK: [[VREG1:v([0-9]+)]] = vsplat([[REG0]])
; CHECK: [[VREG2:v([0-9]+)]] = vand([[VREG1]],v{{[0-9]+}})
; CHECK: q[[QREG:[0-9]+]] = vand([[VREG2]],r{{[0-9]+}})

; Loads a <4 x i8> from %in, reinterprets its 32 bits as <32 x i1>, and
; stores lane 0 (zero-extended to i8) through %out.
define void @bitcast_v4i8_to_v32i1(ptr %in, ptr %out) {
entry:
  %quad = load <4 x i8>, ptr %in, align 4
  %pred = bitcast <4 x i8> %quad to <32 x i1>
  %lane0 = extractelement <32 x i1> %pred, i32 0
  %byte = zext i1 %lane0 to i8
  store i8 %byte, ptr %out, align 1
  ret void
}