Skip to content

Commit 297f972

Browse files
authored
[Hexagon] Handle bitcast of i64 -> v64i1 when HVX is enabled (#163332)
Partially fixes #160806
1 parent 7b190b7 commit 297f972

File tree

2 files changed

+64
-9
lines changed

2 files changed

+64
-9
lines changed

llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,10 @@ HexagonTargetLowering::initializeHVXLowering() {
117117
setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
118118
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
119119

120-
if (Subtarget.useHVX128BOps())
120+
if (Subtarget.useHVX128BOps()) {
121121
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
122+
setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
123+
}
122124
if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
123125
Subtarget.useHVXFloatingPoint()) {
124126

@@ -2024,13 +2026,9 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
20242026
// Handle bitcast from i32, v2i16, and v4i8 to v32i1.
20252027
// Splat the input into a 32-element i32 vector, then AND each element
20262028
// with a unique bitmask to isolate individual bits.
2027-
if (ResTy == MVT::v32i1 &&
2028-
(ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2029-
Subtarget.useHVX128BOps()) {
2030-
SDValue Val32 = Val;
2031-
if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2032-
Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2033-
2029+
auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2030+
assert(Val32.getValueType().getSizeInBits() == 32 &&
2031+
"Input must be 32 bits");
20342032
MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
20352033
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
20362034
SmallVector<SDValue, 32> Mask;
@@ -2039,7 +2037,31 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
20392037

20402038
SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
20412039
SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2042-
return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
2040+
return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2041+
};
2042+
// === Case: v32i1 ===
2043+
if (ResTy == MVT::v32i1 &&
2044+
(ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2045+
Subtarget.useHVX128BOps()) {
2046+
SDValue Val32 = Val;
2047+
if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2048+
Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2049+
return bitcastI32ToV32I1(Val32);
2050+
}
2051+
// === Case: v64i1 ===
2052+
if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2053+
// Split i64 into lo/hi 32-bit halves.
2054+
SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2055+
SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2056+
DAG.getConstant(32, dl, MVT::i64));
2057+
SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2058+
2059+
// Reuse the same 32-bit logic twice.
2060+
SDValue LoRes = bitcastI32ToV32I1(Lo);
2061+
SDValue HiRes = bitcastI32ToV32I1(Hi);
2062+
2063+
// Concatenate into a v64i1 predicate.
2064+
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
20432065
}
20442066

20452067
if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
; RUN: llc --mtriple=hexagon -mattr=+hvxv79,+hvx-length128b < %s | FileCheck %s
2+
; CHECK-DAG: r[[REGH:([0-9]+)]]:[[REGL:([0-9]+)]] = combine(##.LCPI0_0,#-1)
3+
; CHECK-DAG: [[VREG1:v([0-9]+)]] = vmem(r[[REGH]]+#0)
4+
; CHECK-DAG: [[REG1:(r[0-9]+)]] = memw(r{{[0-9]+}}+#4)
5+
; CHECK-DAG: [[VREG2:v([0-9]+)]] = vsplat([[REG1]])
6+
; CHECK-DAG: [[REG2:(r[0-9]+)]] = memw(r{{[0-9]+}}+#0)
7+
; CHECK-DAG: [[VREG3:v([0-9]+)]] = vsplat([[REG2]])
8+
; CHECK-DAG: [[VREG4:v([0-9]+)]] = vand([[VREG2]],[[VREG1]])
9+
; CHECK-DAG: [[VREG5:v([0-9]+)]] = vand([[VREG3]],[[VREG1]])
10+
; CHECK-DAG: [[QREG:q[0-9]+]] = vand([[VREG4]],r{{[0-9]+}})
11+
; CHECK-DAG: [[VREG6:v([0-9]+)]] = vand([[QREG]],r{{[0-9]+}})
12+
; CHECK-DAG: [[QREG1:q[0-9]+]] = vand([[VREG5]],r{{[0-9]+}})
13+
; CHECK-DAG: [[VREG7:v([0-9]+)]] = vand([[QREG1]],r{{[0-9]+}})
14+
; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
15+
; CHECK-DAG: v{{[0-9]+}}.b = vpacke(v{{[0-9]+}}.h,v{{[0-9]+}}.h)
16+
; CHECK-DAG: [[VREG8:v([0-9]+)]] = vror(v{{[0-9]+}},r{{[0-9]+}})
17+
; CHECK-DAG: [[VREG9:v([0-9]+)]] = vor([[VREG8]],v{{[0-9]+}})
18+
; CHECK-DAG: q{{[0-9]+}} = vand([[VREG9]],r{{[0-9]+}})
19+
define void @bitcast_i64_to_v64i1_full(ptr %in, ptr %out) {
20+
entry:
21+
%load = load i64, ptr %in, align 4
22+
%bitcast = bitcast i64 %load to <64 x i1>
23+
%e0 = extractelement <64 x i1> %bitcast, i32 0
24+
%e1 = extractelement <64 x i1> %bitcast, i32 1
25+
%z0 = zext i1 %e0 to i8
26+
%z1 = zext i1 %e1 to i8
27+
%ptr0 = getelementptr i8, ptr %out, i32 0
28+
%ptr1 = getelementptr i8, ptr %out, i32 1
29+
store i8 %z0, ptr %ptr0, align 1
30+
store i8 %z1, ptr %ptr1, align 1
31+
ret void
32+
}
33+

0 commit comments

Comments
 (0)