Skip to content

Conversation

@quic-santdas
Copy link
Contributor

No description provided.

When a concatenation of HVX vectors is lowered, a vector rotation (VROR)
is performed as part of the lowering. The rotation amount was not
calculated correctly, so the rotation went in the wrong direction. This
patch fixes the rotation amount.
Removed some extra function attributes which were not needed.
Change-Id: I04898ffa9d75a8177b9aa13e0836988e03eb26c9
HexagonISD::PFALSE and HexagonISD::PTRUE nodes generally do not appear on
their own, because they are treated as operands of all 0s or all 1s to
another operation.
E.g.: i32 = transfer HexagonISD::PFALSE.
In the case addressed here, v8i1 = HexagonISD::PFALSE is formed
independently, without an accompanying opcode.

This patch adds patterns for a standalone PFALSE/PTRUE node: all 0s or
all 1s are first transferred into a scalar register, and that register is
then transferred into a predicate register such as v8i1.
@llvmbot
Copy link
Member

llvmbot commented Dec 20, 2024

@llvm/pr-subscribers-backend-hexagon

Author: Santanu Das (quic-santdas)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/120695.diff

6 Files Affected:

  • (modified) llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp (+1-1)
  • (modified) llvm/lib/Target/Hexagon/HexagonPatterns.td (+3)
  • (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.h (+4)
  • (added) llvm/test/CodeGen/Hexagon/addrspacecast-crash.ll (+12)
  • (added) llvm/test/CodeGen/Hexagon/hvx-concat-lower.ll (+135)
  • (added) llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll (+15)
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 81035849491bcb..39b8c829a0b217 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1748,7 +1748,7 @@ HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
 
   unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
   MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
-  SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
+  SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
   SDValue Res = getZero(dl, ByteTy, DAG);
   for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
     Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index baa552fcd220d8..e54f97eb92e57d 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -108,6 +108,9 @@ def ptrue:  PatFrag<(ops), (HexagonPTRUE)>;
 def pfalse: PatFrag<(ops), (HexagonPFALSE)>;
 def pnot:   PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>;
 
+def: Pat<(v8i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>;
+def: Pat<(v8i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>;
+
 def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
                     (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
 def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 6e9a78b7665042..84edbe6fd870b3 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -46,6 +46,10 @@ class HexagonTargetMachine : public LLVMTargetMachine {
   MachineFunctionInfo *
   createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
                             const TargetSubtargetInfo *STI) const override;
+
+  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+    return true;
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/Hexagon/addrspacecast-crash.ll b/llvm/test/CodeGen/Hexagon/addrspacecast-crash.ll
new file mode 100644
index 00000000000000..ce197bdeea1f38
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/addrspacecast-crash.ll
@@ -0,0 +1,12 @@
+; Tests if addrspacecast is handled in Hexagon backend
+
+; REQUIRES: asserts
+
+; RUN: llc -march=hexagon %s -o /dev/null
+
+define double @f(ptr %G, ptr %x) {
+BB:
+  %Castaddrspacecast = addrspacecast ptr %x to ptr addrspace(1)
+  store ptr addrspace(1) %Castaddrspacecast, ptr %G, align 8
+  ret double 0.000000e+00
+}
diff --git a/llvm/test/CodeGen/Hexagon/hvx-concat-lower.ll b/llvm/test/CodeGen/Hexagon/hvx-concat-lower.ll
new file mode 100644
index 00000000000000..2037493972fc76
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/hvx-concat-lower.ll
@@ -0,0 +1,135 @@
+; During lowering of HVX instruction for 64B vector, the rotation
+; direction for VROR (as part of concat of vectors lowering) is fixed.
+
+; RUN: llc -march=hexagon -O2 %s -o - | FileCheck %s
+
+; CHECK: vec.epilog.ph
+; CHECK: r{{.*}} = {{.*}}#48
+; CHECK: vec.epilog.vector.body
+
+
+%struct.str = type { i8, i8, i8 }
+
+define dso_local void @foo(i16* nocapture noundef writeonly %pOut, i16* nocapture noundef readonly %Coefs, %struct.str* nocapture noundef readonly %pQ, i32 noundef %Intra) local_unnamed_addr #0 {
+entry:
+  %Coefs13 = ptrtoint i16* %Coefs to i32
+  %pOut12 = ptrtoint i16* %pOut to i32
+  %cmp10 = icmp slt i32 %Intra, 16
+  br i1 %cmp10, label %iter.check, label %for.end
+
+iter.check:                                       ; preds = %entry
+  %Q = getelementptr inbounds %struct.str, %struct.str* %pQ, i32 0, i32 0
+  %0 = load i8, i8* %Q, align 1
+  %conv3 = zext i8 %0 to i32
+  %1 = sub nsw i32 0, %conv3
+  %2 = sub i32 16, %Intra
+  %min.iters.check = icmp ult i32 %2, 8
+  br i1 %min.iters.check, label %for.body.preheader, label %vector.memcheck
+
+vector.memcheck:                                  ; preds = %iter.check
+  %3 = shl i32 %Intra, 1
+  %4 = add i32 %3, %pOut12
+  %5 = add i32 %3, %Coefs13
+  %6 = sub i32 %4, %5
+  %diff.check = icmp ult i32 %6, 128
+  br i1 %diff.check, label %for.body.preheader, label %vector.main.loop.iter.check
+
+vector.main.loop.iter.check:                      ; preds = %vector.memcheck
+  %min.iters.check14 = icmp ult i32 %2, 64
+  br i1 %min.iters.check14, label %vec.epilog.ph, label %vector.ph
+
+vector.ph:                                        ; preds = %vector.main.loop.iter.check
+  %n.vec = and i32 %2, -64
+  %bd.spinsert = insertelement <32 x i32> poison, i32 %1, i64 0
+  %bd.sp = shufflevector <32 x i32> %bd.spinsert, <32 x i32> poison, <32 x i32> zeroinitializer
+  %bd.spinsert16 = insertelement <32 x i32> poison, i32 %conv3, i64 0
+  %bd.sp17 = shufflevector <32 x i32> %bd.spinsert16, <32 x i32> poison, <32 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %offset.idx = add i32 %index, %Intra
+  %7 = getelementptr inbounds i16, i16* %Coefs, i32 %offset.idx
+  %8 = bitcast i16* %7 to <32 x i16>*
+  %wide.load = load <32 x i16>, <32 x i16>* %8, align 2
+  %9 = getelementptr inbounds i16, i16* %7, i32 32
+  %10 = bitcast i16* %9 to <32 x i16>*
+  %wide.load15 = load <32 x i16>, <32 x i16>* %10, align 2
+  %11 = icmp slt <32 x i16> %wide.load, zeroinitializer
+  %12 = icmp slt <32 x i16> %wide.load15, zeroinitializer
+  %13 = select <32 x i1> %11, <32 x i32> %bd.sp, <32 x i32> %bd.sp17
+  %14 = select <32 x i1> %12, <32 x i32> %bd.sp, <32 x i32> %bd.sp17
+  %15 = trunc <32 x i32> %13 to <32 x i16>
+  %16 = trunc <32 x i32> %14 to <32 x i16>
+  %17 = getelementptr inbounds i16, i16* %pOut, i32 %offset.idx
+  %18 = bitcast i16* %17 to <32 x i16>*
+  store <32 x i16> %15, <32 x i16>* %18, align 2
+  %19 = getelementptr inbounds i16, i16* %17, i32 32
+  %20 = bitcast i16* %19 to <32 x i16>*
+  store <32 x i16> %16, <32 x i16>* %20, align 2
+  %index.next = add nuw i32 %index, 64
+  %21 = icmp eq i32 %index.next, %n.vec
+  br i1 %21, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  %cmp.n = icmp eq i32 %2, %n.vec
+  br i1 %cmp.n, label %for.end, label %vec.epilog.iter.check
+
+vec.epilog.iter.check:                            ; preds = %middle.block
+  %ind.end24 = add i32 %n.vec, %Intra
+  %n.vec.remaining = and i32 %2, 56
+  %min.epilog.iters.check = icmp eq i32 %n.vec.remaining, 0
+  br i1 %min.epilog.iters.check, label %for.body.preheader, label %vec.epilog.ph
+
+vec.epilog.ph:                                    ; preds = %vector.main.loop.iter.check, %vec.epilog.iter.check
+  %vec.epilog.resume.val = phi i32 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ]
+  %n.vec23 = and i32 %2, -8
+  %ind.end = add i32 %n.vec23, %Intra
+  %bd.spinsert29 = insertelement <8 x i32> poison, i32 %1, i64 0
+  %bd.sp30 = shufflevector <8 x i32> %bd.spinsert29, <8 x i32> poison, <8 x i32> zeroinitializer
+  %bd.spinsert31 = insertelement <8 x i32> poison, i32 %conv3, i64 0
+  %bd.sp32 = shufflevector <8 x i32> %bd.spinsert31, <8 x i32> poison, <8 x i32> zeroinitializer
+  br label %vec.epilog.vector.body
+
+vec.epilog.vector.body:                           ; preds = %vec.epilog.vector.body, %vec.epilog.ph
+  %index26 = phi i32 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next33, %vec.epilog.vector.body ]
+  %offset.idx27 = add i32 %index26, %Intra
+  %22 = getelementptr inbounds i16, i16* %Coefs, i32 %offset.idx27
+  %23 = bitcast i16* %22 to <8 x i16>*
+  %wide.load28 = load <8 x i16>, <8 x i16>* %23, align 2
+  %24 = icmp slt <8 x i16> %wide.load28, zeroinitializer
+  %25 = select <8 x i1> %24, <8 x i32> %bd.sp30, <8 x i32> %bd.sp32
+  %26 = trunc <8 x i32> %25 to <8 x i16>
+  %27 = getelementptr inbounds i16, i16* %pOut, i32 %offset.idx27
+  %28 = bitcast i16* %27 to <8 x i16>*
+  store <8 x i16> %26, <8 x i16>* %28, align 2
+  %index.next33 = add nuw i32 %index26, 8
+  %29 = icmp eq i32 %index.next33, %n.vec23
+  br i1 %29, label %vec.epilog.middle.block, label %vec.epilog.vector.body
+
+vec.epilog.middle.block:                          ; preds = %vec.epilog.vector.body
+  %cmp.n25 = icmp eq i32 %2, %n.vec23
+  br i1 %cmp.n25, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %vector.memcheck, %iter.check, %vec.epilog.iter.check, %vec.epilog.middle.block
+  %i.011.ph = phi i32 [ %Intra, %iter.check ], [ %Intra, %vector.memcheck ], [ %ind.end24, %vec.epilog.iter.check ], [ %ind.end, %vec.epilog.middle.block ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.011 = phi i32 [ %inc, %for.body ], [ %i.011.ph, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i16, i16* %Coefs, i32 %i.011
+  %30 = load i16, i16* %arrayidx, align 2
+  %cmp1 = icmp slt i16 %30, 0
+  %31 = select i1 %cmp1, i32 %1, i32 %conv3
+  %conv4 = trunc i32 %31 to i16
+  %arrayidx5 = getelementptr inbounds i16, i16* %pOut, i32 %i.011
+  store i16 %conv4, i16* %arrayidx5, align 2
+  %inc = add i32 %i.011, 1
+  %exitcond.not = icmp eq i32 %inc, 16
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %middle.block, %vec.epilog.middle.block, %entry
+  ret void
+}
+
+attributes #0 = { argmemonly nofree norecurse nosync nounwind "target-cpu"="hexagonv66" "target-features"="+hvx-length64b,+hvxv66,+v66" }
diff --git a/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll b/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll
new file mode 100644
index 00000000000000..b2a9f732bdddc7
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll
@@ -0,0 +1,15 @@
+; Check if a C2_tfrrp instruction with constant i32 0 input is generated
+; The constant 0 is generated by a transfer immediate instruction.
+
+; RUN: llc -march=hexagon -debug-only=isel 2>&1 < %s - | FileCheck %s
+
+; CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 0
+; CHECK-NEXT: predregs = C2_tfrrp killed [[R0]]:intregs
+
+define void @test_false(i1 %0) {
+  %2 = insertelement <1024 x i1> zeroinitializer, i1 %0, i64 0
+  tail call void @llvm.masked.store.v1024f32.p0(<1024 x float> zeroinitializer, ptr null, i32 1, <1024 x i1> %2)
+  ret void
+}
+
+declare void @llvm.masked.store.v1024f32.p0(<1024 x float>, ptr nocapture, i32 immarg, <1024 x i1>)

@quic-santdas quic-santdas deleted the patv8i1 branch December 20, 2024 08:20
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants