-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Patv8i1 #120695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Patv8i1 #120695
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
When a concatenation of vectors is lowered, a vector rotation (VROR) is performed as part of the lowering. The rotation amount was calculated for the wrong direction. This patch fixes the rotation factor.
Removed some extra function attributes which were not needed.
Change-Id: I04898ffa9d75a8177b9aa13e0836988e03eb26c9
HexagonISD::PFALSE and HexagonISD::PTRUE nodes do not, in general, form independently, as they are treated like operands of all 0s or all 1s, e.g. i32 = transfer HexagonISD::PFALSE. In this case, however, v8i1 = HexagonISD::PFALSE is formed independently, without an accompanying opcode. This patch adds patterns for PFALSE/PTRUE that transfer all 0s or all 1s to a scalar register (A2_tfrsi) and then transfer that register to a predicate register such as v8i1 (C2_tfrrp).
Member
|
@llvm/pr-subscribers-backend-hexagon Author: Santanu Das (quic-santdas) Changes. Full diff: https://github.com/llvm/llvm-project/pull/120695.diff 6 Files Affected:
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 81035849491bcb..39b8c829a0b217 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1748,7 +1748,7 @@ HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
- SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
+ SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
SDValue Res = getZero(dl, ByteTy, DAG);
for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index baa552fcd220d8..e54f97eb92e57d 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -108,6 +108,9 @@ def ptrue: PatFrag<(ops), (HexagonPTRUE)>;
def pfalse: PatFrag<(ops), (HexagonPFALSE)>;
def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>;
+def: Pat<(v8i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>;
+def: Pat<(v8i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>;
+
def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
(HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 6e9a78b7665042..84edbe6fd870b3 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -46,6 +46,10 @@ class HexagonTargetMachine : public LLVMTargetMachine {
MachineFunctionInfo *
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const override;
+
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ return true;
+ }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/Hexagon/addrspacecast-crash.ll b/llvm/test/CodeGen/Hexagon/addrspacecast-crash.ll
new file mode 100644
index 00000000000000..ce197bdeea1f38
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/addrspacecast-crash.ll
@@ -0,0 +1,12 @@
+; Tests if addrspacecast is handled in Hexagon backend
+
+; REQUIRES: asserts
+
+; RUN: llc -march=hexagon %s -o /dev/null
+
+define double @f(ptr %G, ptr %x) {
+BB:
+ %Castaddrspacecast = addrspacecast ptr %x to ptr addrspace(1)
+ store ptr addrspace(1) %Castaddrspacecast, ptr %G, align 8
+ ret double 0.000000e+00
+}
diff --git a/llvm/test/CodeGen/Hexagon/hvx-concat-lower.ll b/llvm/test/CodeGen/Hexagon/hvx-concat-lower.ll
new file mode 100644
index 00000000000000..2037493972fc76
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/hvx-concat-lower.ll
@@ -0,0 +1,135 @@
+; When lowering a concat of vectors for 64B HVX vectors, check that the
+; rotation amount used by VROR is computed for the correct direction.
+
+; RUN: llc -march=hexagon -O2 %s -o - | FileCheck %s
+
+; CHECK: vec.epilog.ph
+; CHECK: r{{.*}} = {{.*}}#48
+; CHECK: vec.epilog.vector.body
+
+
+%struct.str = type { i8, i8, i8 }
+
+define dso_local void @foo(i16* nocapture noundef writeonly %pOut, i16* nocapture noundef readonly %Coefs, %struct.str* nocapture noundef readonly %pQ, i32 noundef %Intra) local_unnamed_addr #0 {
+entry:
+ %Coefs13 = ptrtoint i16* %Coefs to i32
+ %pOut12 = ptrtoint i16* %pOut to i32
+ %cmp10 = icmp slt i32 %Intra, 16
+ br i1 %cmp10, label %iter.check, label %for.end
+
+iter.check: ; preds = %entry
+ %Q = getelementptr inbounds %struct.str, %struct.str* %pQ, i32 0, i32 0
+ %0 = load i8, i8* %Q, align 1
+ %conv3 = zext i8 %0 to i32
+ %1 = sub nsw i32 0, %conv3
+ %2 = sub i32 16, %Intra
+ %min.iters.check = icmp ult i32 %2, 8
+ br i1 %min.iters.check, label %for.body.preheader, label %vector.memcheck
+
+vector.memcheck: ; preds = %iter.check
+ %3 = shl i32 %Intra, 1
+ %4 = add i32 %3, %pOut12
+ %5 = add i32 %3, %Coefs13
+ %6 = sub i32 %4, %5
+ %diff.check = icmp ult i32 %6, 128
+ br i1 %diff.check, label %for.body.preheader, label %vector.main.loop.iter.check
+
+vector.main.loop.iter.check: ; preds = %vector.memcheck
+ %min.iters.check14 = icmp ult i32 %2, 64
+ br i1 %min.iters.check14, label %vec.epilog.ph, label %vector.ph
+
+vector.ph: ; preds = %vector.main.loop.iter.check
+ %n.vec = and i32 %2, -64
+ %bd.spinsert = insertelement <32 x i32> poison, i32 %1, i64 0
+ %bd.sp = shufflevector <32 x i32> %bd.spinsert, <32 x i32> poison, <32 x i32> zeroinitializer
+ %bd.spinsert16 = insertelement <32 x i32> poison, i32 %conv3, i64 0
+ %bd.sp17 = shufflevector <32 x i32> %bd.spinsert16, <32 x i32> poison, <32 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %offset.idx = add i32 %index, %Intra
+ %7 = getelementptr inbounds i16, i16* %Coefs, i32 %offset.idx
+ %8 = bitcast i16* %7 to <32 x i16>*
+ %wide.load = load <32 x i16>, <32 x i16>* %8, align 2
+ %9 = getelementptr inbounds i16, i16* %7, i32 32
+ %10 = bitcast i16* %9 to <32 x i16>*
+ %wide.load15 = load <32 x i16>, <32 x i16>* %10, align 2
+ %11 = icmp slt <32 x i16> %wide.load, zeroinitializer
+ %12 = icmp slt <32 x i16> %wide.load15, zeroinitializer
+ %13 = select <32 x i1> %11, <32 x i32> %bd.sp, <32 x i32> %bd.sp17
+ %14 = select <32 x i1> %12, <32 x i32> %bd.sp, <32 x i32> %bd.sp17
+ %15 = trunc <32 x i32> %13 to <32 x i16>
+ %16 = trunc <32 x i32> %14 to <32 x i16>
+ %17 = getelementptr inbounds i16, i16* %pOut, i32 %offset.idx
+ %18 = bitcast i16* %17 to <32 x i16>*
+ store <32 x i16> %15, <32 x i16>* %18, align 2
+ %19 = getelementptr inbounds i16, i16* %17, i32 32
+ %20 = bitcast i16* %19 to <32 x i16>*
+ store <32 x i16> %16, <32 x i16>* %20, align 2
+ %index.next = add nuw i32 %index, 64
+ %21 = icmp eq i32 %index.next, %n.vec
+ br i1 %21, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+ %cmp.n = icmp eq i32 %2, %n.vec
+ br i1 %cmp.n, label %for.end, label %vec.epilog.iter.check
+
+vec.epilog.iter.check: ; preds = %middle.block
+ %ind.end24 = add i32 %n.vec, %Intra
+ %n.vec.remaining = and i32 %2, 56
+ %min.epilog.iters.check = icmp eq i32 %n.vec.remaining, 0
+ br i1 %min.epilog.iters.check, label %for.body.preheader, label %vec.epilog.ph
+
+vec.epilog.ph: ; preds = %vector.main.loop.iter.check, %vec.epilog.iter.check
+ %vec.epilog.resume.val = phi i32 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ]
+ %n.vec23 = and i32 %2, -8
+ %ind.end = add i32 %n.vec23, %Intra
+ %bd.spinsert29 = insertelement <8 x i32> poison, i32 %1, i64 0
+ %bd.sp30 = shufflevector <8 x i32> %bd.spinsert29, <8 x i32> poison, <8 x i32> zeroinitializer
+ %bd.spinsert31 = insertelement <8 x i32> poison, i32 %conv3, i64 0
+ %bd.sp32 = shufflevector <8 x i32> %bd.spinsert31, <8 x i32> poison, <8 x i32> zeroinitializer
+ br label %vec.epilog.vector.body
+
+vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph
+ %index26 = phi i32 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next33, %vec.epilog.vector.body ]
+ %offset.idx27 = add i32 %index26, %Intra
+ %22 = getelementptr inbounds i16, i16* %Coefs, i32 %offset.idx27
+ %23 = bitcast i16* %22 to <8 x i16>*
+ %wide.load28 = load <8 x i16>, <8 x i16>* %23, align 2
+ %24 = icmp slt <8 x i16> %wide.load28, zeroinitializer
+ %25 = select <8 x i1> %24, <8 x i32> %bd.sp30, <8 x i32> %bd.sp32
+ %26 = trunc <8 x i32> %25 to <8 x i16>
+ %27 = getelementptr inbounds i16, i16* %pOut, i32 %offset.idx27
+ %28 = bitcast i16* %27 to <8 x i16>*
+ store <8 x i16> %26, <8 x i16>* %28, align 2
+ %index.next33 = add nuw i32 %index26, 8
+ %29 = icmp eq i32 %index.next33, %n.vec23
+ br i1 %29, label %vec.epilog.middle.block, label %vec.epilog.vector.body
+
+vec.epilog.middle.block: ; preds = %vec.epilog.vector.body
+ %cmp.n25 = icmp eq i32 %2, %n.vec23
+ br i1 %cmp.n25, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %vector.memcheck, %iter.check, %vec.epilog.iter.check, %vec.epilog.middle.block
+ %i.011.ph = phi i32 [ %Intra, %iter.check ], [ %Intra, %vector.memcheck ], [ %ind.end24, %vec.epilog.iter.check ], [ %ind.end, %vec.epilog.middle.block ]
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.011 = phi i32 [ %inc, %for.body ], [ %i.011.ph, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i16, i16* %Coefs, i32 %i.011
+ %30 = load i16, i16* %arrayidx, align 2
+ %cmp1 = icmp slt i16 %30, 0
+ %31 = select i1 %cmp1, i32 %1, i32 %conv3
+ %conv4 = trunc i32 %31 to i16
+ %arrayidx5 = getelementptr inbounds i16, i16* %pOut, i32 %i.011
+ store i16 %conv4, i16* %arrayidx5, align 2
+ %inc = add i32 %i.011, 1
+ %exitcond.not = icmp eq i32 %inc, 16
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %middle.block, %vec.epilog.middle.block, %entry
+ ret void
+}
+
+attributes #0 = { argmemonly nofree norecurse nosync nounwind "target-cpu"="hexagonv66" "target-features"="+hvx-length64b,+hvxv66,+v66" }
diff --git a/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll b/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll
new file mode 100644
index 00000000000000..b2a9f732bdddc7
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll
@@ -0,0 +1,15 @@
+; Check if a C2_tfrrp instruction with constant i32 0 input is generated
+; The constant 0 is generated by a transfer immediate instruction.
+
+; RUN: llc -march=hexagon -debug-only=isel 2>&1 < %s - | FileCheck %s
+
+; CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 0
+; CHECK-NEXT: predregs = C2_tfrrp killed [[R0]]:intregs
+
+define void @test_false(i1 %0) {
+ %2 = insertelement <1024 x i1> zeroinitializer, i1 %0, i64 0
+ tail call void @llvm.masked.store.v1024f32.p0(<1024 x float> zeroinitializer, ptr null, i32 1, <1024 x i1> %2)
+ ret void
+}
+
+declare void @llvm.masked.store.v1024f32.p0(<1024 x float>, ptr nocapture, i32 immarg, <1024 x i1>)
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.