From 84727b62e626877c90b690c73155e5990b9b5c66 Mon Sep 17 00:00:00 2001
From: quic-santdas
Date: Sun, 22 Sep 2024 09:45:14 -0700
Subject: [PATCH] [Hexagon] Add missing pattern for v8i1 type

HexagonISD::PFALSE and PTRUE patterns do not form independently in
general, as they are treated like operands of all 0s or all 1s,
e.g. i32 = transfer HexagonISD::PFALSE.

In this case, v8i1 = HexagonISD::PFALSE is formed independently,
without an accompanying opcode.

This patch adds patterns for a standalone PFALSE/PTRUE of type v8i1:
all 0s or all 1s are first transferred to a scalar register (A2_tfrsi),
and that register is then transferred to a predicate register
(C2_tfrrp).

The test inspects -debug-only=isel output, which is only available in
assertion-enabled builds, so it is marked REQUIRES: asserts.
---
 llvm/lib/Target/Hexagon/HexagonPatterns.td   |  3 +++
 llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll

diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 9bd45c72b7d4d..cba5ff1ab0d9b 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -108,6 +108,9 @@ def ptrue: PatFrag<(ops), (HexagonPTRUE)>;
 def pfalse: PatFrag<(ops), (HexagonPFALSE)>;
 def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>;
 
+def: Pat<(v8i1 (HexagonPFALSE)), (C2_tfrrp (A2_tfrsi (i32 0)))>;
+def: Pat<(v8i1 (HexagonPTRUE)), (C2_tfrrp (A2_tfrsi (i32 -1)))>;
+
 def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
                     (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
 def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
diff --git a/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll b/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll
new file mode 100644
index 0000000000000..b2a9f732bdddc
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/isel/isel-tfrrp.ll
@@ -0,0 +1,16 @@
+; Check if a C2_tfrrp instruction with constant i32 0 input is generated
+; The constant 0 is generated by a transfer immediate instruction.
+
+; REQUIRES: asserts
+; RUN: llc -march=hexagon -debug-only=isel 2>&1 < %s - | FileCheck %s
+
+; CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 0
+; CHECK-NEXT: predregs = C2_tfrrp killed [[R0]]:intregs
+
+define void @test_false(i1 %0) {
+  %2 = insertelement <1024 x i1> zeroinitializer, i1 %0, i64 0
+  tail call void @llvm.masked.store.v1024f32.p0(<1024 x float> zeroinitializer, ptr null, i32 1, <1024 x i1> %2)
+  ret void
+}
+
+declare void @llvm.masked.store.v1024f32.p0(<1024 x float>, ptr nocapture, i32 immarg, <1024 x i1>)