From 085f68546325db92d17396891a1a5488350ed7a1 Mon Sep 17 00:00:00 2001 From: James Newling Date: Thu, 19 Jun 2025 23:07:06 -0700 Subject: [PATCH 1/2] [AIE2p] Use tablegen pattern for trunc where dst is 1/4 bitsize --- .../Target/AIE/aie2p/AIE2PInstrPatterns.td | 45 +++++++++++++---- .../Target/AIE/aie2p/AIE2PLegalizerInfo.cpp | 21 ++++++-- .../aie2p/GlobalIsel/inst-select-trunc.mir | 48 +++++++++++++++++++ 3 files changed, 100 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td index 77c3d23f6f5b..dfdd3e293a82 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td @@ -15,6 +15,7 @@ include "AIEBaseInstrPatterns.td" + // Placeholder for a bare frameindex. This pseudo represents the // pointer register to be allocated, initialized with the address // represented by the frameindex in its only operand. @@ -1257,26 +1258,52 @@ defm : Extract_512 : + +// | 1024-bits -> 512-bits | 512-bits -> 256-bits | 1024-bits -> 256-bits | +// +=======================+=======================+=======================| +// | v16i64 -> v16i32 | v8i32 -> v8i16 | v16i64 -> v16i16 | +// | v32i32 -> v32i16 | v16i16 -> v16i8 | v32i32 -> v32i8 | +// | v64i16 -> v64i8 | v32i8 -> v32i4 | | +// +=======================+=======================+=======================| +class Trunc1024To512Pat : Pat<(DstTy (trunc SrcTy:$s1)), (ShuffleInstOpc (EXTRACT_SUBREG VEC1024:$s1, sub_512_lo ), (EXTRACT_SUBREG VEC1024:$s1, sub_512_hi ), (MOV_RLC_imm11_pseudo (i32 Mode)))>; -def : Trunc1024Pat; -def : Trunc1024Pat; -def : Trunc1024Pat; -def : Trunc1024Pat; +def : Trunc1024To512Pat; +def : Trunc1024To512Pat; +def : Trunc1024To512Pat; +def : Trunc1024To512Pat; -class Trunc512Pat : +class Trunc512To256Pat : Pat<(DstTy (trunc SrcTy:$s1)), (EXTRACT_SUBREG (VSHUFFLE_vec_shuffle_x VEC512:$s1, VEC512:$s1, (MOV_RLC_imm11_pseudo (i32 Mode))), sub_256_lo)>; -def : Trunc512Pat; -def : Trunc512Pat; -def : Trunc512Pat; +def : Trunc512To256Pat; +def : Trunc512To256Pat; +def : Trunc512To256Pat; + +// This is effectively Trunc1024To512 followed by Trunc512To256. +class Trunc1024To256Pat : + Pat<(DstTy (trunc SrcTy:$s1)), + (EXTRACT_SUBREG + (VSHUFFLE_vec_shuffle_x + (VSHUFFLE_vec_shuffle_x + (EXTRACT_SUBREG VEC1024:$s1, sub_512_lo), + (EXTRACT_SUBREG VEC1024:$s1, sub_512_hi), + (MOV_RLC_imm11_pseudo (i32 LargeMode))), + (VSHUFFLE_vec_shuffle_x + (EXTRACT_SUBREG VEC1024:$s1, sub_512_lo), + (EXTRACT_SUBREG VEC1024:$s1, sub_512_hi), + (MOV_RLC_imm11_pseudo (i32 LargeMode))), + (MOV_RLC_imm11_pseudo (i32 SmallMode))), + sub_256_lo)>; + +def : Trunc1024To256Pat; +def : Trunc1024To256Pat; class EventPat : Pat<(int_aie2p_event Imm), (Inst)>; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp index 5ea47376c667..9e0a55fd87c5 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp @@ -281,13 +281,24 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST) getActionDefinitionsBuilder(G_TRUNC) .legalIf([=](const LegalityQuery &Query) { + // Return true if there is a tablegen pattern to lower truncs on vectors + // of specific element types and lengths to shuffles. const LLT &SrcTy = Query.Types[1]; const LLT &DstTy = Query.Types[0]; - return SrcTy.isVector() && DstTy.isVector() && - (SrcTy.getSizeInBits() == 512 || - SrcTy.getSizeInBits() == 1024) && - DstTy.getElementType().getSizeInBits() * 2 == - SrcTy.getElementType().getSizeInBits(); + + if (!SrcTy.isVector() || !DstTy.isVector()) + return false; + + const auto SrcElmBits = SrcTy.getElementType().getSizeInBits(); + if (SrcElmBits != 64 && SrcElmBits != 32 && SrcElmBits != 16) + return false; + + const TypeSize SrcBits = SrcTy.getSizeInBits(); + const TypeSize DstBits = DstTy.getSizeInBits(); + + return ((SrcBits == 1024 && DstBits == 256) || + (SrcBits == 1024 && DstBits == 512) || + (SrcBits == 512 && DstBits == 256)); }) .legalIf([=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir index 67a709e48189..d0d68b677425 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir @@ -184,4 +184,52 @@ body: | %0:vregbank(<32 x s8>) = G_TRUNC %1(<32 x s16>) PseudoRET implicit $lr, implicit %0 ... +--- +name: v32s8_trunc_v32s32_vec1024 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + ; CHECK-LABEL: name: v32s8_trunc_v32s32_vec1024 + ; CHECK: [[DEF:%[0-9]+]]:vec1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY1]], [[COPY]], [[MOV_RLC_imm11_pseudo1]] + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x1:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY3]], [[COPY2]], [[MOV_RLC_imm11_pseudo2]] + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x2:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[VSHUFFLE_vec_shuffle_x1]], [[VSHUFFLE_vec_shuffle_x]], [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x2]].sub_256_lo + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY4]] + %1:vregbank(<32 x s32>) = G_IMPLICIT_DEF + %0:vregbank(<32 x s8>) = G_TRUNC %1(<32 x s32>) + PseudoRET implicit $lr, implicit %0 +... +--- +name: v16s16_trunc_v16s64_vec1024 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + ; CHECK-LABEL: name: v16s16_trunc_v16s64_vec1024 + ; CHECK: [[DEF:%[0-9]+]]:vec1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY1]], [[COPY]], [[MOV_RLC_imm11_pseudo1]] + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x1:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY3]], [[COPY2]], [[MOV_RLC_imm11_pseudo2]] + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x2:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[VSHUFFLE_vec_shuffle_x1]], [[VSHUFFLE_vec_shuffle_x]], [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x2]].sub_256_lo + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY4]] + %1:vregbank(<16 x s64>) = G_IMPLICIT_DEF + %0:vregbank(<16 x s16>) = G_TRUNC %1(<16 x s64>) + PseudoRET implicit $lr, implicit %0 +... From a815f2b27637f11fc5a9994a13078d48d3f667ac Mon Sep 17 00:00:00 2001 From: James Newling Date: Thu, 26 Jun 2025 18:19:37 -0700 Subject: [PATCH 2/2] [AIE2p] Use shuffle mode 36 (trunc 512->128) and legalize trunc 1024->256 by splitting in two --- .../Target/AIE/aie2p/AIE2PInstrPatterns.td | 48 ++++++------- .../Target/AIE/aie2p/AIE2PLegalizerInfo.cpp | 67 ++++++++++++++----- .../aie2p/GlobalIsel/inst-select-trunc.mir | 55 ++++++--------- .../AIE/aie2p/GlobalIsel/legalize-trunc.mir | 54 +++++++++++++++ 4 files changed, 148 insertions(+), 76 deletions(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td index dfdd3e293a82..dff5b5b45246 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td @@ -15,7 +15,6 @@ include "AIEBaseInstrPatterns.td" - // Placeholder for a bare frameindex. This pseudo represents the // pointer register to be allocated, initialized with the address // represented by the frameindex in its only operand. @@ -1257,14 +1256,19 @@ defm : Extract_512 512-bits | 512-bits -> 256-bits | 1024-bits -> 256-bits | -// +=======================+=======================+=======================| -// | v16i64 -> v16i32 | v8i32 -> v8i16 | v16i64 -> v16i16 | -// | v32i32 -> v32i16 | v16i16 -> v16i8 | v32i32 -> v32i8 | -// | v64i16 -> v64i8 | v32i8 -> v32i4 | | -// +=======================+=======================+=======================| +// vectors of the VSHUFFLE instruction. Specifically, the VSHUFFLE instruction +// with mode 0 performs a transpose on a tensor of type 64x2xi8. Other modes +// used here perform transposes on different types: + +// Mode Input type +// =====+============+ +// 0 | 64x2xi8 | +// 2 | 32x2xi16 | +// 4 | 16x2xi32 | +// 28 | 8x4xi16 | +// 36 | 16x4xi8 | +// =====+============+ + class Trunc1024To512Pat : Pat<(DstTy (trunc SrcTy:$s1)), (ShuffleInstOpc @@ -1286,24 +1290,16 @@ def : Trunc512To256Pat; def : Trunc512To256Pat; def : Trunc512To256Pat; -// This is effectively Trunc1024To512 followed by Trunc512To256. -class Trunc1024To256Pat : +class Trunc512To128Pat : Pat<(DstTy (trunc SrcTy:$s1)), - (EXTRACT_SUBREG - (VSHUFFLE_vec_shuffle_x - (VSHUFFLE_vec_shuffle_x - (EXTRACT_SUBREG VEC1024:$s1, sub_512_lo), - (EXTRACT_SUBREG VEC1024:$s1, sub_512_hi), - (MOV_RLC_imm11_pseudo (i32 LargeMode))), - (VSHUFFLE_vec_shuffle_x - (EXTRACT_SUBREG VEC1024:$s1, sub_512_lo), - (EXTRACT_SUBREG VEC1024:$s1, sub_512_hi), - (MOV_RLC_imm11_pseudo (i32 LargeMode))), - (MOV_RLC_imm11_pseudo (i32 SmallMode))), - sub_256_lo)>; - -def : Trunc1024To256Pat; -def : Trunc1024To256Pat; + (VMOV_alu_mv_mv_w_to_q + (EXTRACT_SUBREG + (VSHUFFLE_vec_shuffle_x VEC512:$s1, VEC512:$s1, + (MOV_RLC_imm11_pseudo (i32 Mode))), + sub_256_lo) + )>; +def : Trunc512To128Pat; +def : Trunc512To128Pat; class EventPat : Pat<(int_aie2p_event Imm), (Inst)>; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp index 9e0a55fd87c5..2dc05d7abdd9 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp @@ -280,31 +280,60 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST) .clampScalar(1, S32, S32); getActionDefinitionsBuilder(G_TRUNC) + // Mark as legal all G_TRUNC with tablegen selection pattern: .legalIf([=](const LegalityQuery &Query) { - // Return true if there is a tablegen pattern to lower truncs on vectors - // of specific element types and lengths to shuffles. const LLT &SrcTy = Query.Types[1]; const LLT &DstTy = Query.Types[0]; - if (!SrcTy.isVector() || !DstTy.isVector()) + if (!SrcTy.isVector()) return false; - - const auto SrcElmBits = SrcTy.getElementType().getSizeInBits(); - if (SrcElmBits != 64 && SrcElmBits != 32 && SrcElmBits != 16) - return false; - - const TypeSize SrcBits = SrcTy.getSizeInBits(); - const TypeSize DstBits = DstTy.getSizeInBits(); - - return ((SrcBits == 1024 && DstBits == 256) || - (SrcBits == 1024 && DstBits == 512) || - (SrcBits == 512 && DstBits == 256)); + assert(DstTy.isVector() && "Src is vector so Dst must be vector"); + + const uint16_t SrcElemBits = SrcTy.getElementType().getSizeInBits(); + const uint16_t DstElemBits = DstTy.getElementType().getSizeInBits(); + const uint16_t VectorSize = SrcTy.getNumElements(); + assert(VectorSize == DstTy.getNumElements() && + "Src and Dst vectors must have same number of elements"); + + // The case where the source vector's element type is i64: + // v16i64 -> v16i32, + // v8i64 -> v8i32, + // v8i64 -> v8i16, + if (SrcElemBits == 64) { + return (VectorSize == 16 && DstElemBits == 32) || + (VectorSize == 8 && DstElemBits == 32) || + (VectorSize == 8 && DstElemBits == 16); + } + + // The case where the source vector's element type is i32: + // v32i32 -> v32i16, + // v16i32 -> v16i16, + // v16i32 -> v16i8. + if (SrcElemBits == 32) { + return (VectorSize == 32 && DstElemBits == 16) || + (VectorSize == 16 && DstElemBits == 16) || + (VectorSize == 16 && DstElemBits == 8); + } + + // The case where the source vector's element type is i16: + // v64i16 -> v64i8, + // v32i16 -> v32i8. + if (SrcElemBits == 16) { + return (VectorSize == 64 && DstElemBits == 8) || + (VectorSize == 32 && DstElemBits == 8); + } + + return false; }) + + // Mark as legal all scalar G_TRUNC: .legalIf([=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; const LLT &DstTy = Query.Types[0]; return SrcTy.isScalar() && DstTy.isScalar(); }) + + // G_TRUNC 256-bit -> 128-bit is legalized by padding to 2x bitwidth: .customIf([=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; const LLT &DstTy = Query.Types[0]; @@ -312,10 +341,18 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST) DstTy.getElementType().getSizeInBits() * 2 == SrcTy.getElementType().getSizeInBits(); }) + + // G_TRUNC on 2048-bit vector is legalized to 2 smaller G_TRUNCs. + // Similarly for G_TRUNC 1024-bit -> 256-bit: .fewerElementsIf( [=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; - return SrcTy.isVector() && SrcTy.getSizeInBits() == 2048; + const LLT &DstTy = Query.Types[0]; + if (!SrcTy.isVector() || !DstTy.isVector()) + return false; + const TypeSize SrcBits = SrcTy.getSizeInBits(); + const TypeSize DstBits = DstTy.getSizeInBits(); + return (SrcBits == 2048 || (SrcBits == 1024 && DstBits == 256)); }, [=](const LegalityQuery &Query) { const LLT &SrcTy = Query.Types[1]; diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir index d0d68b677425..f0b8d0c4e463 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-trunc.mir @@ -185,51 +185,36 @@ body: | PseudoRET implicit $lr, implicit %0 ... --- -name: v32s8_trunc_v32s32_vec1024 +name: v16s8_trunc_v16s32_vec512 legalized: true regBankSelected: true body: | bb.1.entry: - ; CHECK-LABEL: name: v32s8_trunc_v32s32_vec1024 - ; CHECK: [[DEF:%[0-9]+]]:vec1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 0 - ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo - ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY1]], [[COPY]], [[MOV_RLC_imm11_pseudo1]] - ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo - ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x1:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY3]], [[COPY2]], [[MOV_RLC_imm11_pseudo2]] - ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x2:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[VSHUFFLE_vec_shuffle_x1]], [[VSHUFFLE_vec_shuffle_x]], [[MOV_RLC_imm11_pseudo]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x2]].sub_256_lo - ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY4]] - %1:vregbank(<32 x s32>) = G_IMPLICIT_DEF - %0:vregbank(<32 x s8>) = G_TRUNC %1(<32 x s32>) + ; CHECK-LABEL: name: v16s8_trunc_v16s32_vec512 + ; CHECK: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 36 + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[DEF]], [[DEF]], [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x]].sub_256_lo + ; CHECK-NEXT: [[VMOV_alu_mv_mv_w_to_q:%[0-9]+]]:vec128 = VMOV_alu_mv_mv_w_to_q [[COPY]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VMOV_alu_mv_mv_w_to_q]] + %1:vregbank(<16 x s32>) = G_IMPLICIT_DEF + %0:vregbank(<16 x s8>) = G_TRUNC %1(<16 x s32>) PseudoRET implicit $lr, implicit %0 ... - --- -name: v16s16_trunc_v16s64_vec1024 +name: v8s16_trunc_v8s64_vec512 legalized: true regBankSelected: true body: | bb.1.entry: - ; CHECK-LABEL: name: v16s16_trunc_v16s64_vec1024 - ; CHECK: [[DEF:%[0-9]+]]:vec1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 2 - ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo1:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 4 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo - ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY1]], [[COPY]], [[MOV_RLC_imm11_pseudo1]] - ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo2:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 4 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:exo = COPY [[DEF]].sub_512_hi - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:exe = COPY [[DEF]].sub_512_lo - ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x1:%[0-9]+]]:vec512 = VSHUFFLE_vec_shuffle_x [[COPY3]], [[COPY2]], [[MOV_RLC_imm11_pseudo2]] - ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x2:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[VSHUFFLE_vec_shuffle_x1]], [[VSHUFFLE_vec_shuffle_x]], [[MOV_RLC_imm11_pseudo]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x2]].sub_256_lo - ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[COPY4]] - %1:vregbank(<16 x s64>) = G_IMPLICIT_DEF - %0:vregbank(<16 x s16>) = G_TRUNC %1(<16 x s64>) + ; CHECK-LABEL: name: v8s16_trunc_v8s64_vec512 + ; CHECK: [[DEF:%[0-9]+]]:vec512 = IMPLICIT_DEF + ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 28 + ; CHECK-NEXT: [[VSHUFFLE_vec_shuffle_x:%[0-9]+]]:mshflxdst = VSHUFFLE_vec_shuffle_x [[DEF]], [[DEF]], [[MOV_RLC_imm11_pseudo]] + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY [[VSHUFFLE_vec_shuffle_x]].sub_256_lo + ; CHECK-NEXT: [[VMOV_alu_mv_mv_w_to_q:%[0-9]+]]:vec128 = VMOV_alu_mv_mv_w_to_q [[COPY]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VMOV_alu_mv_mv_w_to_q]] + %1:vregbank(<8 x s64>) = G_IMPLICIT_DEF + %0:vregbank(<8 x s16>) = G_TRUNC %1(<8 x s64>) PseudoRET implicit $lr, implicit %0 ... diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir index aaa9ff5a7fd0..03c6fce19025 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir @@ -133,3 +133,57 @@ body: | %0:_(<16 x s8>) = G_TRUNC %1(<16 x s16>) PseudoRET implicit $lr, implicit %0 ... +--- +name: v16s8_trunc_v16s32_vec512 +body: | + bb.1.entry: + ; CHECK-LABEL: name: v16s8_trunc_v16s32_vec512 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[DEF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<16 x s8>) + %1:_(<16 x s32>) = G_IMPLICIT_DEF + %0:_(<16 x s8>) = G_TRUNC %1(<16 x s32>) + PseudoRET implicit $lr, implicit %0 +... +--- +name: v8s16_trunc_v8s64_vec512 +body: | + bb.1.entry: + ; CHECK-LABEL: name: v8s16_trunc_v8s64_vec512 + ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF]](<8 x s64>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<8 x s16>) + %1:_(<8 x s64>) = G_IMPLICIT_DEF + %0:_(<8 x s16>) = G_TRUNC %1(<8 x s64>) + PseudoRET implicit $lr, implicit %0 +... +--- +name: v32s8_trunc_v32s32_vec1024 +body: | + bb.1.entry: + ; CHECK-LABEL: name: v32s8_trunc_v32s32_vec1024 + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[UV]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[UV1]](<16 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<16 x s8>), [[TRUNC1]](<16 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<32 x s8>) + %1:_(<32 x s32>) = G_IMPLICIT_DEF + %0:_(<32 x s8>) = G_TRUNC %1(<32 x s32>) + PseudoRET implicit $lr, implicit %0 +... +--- +name: v16s16_trunc_v16s64_vec1024 +body: | + bb.1.entry: + ; CHECK-LABEL: name: v16s16_trunc_v16s64_vec1024 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s64>), [[UV1:%[0-9]+]]:_(<8 x s64>) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[UV]](<8 x s64>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[UV1]](<8 x s64>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s16>), [[TRUNC1]](<8 x s16>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s16>) + %1:_(<16 x s64>) = G_IMPLICIT_DEF + %0:_(<16 x s16>) = G_TRUNC %1(<16 x s64>) + PseudoRET implicit $lr, implicit %0 +...