-
Notifications
You must be signed in to change notification settings - Fork 29
[AIE2P] Support all 64-bit-destination G_BUILD_VECTOR in legalizer #467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: aie-public
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -193,8 +193,8 @@ bool AIELegalizerHelper::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper, | |
|
|
||
| assert((EltSize == 8 || EltSize == 16 || EltSize == 32 || EltSize == 64) && | ||
| "non-existent integer size"); | ||
| assert(DstVecSize == 32 || (DstVecSize > 64 && DstVecSize <= 1024 && | ||
| "non-native vectors are not supported")); | ||
| assert(DstVecSize >= 32 && DstVecSize <= 1024 && | ||
| "non-native vectors are not supported"); | ||
| assert(DstVecSize < 1024 && "vadd takes a 512-bit argument"); | ||
|
|
||
| // If our vector is 32-bit we can store it as packed integer vector | ||
|
|
@@ -263,6 +263,16 @@ bool AIELegalizerHelper::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper, | |
| : MIRBuilder.buildBitcast(VecTy, Vec512Reg).getReg(0); | ||
| MIRBuilder.buildInstr(II->getGenericUnpadVectorOpcode(), {DstReg}, | ||
| {NewSrc2}); | ||
| } else if (DstVecSize == 64) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am starting to feel that we are overusing vector registers; maybe we could consider keeping the value in scalar registers instead.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I already tried the pack approach using shifts, but it needed more instructions than passing through the 512-bit vector. I don't mind either approach; both are pretty expensive without any pattern to combine. |
||
| // E.G. AIE2 doesn't have an 64-bit extract vector element instruction | ||
| assert(ST.isAIE2P() && "Only AIE2P is supported for now"); | ||
| Register Zero = MIRBuilder.buildConstant(S32, 0).getReg(0); | ||
| Register DstTemp = MRI.createGenericVirtualRegister(S64); | ||
| Register NewSrc = MIRBuilder.buildBitcast(V8S64, Src).getReg(0); | ||
|
|
||
| MIRBuilder.buildInstr(II->getGenericExtractVectorEltOpcode(true), {DstTemp}, | ||
| {NewSrc, Zero}); | ||
| MIRBuilder.buildBitcast(DstReg, DstTemp); | ||
| } | ||
|
|
||
| MI.eraseFromParent(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,11 +65,11 @@ isValidVectorMergeUnmergeOp(const unsigned BigVectorId, | |
| }; | ||
| } | ||
|
|
||
| static LegalityPredicate isValidVectorAIEP(const unsigned TypeIdx) { | ||
| static LegalityPredicate isValidVectorAIE2P(const unsigned TypeIdx) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is used for other opcodes as well, we should add tests.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't like the name, actually. We were excluding 64-bit vectors as invalid AIE2P vectors when it was just a question of missing support. For instance, we have legal vector types for all of those sizes, from 32 bits upwards. |
||
| return [=](const LegalityQuery &Query) { | ||
| const LLT DstTy = Query.Types[TypeIdx]; | ||
| const unsigned DstSize = DstTy.getSizeInBits(); | ||
| return DstTy.isVector() && (DstSize == 32 || DstSize > 64); | ||
| return DstTy.isVector() && DstSize >= 32; | ||
| }; | ||
| } | ||
|
|
||
|
|
@@ -605,7 +605,7 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST) | |
| .customIf(typeInSet(0, {V2S8, S16})); | ||
|
|
||
| const LegalityPredicate IsNotValidDestinationVector = | ||
| negatePredicate(isValidVectorAIEP(0)); | ||
| negatePredicate(isValidVectorAIE2P(0)); | ||
|
|
||
| getActionDefinitionsBuilder(G_MERGE_VALUES).legalFor({{S64, S32}}); | ||
| getActionDefinitionsBuilder(G_UNMERGE_VALUES) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -180,3 +180,40 @@ body: | | |
| $bmll0 = COPY %1(<8 x s64>) | ||
| PseudoRET implicit $lr, implicit $bmll0 | ||
| ... | ||
|
|
||
|
|
||
| --- | ||
| name: test_build_vector_v8s8 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please include legalizer tests for 16 and 32 bit element type cases.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should already have one for v2s32, since it was already legal before my change. I already added the test for 4x16 in the end-to-end test buildvector.ll, but I can add it here as well if needed. |
||
| stack: | ||
| - {id: 0, name: "", type: default, offset: 0, size: 128, alignment: 32} | ||
| body: | | ||
| bb.1.entry: | ||
| ; CHECK-LABEL: name: test_build_vector_v8s8 | ||
| ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF | ||
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<64 x s8>) = G_IMPLICIT_DEF | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[DEF1]], [[DEF]](s32) | ||
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI1:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI]], [[COPY]](s32) | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI2:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI1]], [[COPY1]](s32) | ||
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI3:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI2]], [[COPY2]](s32) | ||
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI4:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI3]], [[COPY3]](s32) | ||
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI5:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI4]], [[COPY4]](s32) | ||
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI6:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI5]], [[COPY5]](s32) | ||
| ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI7:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI6]], [[COPY6]](s32) | ||
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 | ||
| ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[AIE_ADD_VECTOR_ELT_HI7]](<64 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s64) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[BITCAST]](<8 x s64>), [[C]](s32) | ||
| ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s64) | ||
| ; CHECK-NEXT: $l0 = COPY [[BITCAST1]](<8 x s8>) | ||
| ; CHECK-NEXT: PseudoRET implicit $lr, implicit $l0 | ||
| %2:_(s8) = G_IMPLICIT_DEF | ||
| %1:_(<8 x s8>) = G_BUILD_VECTOR %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8), %2(s8) | ||
| $l0 = COPY %1(<8 x s8>) | ||
| PseudoRET implicit $lr, implicit $l0 | ||
| ... | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
| # See https://llvm.org/LICENSE.txt for license information. | ||
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| # | ||
| # (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates | ||
| # (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates | ||
| # RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck %s | ||
|
|
||
| --- | ||
|
|
@@ -24,3 +24,72 @@ body: | | |
| %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<16 x s32>), %1(<16 x s32>), shufflemask(14, 15) | ||
| PseudoRET implicit $lr, implicit %2 | ||
| ... | ||
|
|
||
| --- | ||
| name: test_v8s8 | ||
| body: | | ||
| bb.0: | ||
|
|
||
| ; CHECK-LABEL: name: test_v8s8 | ||
| ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s8>) = COPY $wh0 | ||
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s8>) = COPY $wh0 | ||
| ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 | ||
| ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s8>) = G_IMPLICIT_DEF | ||
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
| ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY2]](<32 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<64 x s8>), [[C]](s32) | ||
| ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 8 | ||
| ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 | ||
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
| ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY3]](<32 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS1]](<64 x s8>), [[C1]](s32) | ||
| ; CHECK-NEXT: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 8 | ||
| ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 | ||
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
| ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY4]](<32 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS2]](<64 x s8>), [[C2]](s32) | ||
| ; CHECK-NEXT: [[ASSERT_SEXT2:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT2]], 8 | ||
| ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 | ||
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
| ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY5]](<32 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS3]](<64 x s8>), [[C3]](s32) | ||
| ; CHECK-NEXT: [[ASSERT_SEXT3:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT3]], 8 | ||
| ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 | ||
| ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
| ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY6]](<32 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS4]](<64 x s8>), [[C4]](s32) | ||
| ; CHECK-NEXT: [[ASSERT_SEXT4:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT4]], 8 | ||
| ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 | ||
| ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
| ; CHECK-NEXT: [[CONCAT_VECTORS5:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY7]](<32 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS5]](<64 x s8>), [[C5]](s32) | ||
| ; CHECK-NEXT: [[ASSERT_SEXT5:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT5]], 8 | ||
| ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 | ||
| ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<32 x s8>) = COPY [[DEF]](<32 x s8>) | ||
| ; CHECK-NEXT: [[CONCAT_VECTORS6:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[COPY8]](<32 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS6]](<64 x s8>), [[C6]](s32) | ||
| ; CHECK-NEXT: [[ASSERT_SEXT6:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT6]], 8 | ||
| ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 | ||
| ; CHECK-NEXT: [[CONCAT_VECTORS7:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY1]](<32 x s8>), [[DEF]](<32 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS7]](<64 x s8>), [[C7]](s32) | ||
| ; CHECK-NEXT: [[ASSERT_SEXT7:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT7]], 8 | ||
| ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<64 x s8>) = G_IMPLICIT_DEF | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[DEF1]], [[ASSERT_SEXT]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI1:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI]], [[ASSERT_SEXT1]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI2:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI1]], [[ASSERT_SEXT2]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI3:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI2]], [[ASSERT_SEXT3]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI4:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI3]], [[ASSERT_SEXT4]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI5:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI4]], [[ASSERT_SEXT5]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI6:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI5]], [[ASSERT_SEXT6]](s32) | ||
| ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI7:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI6]], [[ASSERT_SEXT7]](s32) | ||
| ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 | ||
| ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[AIE_ADD_VECTOR_ELT_HI7]](<64 x s8>) | ||
| ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT8:%[0-9]+]]:_(s64) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[BITCAST]](<8 x s64>), [[C8]](s32) | ||
| ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AIE_SEXT_EXTRACT_VECTOR_ELT8]](s64) | ||
| ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST1]](<8 x s8>) | ||
| %0:_(<32 x s8>) = COPY $wh0 | ||
| %143:_(<32 x s8>) = COPY $wh0 | ||
| %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %143:_(<32 x s8>), %143:_, shufflemask(24, 25, 26, 27, 28, 29, 30, 31) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: maybe use a mask that cannot be optimized by the pre-legalizer combiner in a real scenario. For example
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes and we can keep both. We should be able to combine this later. |
||
| PseudoRET implicit $lr, implicit %2 | ||
| ... | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,3 +83,23 @@ entry: | |
| ret void | ||
| } | ||
|
|
||
| define void @test_buildvector_64bits() { | ||
| ; CHECK-LABEL: test_buildvector_64bits: | ||
| ; CHECK: .p2align 4 | ||
| ; CHECK-NEXT: // %bb.0: // %entry | ||
| ; CHECK-NEXT: mova r0, #777; nopx | ||
| ; CHECK-NEXT: vpush.hi.16 x0, x0, r0 | ||
| ; CHECK-NEXT: vpush.hi.16 x0, x0, r0 | ||
| ; CHECK-NEXT: vpush.hi.16 x0, x0, r0 | ||
| ; CHECK-NEXT: vpush.hi.16 x0, x0, r0 | ||
| ; CHECK-NEXT: vextract.64 r1:r0, x0, #0, vaddsign1 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Super follow-up: can we combine this as a VBCST.16 + VEXTRACT.64?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I used that pattern on purpose |
||
| ; CHECK-NEXT: ret lr | ||
| ; CHECK-NEXT: mova p0, #0 // Delay Slot 5 | ||
| ; CHECK-NEXT: st r0, [p0, #0] // Delay Slot 4 | ||
| ; CHECK-NEXT: mova p0, #4 // Delay Slot 3 | ||
| ; CHECK-NEXT: st r1, [p0, #0] // Delay Slot 2 | ||
| ; CHECK-NEXT: nop // Delay Slot 1 | ||
| entry: | ||
| store <4 x i16> <i16 777, i16 777, i16 777, i16 777>, ptr addrspace(6) null, align 32 | ||
| ret void | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
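nit: the assert message mentions vadd, but this is the G_BUILD_VECTOR legalization — vadd vs BUILD_VECTOR?
nit: both `DstVecSize <= 1024` and `DstVecSize < 1024` are asserted; can they be combined into a single check?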