Skip to content

Commit 9bfe5f8

Browse files
[AIE2] Legalize G_EXTRACT_VECTOR_ELT op for 16-, 32- and 128-bit vectors
1 parent a21a094 commit 9bfe5f8

File tree

2 files changed

+189
-15
lines changed

2 files changed

+189
-15
lines changed

llvm/lib/Target/AIE/AIELegalizerInfo.cpp

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,16 @@ static LegalizeMutation bitcastAccToVectorType(unsigned TypeIdx) {
6363
};
6464
}
6565

66+
/// Builds a legalization mutation that re-types the operand at \p TypeIdx as
/// a type made of 32-bit elements covering the same total number of bits.
///
/// The new type is created with LLT::scalarOrVector using Size / 32 elements
/// of 32 bits each, so a 32-bit input such as <4 x s8> is mapped to s32.
///
/// \param TypeIdx index into LegalityQuery::Types of the type to rewrite.
/// \returns a mutation yielding the pair (TypeIdx, new 32-bit-element type).
///
/// The queried type's size in bits must be a multiple of 32; this is only
/// checked with an assert.
static LegalizeMutation bitcastToVectorElement32(const unsigned TypeIdx) {
67+
return [=](const LegalityQuery &Query) {
68+
const LLT Ty = Query.Types[TypeIdx];
69+
unsigned Size = Ty.getSizeInBits();
70+
assert(Size % 32 == 0);
71+
return std::pair(
72+
TypeIdx, LLT::scalarOrVector(ElementCount::getFixed(Size / 32), 32));
73+
};
74+
}
75+
6676
static LegalityPredicate
6777
isValidVectorMergeUnmergeOp(const unsigned BigVectorId,
6878
const unsigned SmallVectorId) {
@@ -394,8 +404,34 @@ AIELegalizerInfo::AIELegalizerInfo(const AIEBaseSubtarget &ST) {
394404
const LLT &EltTy = Query.Types[1].getElementType();
395405
return Query.Types[0] != EltTy;
396406
})
397-
.customIf(typeInSet(1, {V2S32, V8S32, V16S32, V32S32, V16S16, V32S8,
398-
V32S16, V64S8, V64S16, V128S8}));
407+
// If the vector is 32 bits wide, LLVM can legalize it with bit shifts
408+
.bitcastIf(
409+
[=](const LegalityQuery &Query) {
410+
const LLT &VecTy = Query.Types[1];
411+
return VecTy.getSizeInBits() == 32;
412+
},
413+
bitcastToVectorElement32(1))
414+
// Extraction is supported for the native types of 32-, 64-, 256-, 512- and
415+
// 1024-bit
416+
.customIf(typeInSet(1, {V4S8, V2S16, V2S32, V8S32, V16S32, V32S32,
417+
V16S16, V32S8, V32S16, V64S8, V64S16, V128S8}))
418+
// For 16-bit vectors, we want to increase the number of elements to 4. Since
419+
// our architecture doesn't always support all intermediate sizes, we do
420+
// it as a special case so that we can use the minimum clamp for the
421+
// smallest vector register.
422+
.moreElementsIf(
423+
[=](const LegalityQuery &Query) {
424+
return Query.Types[1].getScalarSizeInBits() == 8 &&
425+
Query.Types[1].getNumElements() == 2;
426+
},
427+
[=](const LegalityQuery &Query) {
428+
return std::make_pair(1, LLT::fixed_vector(4, S8));
429+
})
430+
// Increase the input vectors if they don't fit in the smallest vector
431+
// register
432+
.clampMinNumElements(1, S8, 32)
433+
.clampMinNumElements(1, S16, 16)
434+
.clampMinNumElements(1, S32, 8);
399435

400436
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
401437
.clampScalar(2, S32, S32) // Clamp the idx to 32 bit since VINSERT

llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-extract-vector-elt.mir

Lines changed: 151 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -522,8 +522,7 @@ body: |
522522
; CHECK-NEXT: LIFETIME_END %stack.0
523523
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
524524
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
525-
; CHECK-NEXT: $r0 = COPY [[AND]](s32)
526-
; CHECK-NEXT: PseudoRET implicit $lr, implicit $r0
525+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AND]](s32)
527526
%1:_(<32 x s16>) = COPY $x0
528527
%4:_(s32) = G_CONSTANT i32 15
529528
%2:_(p0) = G_FRAME_INDEX %stack.0
@@ -533,8 +532,7 @@ body: |
533532
%0:_(s16) = G_LOAD %2(p0) :: (volatile dereferenceable load (s16))
534533
LIFETIME_END %stack.0
535534
%5:_(s32) = G_ZEXT %0(s16)
536-
$r0 = COPY %5(s32)
537-
PseudoRET implicit $lr, implicit $r0
535+
PseudoRET implicit $lr, implicit %5
538536
539537
...
540538
---
@@ -559,8 +557,7 @@ body: |
559557
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (volatile dereferenceable load (s8))
560558
; CHECK-NEXT: LIFETIME_END %stack.0
561559
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 8
562-
; CHECK-NEXT: $r0 = COPY [[SEXT_INREG]](s32)
563-
; CHECK-NEXT: PseudoRET implicit $lr, implicit $r0
560+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT_INREG]](s32)
564561
%1:_(<128 x s8>) = COPY $y2
565562
%4:_(s32) = G_CONSTANT i32 127
566563
%2:_(p0) = G_FRAME_INDEX %stack.0
@@ -570,9 +567,7 @@ body: |
570567
%0:_(s8) = G_LOAD %2(p0) :: (volatile dereferenceable load (s8))
571568
LIFETIME_END %stack.0
572569
%5:_(s32) = G_SEXT %0(s8)
573-
$r0 = COPY %5(s32)
574-
PseudoRET implicit $lr, implicit $r0
575-
570+
PseudoRET implicit $lr, implicit %5
576571
...
577572
---
578573
name: extract_16bit_1024
@@ -597,8 +592,7 @@ body: |
597592
; CHECK-NEXT: LIFETIME_END %stack.0
598593
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
599594
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C1]]
600-
; CHECK-NEXT: $r0 = COPY [[AND]](s32)
601-
; CHECK-NEXT: PseudoRET implicit $lr, implicit $r0
595+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AND]](s32)
602596
%1:_(<64 x s16>) = COPY $y2
603597
%4:_(s32) = G_CONSTANT i32 63
604598
%2:_(p0) = G_FRAME_INDEX %stack.0
@@ -608,7 +602,151 @@ body: |
608602
%0:_(s16) = G_LOAD %2(p0) :: (volatile dereferenceable load (s16))
609603
LIFETIME_END %stack.0
610604
%5:_(s32) = G_ZEXT %0(s16)
611-
$r0 = COPY %5(s32)
612-
PseudoRET implicit $lr, implicit $r0
605+
PseudoRET implicit $lr, implicit %5
606+
...
613607

608+
---
609+
name: test_extract_32_0
610+
body: |
611+
bb.1.entry:
612+
liveins: $r0
613+
; CHECK-LABEL: name: test_extract_32_0
614+
; CHECK: liveins: $r0
615+
; CHECK-NEXT: {{ $}}
616+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s8>) = COPY $r0
617+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
618+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<4 x s8>)
619+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
620+
; CHECK-NEXT: $r2 = COPY [[LSHR]](s32)
621+
%0:_(<4 x s8>) = COPY $r0
622+
%5:_(s32) = G_CONSTANT i32 0
623+
%4:_(s8) = G_EXTRACT_VECTOR_ELT %0(<4 x s8>), %5(s32)
624+
%1:_(s32) = G_ANYEXT %4(s8)
625+
$r2 = COPY %1(s32)
614626
...
627+
628+
---
629+
name: test_extract_32_1
630+
body: |
631+
bb.1.entry:
632+
liveins: $r0
633+
; CHECK-LABEL: name: test_extract_32_1
634+
; CHECK: liveins: $r0
635+
; CHECK-NEXT: {{ $}}
636+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s8>) = COPY $r0
637+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<4 x s8>)
638+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
639+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
640+
; CHECK-NEXT: $r2 = COPY [[LSHR]](s32)
641+
%0:_(<4 x s8>) = COPY $r0
642+
%5:_(s32) = G_CONSTANT i32 1
643+
%4:_(s8) = G_EXTRACT_VECTOR_ELT %0(<4 x s8>), %5(s32)
644+
%1:_(s32) = G_ANYEXT %4(s8)
645+
$r2 = COPY %1(s32)
646+
...
647+
648+
---
649+
name: test_extract_32_2
650+
body: |
651+
bb.1.entry:
652+
liveins: $r0
653+
; CHECK-LABEL: name: test_extract_32_2
654+
; CHECK: liveins: $r0
655+
; CHECK-NEXT: {{ $}}
656+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s8>) = COPY $r0
657+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<4 x s8>)
658+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
659+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
660+
; CHECK-NEXT: $r2 = COPY [[LSHR]](s32)
661+
%0:_(<4 x s8>) = COPY $r0
662+
%5:_(s32) = G_CONSTANT i32 2
663+
%4:_(s8) = G_EXTRACT_VECTOR_ELT %0(<4 x s8>), %5(s32)
664+
%1:_(s32) = G_ANYEXT %4(s8)
665+
$r2 = COPY %1(s32)
666+
...
667+
668+
---
669+
name: test_extract_32_3
670+
body: |
671+
bb.1.entry:
672+
liveins: $r0
673+
; CHECK-LABEL: name: test_extract_32_3
674+
; CHECK: liveins: $r0
675+
; CHECK-NEXT: {{ $}}
676+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s8>) = COPY $r0
677+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<4 x s8>)
678+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
679+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
680+
; CHECK-NEXT: $r2 = COPY [[LSHR]](s32)
681+
%0:_(<4 x s8>) = COPY $r0
682+
%5:_(s32) = G_CONSTANT i32 3
683+
%4:_(s8) = G_EXTRACT_VECTOR_ELT %0(<4 x s8>), %5(s32)
684+
%1:_(s32) = G_ANYEXT %4(s8)
685+
$r2 = COPY %1(s32)
686+
...
687+
688+
---
689+
name: test_extract_16bit
690+
body: |
691+
bb.1.entry:
692+
; CHECK-LABEL: name: test_extract_16bit
693+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
694+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
695+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
696+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
697+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[C]]
698+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
699+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
700+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C2]](s32)
701+
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]]
702+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
703+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[DEF]], [[C3]]
704+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
705+
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C4]](s32)
706+
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL1]]
707+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
708+
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
709+
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C5]](s32)
710+
; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL2]]
711+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[OR3]](s32)
712+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<4 x s8>)
713+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
714+
; CHECK-NEXT: $r0 = COPY [[LSHR]](s32)
715+
%0:_(s8) = G_CONSTANT i8 42
716+
%1:_(<2 x s8>) = G_BUILD_VECTOR %0(s8), %0(s8)
717+
%2:_(s32) = G_CONSTANT i32 0
718+
%3:_(s8) = G_EXTRACT_VECTOR_ELT %1(<2 x s8>), %2(s32)
719+
%4:_(s32) = G_ANYEXT %3(s8)
720+
$r0 = COPY %4(s32)
721+
...
722+
---
723+
name: test_extract_128bit
724+
body: |
725+
bb.1.entry:
726+
; CHECK-LABEL: name: test_extract_128bit
727+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65
728+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 77
729+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 68
730+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 33
731+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
732+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
733+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
734+
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[DEF1]], [[DEF]](s32)
735+
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT]], [[DEF]](s32)
736+
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT1]], [[DEF]](s32)
737+
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT3:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT2]], [[DEF]](s32)
738+
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT4:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT3]], [[C3]](s32)
739+
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT5:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT4]], [[C2]](s32)
740+
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT6:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT5]], [[C1]](s32)
741+
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT7:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT6]], [[C]](s32)
742+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[AIE_ADD_VECTOR_ELT_LEFT7]](<16 x s32>)
743+
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C4]](s32)
744+
; CHECK-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32)
745+
%0:_(s32) = G_CONSTANT i32 65
746+
%1:_(s32) = G_CONSTANT i32 77
747+
%2:_(s32) = G_CONSTANT i32 68
748+
%3:_(s32) = G_CONSTANT i32 33
749+
%4:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32)
750+
%5:_(s32) = G_CONSTANT i32 2
751+
%6:_(s32) = G_EXTRACT_VECTOR_ELT %4(<4 x s32>), %5(s32)
752+
$r0 = COPY %6(s32)

0 commit comments

Comments
 (0)