Skip to content

Commit 982f803

Browse files
committed
implement different approach
1 parent 8a72c64 commit 982f803

File tree

4 files changed

+20
-183
lines changed

4 files changed

+20
-183
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,7 @@ class LegalizationArtifactCombiner {
997997

998998
// Recognize UnmergeSrc that can be unmerged to DstTy directly.
999999
// Types have to be either both vector or both non-vector types.
1000+
// In case of vector types, the scalar element types need to match.
10001001
// Merge-like opcodes are combined one at a time. First one creates new
10011002
// unmerge, following should use the same unmerge (builder performs CSE).
10021003
//
@@ -1005,7 +1006,9 @@ class LegalizationArtifactCombiner {
10051006
// %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
10061007
//
10071008
// %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
1008-
if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
1009+
if (((!DstTy.isVector() && !UnmergeSrcTy.isVector()) ||
1010+
(DstTy.isVector() && UnmergeSrcTy.isVector() &&
1011+
DstTy.getScalarType() == UnmergeSrcTy.getScalarType())) &&
10091012
(Elt0UnmergeIdx % NumMIElts == 0) &&
10101013
getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
10111014
if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8281,8 +8281,9 @@ LegalizerHelper::LegalizeResult
82818281
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
82828282
const unsigned NumDst = MI.getNumOperands() - 1;
82838283
Register SrcReg = MI.getOperand(NumDst).getReg();
8284-
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8285-
if (DstTy.getScalarType().isPointer())
8284+
Register Dst0Reg = MI.getOperand(0).getReg();
8285+
LLT DstTy = MRI.getType(Dst0Reg);
8286+
if (DstTy.isPointer())
82868287
return UnableToLegalize; // TODO
82878288

82888289
SrcReg = coerceToScalar(SrcReg);
@@ -8292,26 +8293,14 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
82928293
// Expand scalarizing unmerge as bitcast to integer and shift.
82938294
LLT IntTy = MRI.getType(SrcReg);
82948295

8295-
const unsigned DstSize = DstTy.getScalarSizeInBits();
8296-
SmallVector<Register> VectorElems;
8297-
Register Shift;
8298-
for (unsigned I = 0, Offset = 0; I != NumDst; Offset += DstSize) {
8299-
if (Offset) {
8300-
auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8301-
Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt).getReg(0);
8302-
} else {
8303-
Shift = SrcReg;
8304-
}
8305-
if (DstTy.isVector()) {
8306-
VectorElems.emplace_back(
8307-
MIRBuilder.buildTrunc(DstTy.getScalarType(), Shift).getReg(0));
8308-
if (VectorElems.size() == DstTy.getNumElements()) {
8309-
MIRBuilder.buildBuildVector(MI.getOperand(I++), VectorElems);
8310-
VectorElems.clear();
8311-
}
8312-
} else {
8313-
MIRBuilder.buildTrunc(MI.getOperand(I++), Shift);
8314-
}
8296+
MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
8297+
8298+
const unsigned DstSize = DstTy.getSizeInBits();
8299+
unsigned Offset = DstSize;
8300+
for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8301+
auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
8302+
auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
8303+
MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
83158304
}
83168305

83178306
MI.eraseFromParent();

llvm/lib/CodeGen/MachineVerifier.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,11 +1510,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
15101510

15111511
LLT SrcTy = MRI->getType(MI->getOperand(NumDsts).getReg());
15121512
if (DstTy.isVector()) {
1513-
// This case is the converse of G_CONCAT_VECTORS, but relaxed since
1514-
// G_UNMERGE_VALUES can handle src and dst vectors with different
1515-
// element sizes:
1516-
// %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)
1517-
if (SrcTy.isScalableVector() != DstTy.isScalableVector() ||
1513+
// This case is the converse of G_CONCAT_VECTORS.
1514+
if (!SrcTy.isVector() ||
1515+
(SrcTy.getScalarType() != DstTy.getScalarType() &&
1516+
!SrcTy.isPointerVector()) ||
1517+
SrcTy.isScalableVector() != DstTy.isScalableVector() ||
15181518
SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits())
15191519
report("G_UNMERGE_VALUES source operand does not match vector "
15201520
"destination operands",

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir

Lines changed: 0 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -96,41 +96,6 @@ body: |
9696
$vgpr1 = COPY %4
9797
...
9898

99-
---
100-
name: test_unmerge_v2s8_v2s16
101-
body: |
102-
bb.0:
103-
liveins: $vgpr0
104-
; CHECK-LABEL: name: test_unmerge_v2s8_v2s16
105-
; CHECK: liveins: $vgpr0
106-
; CHECK-NEXT: {{ $}}
107-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
108-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
109-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
110-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
111-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
112-
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
113-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
114-
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
115-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
116-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
117-
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
118-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
119-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
120-
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
121-
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32)
122-
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
123-
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
124-
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
125-
; CHECK-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
126-
%0:_(<2 x s16>) = COPY $vgpr0
127-
%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)
128-
%3:_(<2 x s16>) = G_ANYEXT %1
129-
%4:_(<2 x s16>) = G_ANYEXT %2
130-
$vgpr0 = COPY %3
131-
$vgpr1 = COPY %4
132-
...
133-
13499
---
135100
name: test_unmerge_s16_v3s16
136101
body: |
@@ -155,50 +120,6 @@ body: |
155120
$vgpr2 = COPY %6
156121
...
157122

158-
---
159-
name: test_unmerge_v2s8_v3s16
160-
body: |
161-
bb.0:
162-
; CHECK-LABEL: name: test_unmerge_v2s8_v3s16
163-
; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
164-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
165-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
166-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
167-
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
168-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
169-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
170-
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
171-
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
172-
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
173-
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
174-
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
175-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
176-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
177-
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
178-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
179-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
180-
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
181-
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32)
182-
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
183-
; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
184-
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C3]]
185-
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
186-
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
187-
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
188-
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
189-
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
190-
; CHECK-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
191-
; CHECK-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
192-
%0:_(<3 x s16>) = G_IMPLICIT_DEF
193-
%1:_(<2 x s8>), %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
194-
%4:_(<2 x s16>) = G_ANYEXT %1
195-
%5:_(<2 x s16>) = G_ANYEXT %2
196-
%6:_(<2 x s16>) = G_ANYEXT %3
197-
$vgpr0 = COPY %4
198-
$vgpr1 = COPY %5
199-
$vgpr2 = COPY %6
200-
...
201-
202123
---
203124

204125
name: test_unmerge_s16_v4s16
@@ -270,62 +191,6 @@ body: |
270191
$vgpr5 = COPY %12
271192
...
272193

273-
---
274-
name: test_unmerge_v4s8_v6s16
275-
body: |
276-
bb.0:
277-
liveins: $vgpr0_vgpr1
278-
; CHECK-LABEL: name: test_unmerge_v4s8_v6s16
279-
; CHECK: liveins: $vgpr0_vgpr1
280-
; CHECK-NEXT: {{ $}}
281-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
282-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
283-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
284-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
285-
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
286-
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
287-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
288-
; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8), [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
289-
; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s8)
290-
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s8)
291-
; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s8)
292-
; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s8)
293-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32)
294-
; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8), [[UV32:%[0-9]+]]:_(s8), [[UV33:%[0-9]+]]:_(s8), [[UV34:%[0-9]+]]:_(s8), [[UV35:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
295-
; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV32]](s8)
296-
; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV33]](s8)
297-
; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV34]](s8)
298-
; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV35]](s8)
299-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32)
300-
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
301-
; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<4 x s32>)
302-
; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR2]](<4 x s32>)
303-
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
304-
%1:_(<4 x s8>), %2:_(<4 x s8>), %3:_(<4 x s8>) = G_UNMERGE_VALUES %0
305-
%4:_(<4 x s32>) = G_ANYEXT %1
306-
%5:_(<4 x s32>) = G_ANYEXT %2
307-
%6:_(<4 x s32>) = G_ANYEXT %3
308-
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
309-
$vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5
310-
$vgpr8_vgpr9_vgpr10_vgpr11 = COPY %6
311-
...
312-
313-
---
314-
name: test_unmerge_v3s32_v6s16
315-
body: |
316-
bb.0:
317-
liveins: $vgpr0_vgpr1
318-
; CHECK-LABEL: name: test_unmerge_v3s32_v6s16
319-
; CHECK: liveins: $vgpr0_vgpr1
320-
; CHECK-NEXT: {{ $}}
321-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
322-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
323-
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
324-
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
325-
%1:_(<3 x s32>) = G_UNMERGE_VALUES %0
326-
$vgpr0_vgpr1_vgpr2 = COPY %1
327-
...
328-
329194
---
330195

331196
name: test_unmerge_s8_s16
@@ -1225,23 +1090,3 @@ body: |
12251090
$vgpr9_vgpr10_vgpr11 = COPY %8
12261091
12271092
...
1228-
1229-
---
1230-
name: test_unmerge_v3s32_v12s16
1231-
body: |
1232-
bb.0:
1233-
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
1234-
1235-
; CHECK-LABEL: name: test_unmerge_v3s32_v12s16
1236-
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
1237-
; CHECK-NEXT: {{ $}}
1238-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
1239-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
1240-
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
1241-
; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>)
1242-
%0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
1243-
%1:_(<3 x s32>), %2:_(<3 x s32>) = G_UNMERGE_VALUES %0
1244-
$vgpr0_vgpr1_vgpr2 = COPY %1
1245-
$vgpr3_vgpr4_vgpr5 = COPY %2
1246-
1247-
...

0 commit comments

Comments
 (0)