Skip to content

Commit 4ad6acf

Browse files
committed
[AArch64] WIP Regalloc hints for EXT_ZZZI
This tries to ensure that the destination register and the first source register of EXT_ZZZI are mapped to the same physical register. This isn't always possible, because the MachineScheduler has already moved instructions in a way that causes an interference if both virtual registers get mapped to the same physical register. This is WIP because there is probably a better place to do this.
1 parent f07ab32 commit 4ad6acf

File tree

2 files changed

+23
-24
lines changed

2 files changed

+23
-24
lines changed

llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,6 @@ bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
5353
if (skipFunction(MF.getFunction()))
5454
return false;
5555

56-
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
57-
if (!FuncInfo->hasStreamingModeChanges())
58-
return false;
59-
6056
MRI = &MF.getRegInfo();
6157
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
6258
bool Changed = false;
@@ -86,6 +82,13 @@ bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
8682
Changed = true;
8783
break;
8884
}
85+
case AArch64::EXT_ZZZI:
86+
Register DstReg = MI.getOperand(0).getReg();
87+
Register SrcReg1 = MI.getOperand(1).getReg();
88+
if (SrcReg1 != DstReg) {
89+
MRI->setRegAllocationHint(DstReg, 0, SrcReg1);
90+
}
91+
break;
8992
}
9093
}
9194
}

llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -256,13 +256,12 @@ define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a,
256256
define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
257257
; CHECK-LABEL: splice_nxv2i1_idx:
258258
; CHECK: // %bb.0:
259-
; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
260259
; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1
260+
; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
261261
; CHECK-NEXT: ptrue p0.d
262-
; CHECK-NEXT: mov z0.d, z1.d
263-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
264-
; CHECK-NEXT: and z0.d, z0.d, #0x1
265-
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
262+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
263+
; CHECK-NEXT: and z1.d, z1.d, #0x1
264+
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
266265
; CHECK-NEXT: ret
267266
%res = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 1)
268267
ret <vscale x 2 x i1> %res
@@ -272,13 +271,12 @@ define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x
272271
define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
273272
; CHECK-LABEL: splice_nxv4i1_idx:
274273
; CHECK: // %bb.0:
275-
; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
276274
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
275+
; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
277276
; CHECK-NEXT: ptrue p0.s
278-
; CHECK-NEXT: mov z0.d, z1.d
279-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
280-
; CHECK-NEXT: and z0.s, z0.s, #0x1
281-
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
277+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
278+
; CHECK-NEXT: and z1.s, z1.s, #0x1
279+
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
282280
; CHECK-NEXT: ret
283281
%res = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 2)
284282
ret <vscale x 4 x i1> %res
@@ -288,13 +286,12 @@ define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x
288286
define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
289287
; CHECK-LABEL: splice_nxv8i1_idx:
290288
; CHECK: // %bb.0:
291-
; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
292289
; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1
290+
; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
293291
; CHECK-NEXT: ptrue p0.h
294-
; CHECK-NEXT: mov z0.d, z1.d
295-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
296-
; CHECK-NEXT: and z0.h, z0.h, #0x1
297-
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
292+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
293+
; CHECK-NEXT: and z1.h, z1.h, #0x1
294+
; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
298295
; CHECK-NEXT: ret
299296
%res = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 4)
300297
ret <vscale x 8 x i1> %res
@@ -304,13 +301,12 @@ define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x
304301
define <vscale x 16 x i1> @splice_nxv16i1_idx(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
305302
; CHECK-LABEL: splice_nxv16i1_idx:
306303
; CHECK: // %bb.0:
307-
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
308304
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
305+
; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
309306
; CHECK-NEXT: ptrue p0.b
310-
; CHECK-NEXT: mov z0.d, z1.d
311-
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
312-
; CHECK-NEXT: and z0.b, z0.b, #0x1
313-
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
307+
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
308+
; CHECK-NEXT: and z1.b, z1.b, #0x1
309+
; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
314310
; CHECK-NEXT: ret
315311
%res = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 8)
316312
ret <vscale x 16 x i1> %res

0 commit comments

Comments
 (0)