Skip to content

Commit 3cceb3b

Browse files
committed
Handle G_TRUNC on S1 SGPR
1 parent 278fa03 commit 3cceb3b

File tree

3 files changed

+27
-7
lines changed

3 files changed

+27
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -477,14 +477,38 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
477477
}
478478
if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
479479
Combiner.tryCombineS1AnyExt(MI);
480+
// Also handle an anyext whose src and dst have the same type (a no-op anyext)
481+
if (!MI.getParent())
482+
continue; // Already erased by tryCombineS1AnyExt
483+
Register Dst = MI.getOperand(0).getReg();
484+
Register Src = MI.getOperand(1).getReg();
485+
if (MRI.getType(Dst) == MRI.getType(Src)) {
486+
while (!MRI.use_empty(Dst)) {
487+
auto &Use = *MRI.use_begin(Dst);
488+
Use.setReg(Src);
489+
}
490+
MI.eraseFromParent();
491+
}
492+
continue;
493+
}
494+
// Handle a G_TRUNC that still produces sgpr S1 (it should have been lowered to sgpr S32)
495+
// Only eliminate if dead (no uses), otherwise legalization will handle it
496+
if (MI.getOpcode() == AMDGPU::G_TRUNC) {
497+
Register Dst = MI.getOperand(0).getReg();
498+
if (MRI.getType(Dst) == LLT::scalar(1) && MRI.getRegBankOrNull(Dst) &&
499+
MRI.getRegBankOrNull(Dst)->getID() == AMDGPU::SGPRRegBankID &&
500+
MRI.use_empty(Dst)) {
501+
// Dead sgpr S1 trunc, just erase it
502+
MI.eraseFromParent();
503+
}
480504
continue;
481505
}
482506
}
483507
}
484508

485-
// assert(!getAnySgprS1(MRI).isValid() &&
486-
// "Registers with sgpr reg bank and S1 LLT are not legal after "
487-
// "AMDGPURegBankLegalize. Should lower to sgpr S32");
509+
assert(!getAnySgprS1(MRI).isValid() &&
510+
"Registers with sgpr reg bank and S1 LLT are not legal after "
511+
"AMDGPURegBankLegalize. Should lower to sgpr S32");
488512

489513
return true;
490514
}

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add-overflow.s32.mir

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ body: |
1414
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
1515
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
1616
; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[COPY]], [[COPY1]]
17-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDO1]](s32)
1817
%0:_(s32) = COPY $sgpr0
1918
%1:_(s32) = COPY $sgpr1
2019
%2:_(s32), %3:_(s1) = G_UADDO %0, %1
@@ -92,7 +91,6 @@ body: |
9291
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
9392
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
9493
; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
95-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
9694
%0:_(s32) = COPY $sgpr0
9795
%1:_(s32) = COPY $sgpr1
9896
%2:_(s32) = COPY $sgpr2

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub-overflow.s32.mir

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ body: |
1414
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
1515
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
1616
; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[COPY]], [[COPY1]]
17-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBO1]](s32)
1817
%0:_(s32) = COPY $sgpr0
1918
%1:_(s32) = COPY $sgpr1
2019
%2:_(s32), %3:_(s1) = G_USUBO %0, %1
@@ -92,7 +91,6 @@ body: |
9291
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
9392
; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
9493
; CHECK-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]]
95-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[USUBE1]](s32)
9694
%0:_(s32) = COPY $sgpr0
9795
%1:_(s32) = COPY $sgpr1
9896
%2:_(s32) = COPY $sgpr2

0 commit comments

Comments
 (0)