Skip to content

Commit c2e9fb7

Browse files
committed
Perform S32 G_ADD instead of unpacked addition for V2S16
1 parent a000a1b commit c2e9fb7

File tree

3 files changed

+14
-22
lines changed

3 files changed

+14
-22
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -501,16 +501,10 @@ void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
501501
}
502502

503503
/// Lower a uniform (SGPR) two-operand <2 x s16> instruction into two
/// independent 32-bit scalar operations, one per 16-bit lane, and repack the
/// low halves of the two results into the original destination register.
///
/// The operation is performed in S32 rather than S16, but each lane is still
/// computed separately: a single S32 operation on the bitcast operands would
/// let a carry/borrow out of the low lane corrupt the high lane.
///
/// The upper 16 bits of each per-lane S32 input are not cleared (any-extend
/// semantics). This is only valid for opcodes whose low 16 result bits depend
/// solely on the low 16 bits of the inputs (e.g. G_ADD, G_SUB); callers must
/// not route opcodes such as shifts or min/max through this lowering.
///
/// \param MI the V2S16 instruction to lower; operand 0 is the destination,
///        operands 1 and 2 are the sources. MI is erased on return.
void RegBankLegalizeHelper::lowerScalarizeV2S16(MachineInstr &MI) {
  // Reinterpret each packed operand as a 32-bit scalar. Lane 0 occupies the
  // low 16 bits, so the bitcast value can be used directly as the
  // any-extended low lane.
  auto Src1Lo = B.buildBitcast(SgprRB_S32, MI.getOperand(1).getReg());
  auto Src2Lo = B.buildBitcast(SgprRB_S32, MI.getOperand(2).getReg());

  // Shift lane 1 down into the low 16 bits to obtain the high-lane inputs.
  auto ShiftAmt = B.buildConstant(SgprRB_S32, 16);
  auto Src1Hi = B.buildLShr(SgprRB_S32, Src1Lo, ShiftAmt);
  auto Src2Hi = B.buildLShr(SgprRB_S32, Src2Lo, ShiftAmt);

  // Apply the original opcode to each lane independently.
  auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Src1Lo, Src2Lo});
  auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Src1Hi, Src2Hi});

  // Truncate both results to 16 bits and pack them straight into the original
  // destination, so that existing users of MI's result remain defined after
  // MI is erased.
  B.buildBuildVectorTrunc(MI.getOperand(0).getReg(),
                          {ResLo.getReg(0), ResHi.getReg(0)});
  MI.eraseFromParent();
}
516510

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,11 @@ body: |
1414
; CHECK-NEXT: {{ $}}
1515
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
1616
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
17-
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
18-
; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
19-
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s16) = G_ADD [[UV]], [[UV2]]
20-
; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s16) = G_ADD [[UV1]], [[UV3]]
21-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[ADD]](s16), [[ADD1]](s16)
22-
; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s16>)
17+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
18+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
19+
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]]
20+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<2 x s16>) = G_BITCAST [[ADD]](s32)
21+
; CHECK-NEXT: S_ENDPGM 0, implicit %2:sgpr(<2 x s16>)
2322
%0:_(<2 x s16>) = COPY $sgpr0
2423
%1:_(<2 x s16>) = COPY $sgpr1
2524
%2:_(<2 x s16>) = G_ADD %0, %1

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.v2s16.mir

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,11 @@ body: |
1414
; CHECK-NEXT: {{ $}}
1515
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
1616
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
17-
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
18-
; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
19-
; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s16) = G_SUB [[UV]], [[UV2]]
20-
; CHECK-NEXT: [[SUB1:%[0-9]+]]:sgpr(s16) = G_SUB [[UV1]], [[UV3]]
21-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[SUB]](s16), [[SUB1]](s16)
22-
; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s16>)
17+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
18+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
19+
; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[BITCAST]], [[BITCAST1]]
20+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<2 x s16>) = G_BITCAST [[SUB]](s32)
21+
; CHECK-NEXT: S_ENDPGM 0, implicit %2:sgpr(<2 x s16>)
2322
%0:_(<2 x s16>) = COPY $sgpr0
2423
%1:_(<2 x s16>) = COPY $sgpr1
2524
%2:_(<2 x s16>) = G_SUB %0, %1

0 commit comments

Comments
 (0)