Skip to content

Commit 5d1f75d

Browse files
committed
Unpack and zext instead of bitcast for v2s16
1 parent 08d51b9 commit 5d1f75d

File tree

3 files changed

+36
-15
lines changed

3 files changed

+36
-15
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -501,11 +501,22 @@ void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
501501
}
502502

503503
// Lowers a two-operand instruction on <2 x s16> SGPR values by replacing MI
// with equivalent 32-bit scalar instructions of the same opcode, then erasing
// MI. Operands 1 and 2 of MI are the inputs; operand 0 is the result.
void RegBankLegalizeHelper::lowerScalarizeV2S16(MachineInstr &MI) {
// Previous implementation (lines marked '-'): bitcast each <2 x s16> operand
// to a single s32, apply the opcode once on the packed 32-bit values, and
// bitcast the result into a fresh {SgprRB, V2S16} register.
504-
auto CastOp1 = B.buildBitcast(SgprRB_S32, MI.getOperand(1).getReg());
505-
auto CastOp2 = B.buildBitcast(SgprRB_S32, MI.getOperand(2).getReg());
506-
auto NewInstr =
507-
B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {CastOp1, CastOp2});
508-
B.buildBitcast({SgprRB, V2S16}, NewInstr.getReg(0));
// New implementation (lines marked '+'): split each operand into its two s16
// pieces so each lane is computed independently.
// NOTE(review): G_UNMERGE_VALUES presumably defines the least-significant
// piece first, so getReg(0) would be the low half and the Hi*/Lo* names look
// swapped. The order is kept consistent through to the final rebuild, so the
// result should be unaffected -- confirm and consider renaming.
504+
auto Op1 = B.buildUnmerge({SgprRB, S16}, MI.getOperand(1).getReg());
505+
auto Hi1 = Op1.getReg(0);
506+
auto Lo1 = Op1.getReg(1);
507+
auto Op2 = B.buildUnmerge({SgprRB, S16}, MI.getOperand(2).getReg());
508+
auto Hi2 = Op2.getReg(0);
509+
auto Lo2 = Op2.getReg(1);
510+
// Zero-extend every s16 piece to an SGPR s32 value.
511+
auto CastHi1 = B.buildZExt(SgprRB_S32, Hi1);
512+
auto CastLo1 = B.buildZExt(SgprRB_S32, Lo1);
513+
auto CastHi2 = B.buildZExt(SgprRB_S32, Hi2);
514+
auto CastLo2 = B.buildZExt(SgprRB_S32, Lo2);
515+
// Apply MI's original opcode once per piece on the widened values.
516+
auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {CastHi1, CastHi2});
517+
auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {CastLo1, CastLo2});
518+
// Rebuild the <2 x s16> result directly into MI's destination register,
// passing the pieces in the same order they were unmerged.
519+
B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {ResHi.getReg(0), ResLo.getReg(0)});
509520
MI.eraseFromParent();
510521
}
511522

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,16 @@ body: |
1313
; CHECK-NEXT: {{ $}}
1414
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
1515
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
16-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
17-
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
18-
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]]
19-
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<2 x s16>) = G_BITCAST [[ADD]](s32)
20-
; CHECK-NEXT: S_ENDPGM 0, implicit %2:sgpr(<2 x s16>)
16+
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
17+
; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
18+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[UV]](s16)
19+
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[UV1]](s16)
20+
; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(s32) = G_ZEXT [[UV2]](s16)
21+
; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:sgpr(s32) = G_ZEXT [[UV3]](s16)
22+
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ZEXT]], [[ZEXT2]]
23+
; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[ZEXT1]], [[ZEXT3]]
24+
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32)
25+
; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
2126
%0:_(<2 x s16>) = COPY $sgpr0
2227
%1:_(<2 x s16>) = COPY $sgpr1
2328
%2:_(<2 x s16>) = G_ADD %0, %1

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.v2s16.mir

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,16 @@ body: |
1414
; CHECK-NEXT: {{ $}}
1515
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
1616
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
17-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
18-
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
19-
; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[BITCAST]], [[BITCAST1]]
20-
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:sgpr(<2 x s16>) = G_BITCAST [[SUB]](s32)
21-
; CHECK-NEXT: S_ENDPGM 0, implicit %2:sgpr(<2 x s16>)
17+
; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
18+
; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
19+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[UV]](s16)
20+
; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:sgpr(s32) = G_ZEXT [[UV1]](s16)
21+
; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:sgpr(s32) = G_ZEXT [[UV2]](s16)
22+
; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:sgpr(s32) = G_ZEXT [[UV3]](s16)
23+
; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ZEXT]], [[ZEXT2]]
24+
; CHECK-NEXT: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[ZEXT1]], [[ZEXT3]]
25+
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB]](s32), [[SUB1]](s32)
26+
; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
2227
%0:_(<2 x s16>) = COPY $sgpr0
2328
%1:_(<2 x s16>) = COPY $sgpr1
2429
%2:_(<2 x s16>) = G_SUB %0, %1

0 commit comments

Comments
 (0)