Skip to content

Commit 94cab66

Browse files
committed
Add run lines to confirm bswap is matched to v_perm_b32
1 parent 34dd166 commit 94cab66

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2252,7 +2252,7 @@ multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0>
22522252

22532253
// Instructions such as v_alignbyte_b32 allow op_sel in gfx9, but not in vi.
22542254
// The following is created to support that.
2255-
multiclass VOP3OpSel_Real_gfx9_with_names<bits<10> op, string opName, string AsmName> {
2255+
multiclass VOP3OpSel_Real_gfx9_with_name<bits<10> op, string opName, string AsmName> {
22562256
defvar psName = opName#"_e64";
22572257
def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(psName), SIEncodingFamily.VI>, // note: encoding family is VI
22582258
VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(psName).Pfl> {
@@ -2328,8 +2328,8 @@ defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16"
23282328
defm V_MAD_LEGACY_U16 : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
23292329
defm V_MAD_LEGACY_I16 : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;
23302330

2331-
defm V_ALIGNBIT_B32_opsel : VOP3OpSel_Real_gfx9_with_names <0x1ce, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">;
2332-
defm V_ALIGNBYTE_B32_opsel : VOP3OpSel_Real_gfx9_with_names <0x1cf, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">;
2331+
defm V_ALIGNBIT_B32_opsel : VOP3OpSel_Real_gfx9_with_name <0x1ce, "V_ALIGNBIT_B32_opsel", "v_alignbit_b32">;
2332+
defm V_ALIGNBYTE_B32_opsel : VOP3OpSel_Real_gfx9_with_name <0x1cf, "V_ALIGNBYTE_B32_opsel", "v_alignbyte_b32">;
23332333

23342334
defm V_MAD_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
23352335
defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-bswap.mir

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
33
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
4+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
5+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
46

57
---
68
name: bswap_i32_vv
@@ -19,13 +21,30 @@ body: |
1921
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16711935
2022
; GFX7-NEXT: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 [[S_MOV_B32_]], [[V_ALIGNBIT_B32_e64_1]], [[V_ALIGNBIT_B32_e64_]], implicit $exec
2123
; GFX7-NEXT: S_ENDPGM 0, implicit [[V_BFI_B32_e64_]]
24+
;
2225
; GFX8-LABEL: name: bswap_i32_vv
2326
; GFX8: liveins: $vgpr0
2427
; GFX8-NEXT: {{ $}}
2528
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
2629
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
2730
; GFX8-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
2831
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
32+
;
33+
; GFX9-LABEL: name: bswap_i32_vv
34+
; GFX9: liveins: $vgpr0
35+
; GFX9-NEXT: {{ $}}
36+
; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
37+
; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
38+
; GFX9-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
39+
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
40+
;
41+
; GFX10-LABEL: name: bswap_i32_vv
42+
; GFX10: liveins: $vgpr0
43+
; GFX10-NEXT: {{ $}}
44+
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
45+
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 66051
46+
; GFX10-NEXT: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 0, [[COPY]], [[S_MOV_B32_]], implicit $exec
47+
; GFX10-NEXT: S_ENDPGM 0, implicit [[V_PERM_B32_e64_]]
2948
%0:vgpr(s32) = COPY $vgpr0
3049
%1:vgpr(s32) = G_BSWAP %0
3150
S_ENDPGM 0, implicit %1

0 commit comments

Comments
 (0)