diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index d6acf9e081b9f..5caf23e41c16a 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1068,6 +1068,16 @@ void SIFoldOperandsImpl::foldOperand( if (MovOp == AMDGPU::COPY) return; + // Fold if the destination register class of the MOV instruction (ResRC) + // is a superclass of (or equal to) the destination register class of the + // COPY (DestRC). If this condition fails, folding would be illegal. + const MCInstrDesc &MovDesc = TII->get(MovOp); + assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1); + const TargetRegisterClass *ResRC = + TRI->getRegClass(MovDesc.operands()[0].RegClass); + if (!DestRC->hasSuperClassEq(ResRC)) + return; + MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin(); MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end(); while (ImpOpI != ImpOpE) { diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir index 300bae7551ca5..706c0d8178d70 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir @@ -202,52 +202,70 @@ body: | ... -# FIXME: Register class restrictions of av register not respected, -# issue 130020 - -# --- -# name: s_mov_b32_inlineimm_copy_s_to_av_32 -# tracksRegLiveness: true -# body: | -# bb.0: -# %0:sreg_32 = S_MOV_B32 32 -# %1:av_32 = COPY %0 -# $agpr0 = COPY %1 -# S_ENDPGM 0 - -# ... - -# --- -# name: v_mov_b32_inlineimm_copy_v_to_av_32 -# tracksRegLiveness: true -# body: | -# bb.0: -# %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec -# %1:av_32 = COPY %0 -# $agpr0 = COPY %1 -# S_ENDPGM 0 -# ... - -# --- -# name: s_mov_b32_imm_literal_copy_s_to_av_32 -# tracksRegLiveness: true -# body: | -# bb.0: -# %0:sreg_32 = S_MOV_B32 999 -# %1:av_32 = COPY %0 -# $agpr0 = COPY %1 -# S_ENDPGM 0 - -# ... - -# --- -# name: v_mov_b32_imm_literal_copy_v_to_av_32 -# tracksRegLiveness: true -# body: | -# bb.0: -# %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec -# %1:av_32 = COPY %0 -# $agpr0 = COPY %1 -# S_ENDPGM 0 - -# ... +--- +name: s_mov_b32_inlineimm_copy_s_to_av_32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: s_mov_b32_inlineimm_copy_s_to_av_32 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32 + ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: $agpr0 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:sreg_32 = S_MOV_B32 32 + %1:av_32 = COPY %0 + $agpr0 = COPY %1 + S_ENDPGM 0 + +... + +--- +name: v_mov_b32_inlineimm_copy_v_to_av_32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b32_inlineimm_copy_v_to_av_32 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $agpr0 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec + %1:av_32 = COPY %0 + $agpr0 = COPY %1 + S_ENDPGM 0 + +... + +--- +name: s_mov_b32_imm_literal_copy_s_to_av_32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32 + ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999 + ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]] + ; GCN-NEXT: $agpr0 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:sreg_32 = S_MOV_B32 999 + %1:av_32 = COPY %0 + $agpr0 = COPY %1 + S_ENDPGM 0 + +... + +--- +name: v_mov_b32_imm_literal_copy_v_to_av_32 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]] + ; GCN-NEXT: $agpr0 = COPY [[COPY]] + ; GCN-NEXT: S_ENDPGM 0 + %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec + %1:av_32 = COPY %0 + $agpr0 = COPY %1 + S_ENDPGM 0 + +...