@@ -5139,13 +5139,45 @@ bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5139
5139
5140
5140
bool AMDGPUAsmParser::validateVGPRAlign (const MCInst &Inst) const {
5141
5141
auto FB = getFeatureBits ();
5142
+ if (!FB[AMDGPU::FeatureGFX90AInsts] && !FB[AMDGPU::FeatureGFX1250Insts])
5143
+ return true ;
5144
+
5142
5145
unsigned Opc = Inst.getOpcode ();
5146
+ const MCRegisterInfo *MRI = getMRI ();
5143
5147
// DS_READ_B96_TR_B6 is the only DS instruction in GFX950 that allows
5144
5148
// unaligned VGPR. All others only allow even aligned VGPRs.
5145
- if (!( FB[AMDGPU::FeatureGFX90AInsts]) || Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5149
+ if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
5146
5150
return true ;
5147
5151
5148
- const MCRegisterInfo *MRI = getMRI ();
5152
+ if (FB[AMDGPU::FeatureGFX1250Insts]) {
5153
+ switch (Opc) {
5154
+ default :
5155
+ break ;
5156
+ case AMDGPU::DS_LOAD_TR6_B96:
5157
+ case AMDGPU::DS_LOAD_TR6_B96_gfx12:
5158
+ // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250 that
5159
+ // allows unaligned VGPR. All others only allow even aligned VGPRs.
5160
+ return true ;
5161
+ case AMDGPU::GLOBAL_LOAD_TR6_B96:
5162
+ case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
5163
+ // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250 that
5164
+ // allows unaligned VGPR for vdst, but other operands still only allow
5165
+ // even aligned VGPRs.
5166
+ int VAddrIdx = AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::vaddr);
5167
+ if (VAddrIdx != -1 ) {
5168
+ const MCOperand &Op = Inst.getOperand (VAddrIdx);
5169
+ MCRegister Sub = MRI->getSubReg (Op.getReg (), AMDGPU::sub0);
5170
+ if ((Sub - AMDGPU::VGPR0) & 1 )
5171
+ return false ;
5172
+ }
5173
+ return true ;
5174
+ }
5175
+ case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
5176
+ case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
5177
+ return true ;
5178
+ }
5179
+ }
5180
+
5149
5181
const MCRegisterClass &VGPR32 = MRI->getRegClass (AMDGPU::VGPR_32RegClassID);
5150
5182
const MCRegisterClass &AGPR32 = MRI->getRegClass (AMDGPU::AGPR_32RegClassID);
5151
5183
for (unsigned I = 0 , E = Inst.getNumOperands (); I != E; ++I) {
0 commit comments