@@ -3915,17 +3915,6 @@ void Optimizer::HWWorkaround() {
39153915              LSC_FENCE_OP_NONE)
39163916        bb->insertBefore (ii, inst->cloneInst ());
39173917
3918-       //  When destination is an address register the following apply:
3919-       //  Destination must not span across the lower to upper 8 dword
3920-       //  boundary of the register.
3921-       //  Fix this restriction after RA instead of HWConformity just because
3922-       //  RA(spill/fill, A0 save/restore) would generate such instructions.
3923-       if  (inst->getExecSize () == g4::SIMD32 && inst->getDst () &&
3924-           inst->getDst ()->isDirectA0 ()) {
3925-         HWConformity hwConf (builder, kernel);
3926-         hwConf.evenlySplitInst (ii, bb, /* checkOverlap*/ false );
3927-       }
3928- 
39293918      ii++;
39303919    }
39313920  }
@@ -3980,8 +3969,27 @@ void Optimizer::HWWorkaround() {
39803969  if  (builder.hasFPU0ReadSuppressionIssue ()) {
39813970    fixReadSuppressioninFPU0 ();
39823971  }
3972+ 
3973+   if  (builder.supportNativeSIMD32 ())
3974+     fixDirectAddrBoundOnDst ();
39833975}
39843976
3977+ //  When destination is an address register the following apply:
3978+ //  Destination must not span across the lower to upper 8 dword
3979+ //  boundary of the register.
3980+ //  Fix this restriction after RA instead of HWConformity just because
3981+ //  RA(spill/fill, A0 save/restore) would generate such instructions.
3982+ void  Optimizer::fixDirectAddrBoundOnDst () {
3983+   HWConformity hwConf (builder, kernel);
3984+   for  (auto  bb : kernel.fg ) {
3985+     for  (auto  it = bb->begin (), ie = bb->end (); it != ie; ++it) {
3986+       G4_INST *inst = *it;
3987+       if  (inst->getExecSize () == g4::SIMD32 && inst->getDst () &&
3988+           inst->getDst ()->isDirectA0 ())
3989+         hwConf.evenlySplitInst (it, bb, /* checkOverlap*/ false );
3990+     }
3991+   }
3992+ }
39853993
39863994static  bool  retires (G4_Operand *Opnd, G4_INST *SI) {
39873995  vASSERT (SI);
0 commit comments