@@ -609,6 +609,29 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
609609 } else if v .rematerializeable () {
610610 // Rematerialize instead of loading from the spill location.
611611 c = v .copyIntoWithXPos (s .curBlock , pos )
612+ // We need to consider its output mask and potentially issue a Copy
613+ // if there are register mask conflicts.
614+ // This currently happens for the SIMD package only, between GP and FP
615+ // registers, because Intel's vector extension can put an integer value
616+ // into an FP register, where it is seen as a vector. Example instruction: VPSLL[BWDQ].
617+ // Because GP and FP masks do not overlap, mask & sourceMask == 0
618+ // detects this situation thoroughly.
619+ sourceMask := s .regspec (c ).outputs [0 ].regs
620+ if mask & sourceMask == 0 && ! onWasmStack {
621+ s .setOrig (c , v )
622+ s .assignReg (s .allocReg (sourceMask , v ), v , c )
623+ // v.Type for the new OpCopy is likely wrong and it might delay the problem
624+ // until ssa to asm lowering, which might need the types to generate the right
625+ // assembly for OpCopy. For Intel's GP to FP move, it happens to be that
626+ // MOV instruction has such a variant so it happens to be right.
627+ // But it's unclear for other architectures or situations, and the problem
628+ // might be exposed when the assembler sees illegal instructions.
629+ // Right now we still pick v.Type, because at least its size should be correct
630+ // for the rematerialization case the amd64 SIMD package exposed.
631+ // TODO: We might need to figure out a way to find the correct type or make
632+ // the asm lowering use reg info only for OpCopy.
633+ c = s .curBlock .NewValue1 (pos , OpCopy , v .Type , c )
634+ }
612635 } else {
613636 // Load v from its spill location.
614637 spill := s .makeSpill (v , s .curBlock )
0 commit comments