@@ -23,7 +23,7 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
23
23
IMLInstruction* imlInstructionLoad = imlSegment->imlList .data () + imlIndexLoad;
24
24
if (imlInstructionLoad->op_storeLoad .flags2 .notExpanded )
25
25
return ;
26
-
26
+ boost::container::static_vector<sint32, 4 > trackedMoves; // only track up to 4 copies
27
27
IMLUsedRegisters registersUsed;
28
28
sint32 scanRangeEnd = std::min<sint32>(imlIndexLoad + 25 , imlSegment->imlList .size ()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances)
29
29
bool foundMatch = false ;
@@ -54,8 +54,24 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
54
54
continue ;
55
55
}
56
56
}
57
-
58
- // check if FPR is overwritten (we can actually ignore read operations?)
57
+ // if the FPR is copied then keep track of it. We can expand the copies instead of the original
58
+ if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN && imlInstruction->op_fpr_r_r .regA .GetRegID () == fprIndex)
59
+ {
60
+ if (imlInstruction->op_fpr_r_r .regR .GetRegID () == fprIndex)
61
+ {
62
+ // unexpected no-op
63
+ break ;
64
+ }
65
+ if (trackedMoves.size () >= trackedMoves.capacity ())
66
+ {
67
+ // we cant track any more moves, expand here
68
+ lastStore = i;
69
+ break ;
70
+ }
71
+ trackedMoves.push_back (i);
72
+ continue ;
73
+ }
74
+ // check if FPR is overwritten
59
75
imlInstruction->CheckRegisterUsage (&registersUsed);
60
76
if (registersUsed.writtenGPR1 .IsValidAndSameRegID (fprIndex) || registersUsed.writtenGPR2 .IsValidAndSameRegID (fprIndex))
61
77
break ;
@@ -71,6 +87,24 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
71
87
72
88
if (foundMatch)
73
89
{
90
+ // insert expand instructions for each target register of a move
91
+ sint32 positionBias = 0 ;
92
+ for (auto & trackedMove : trackedMoves)
93
+ {
94
+ sint32 realPosition = trackedMove + positionBias;
95
+ IMLInstruction* imlMoveInstruction = imlSegment->imlList .data () + realPosition;
96
+ if (realPosition >= lastStore)
97
+ break ; // expand is inserted before this move
98
+ else
99
+ lastStore++;
100
+
101
+ cemu_assert_debug (imlMoveInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlMoveInstruction->op_fpr_r_r .regA .GetRegID () == fprIndex);
102
+ cemu_assert_debug (imlMoveInstruction->op_fpr_r_r .regA .GetRegFormat () == IMLRegFormat::F64);
103
+ auto dstReg = imlMoveInstruction->op_fpr_r_r .regR ;
104
+ IMLInstruction* newExpand = PPCRecompiler_insertInstruction (imlSegment, realPosition+1 ); // one after the move
105
+ newExpand->make_fpr_r (PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, dstReg);
106
+ positionBias++;
107
+ }
74
108
// insert expand instruction after store
75
109
IMLInstruction* newExpand = PPCRecompiler_insertInstruction (imlSegment, lastStore);
76
110
newExpand->make_fpr_r (PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID (fprIndex));
@@ -90,23 +124,21 @@ void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcI
90
124
*/
91
125
void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext)
{
	// Visit every instruction of every segment and pass each FPR load (plain or indexed)
	// that uses PPCREC_FPR_LD_MODE_SINGLE to the forward scan, together with its segment,
	// its index in the segment and the register it loads into.
	for (IMLSegment* segIt : ppcImlGenContext->segmentList2)
	{
		// The forward scan can insert instructions into segIt->imlList, so both the list size
		// and the instruction pointer are re-read on every iteration rather than cached.
		for (sint32 i = 0; i < static_cast<sint32>(segIt->imlList.size()); i++)
		{
			IMLInstruction* imlInstruction = segIt->imlList.data() + i;
			const bool isFprLoad = imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD || imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED;
			if (isFprLoad && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE)
			{
				PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData);
			}
		}
	}
}
111
143
112
144
void PPCRecompiler_optimizeDirectIntegerCopiesScanForward (ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg gprReg)
0 commit comments