Skip to content

Commit 557aff4

Browse files
committed
PPCRec: Implement PSQ scaling
1 parent de54241 commit 557aff4

File tree

4 files changed

+85
-17
lines changed

4 files changed

+85
-17
lines changed

src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,25 @@ void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction
241241
x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr);
242242
return;
243243
}
244+
else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
245+
{
246+
cemu_assert_debug(imlInstruction->op_fpr_r_r.regR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now
247+
cemu_assert_debug(imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32); // supporting only 32bit floats as input for now
248+
// exact operation depends on size of types. Floats are automatically promoted to double if the target is F64
249+
uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR);
250+
if (imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32)
251+
{
252+
uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA);
253+
x64Gen_movq_xmmReg_reg64(x64GenContext, regFpr, regGpr); // using reg32 as reg64 param here is ok. We'll refactor later
254+
// float to double
255+
x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regFpr, regFpr);
256+
}
257+
else
258+
{
259+
cemu_assert_unimplemented();
260+
}
261+
return;
262+
}
244263

245264
uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR);
246265
uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA);

src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
277277

278278
}
279279
else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ||
280-
operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT)
280+
operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ||
281+
operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT)
281282
{
282283
registersUsed->writtenGPR1 = op_fpr_r_r.regR;
283284
registersUsed->readGPR1 = op_fpr_r_r.regA;

src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ enum
143143
PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr
144144
PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr
145145

146+
// Bitcast (FPR_R_R)
147+
PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT,
148+
146149
// R_R_R + R_R_S32
147150
PPCREC_IML_OP_ADD, // also R_R_R_CARRY
148151
PPCREC_IML_OP_SUB,

src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,21 @@ void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLR
744744
);
745745
}
746746

747-
void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1)
747+
void PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrRegister, IMLReg fprRegScaleOut, bool isLoad)
748+
{
749+
IMLReg gprTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2);
750+
// extract scale factor and sign extend it
751+
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, gprTmp2, gqrRegister, 32 - ((isLoad ? 24 : 8)+7));
752+
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, gprTmp2, gprTmp2, (32-23)-7);
753+
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, gprTmp2, gprTmp2, 0x1FF<<23);
754+
if (isLoad)
755+
ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, gprTmp2, gprTmp2);
756+
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprTmp2, gprTmp2, 0x7F<<23);
757+
// gprTmp2 now holds the scale float bits, bitcast to float
758+
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT, fprRegScaleOut, gprTmp2);
759+
}
760+
761+
void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1)
748762
{
749763
if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_F32)
750764
{
@@ -756,26 +770,42 @@ void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, E
756770
}
757771
if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16)
758772
{
773+
// get scale factor
774+
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
775+
IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2);
776+
PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, true);
777+
759778
bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16);
760779
IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
761780
ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 16, isSigned, true);
762781
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp);
782+
783+
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS0, fprDPS0, fprScaleReg);
784+
763785
if(readPS1)
764786
{
765787
ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 2, 16, isSigned, true);
766788
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp);
789+
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS1, fprDPS1, fprScaleReg);
767790
}
768791
}
769792
else if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8)
770793
{
794+
// get scale factor
795+
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
796+
IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2);
797+
PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, true);
798+
771799
bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8);
772800
IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
773801
ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 8, isSigned, true);
774802
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp);
803+
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS0, fprDPS0, fprScaleReg);
775804
if(readPS1)
776805
{
777806
ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 1, 8, isSigned, true);
778807
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp);
808+
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS1, fprDPS1, fprScaleReg);
779809
}
780810
}
781811
}
@@ -812,14 +842,15 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
812842
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
813843
IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
814844
// extract the load type from the GQR register
815-
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7);
845+
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16);
846+
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7);
816847
IMLSegment* caseSegment[6];
817848
sint32 compareValues[6] = {0, 4, 5, 6, 7};
818849
PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0);
819850
for (sint32 i=0; i<5; i++)
820851
{
821852
IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i]
822-
PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1);
853+
PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, gqrIndex, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1);
823854
// create the case jump instructions here because we need to add it last
824855
caseSegment[i]->AppendInstruction()->make_jump();
825856
}
@@ -839,11 +870,11 @@ bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
839870
return false;
840871
}
841872

842-
PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, type, readPS1, gprA, imm, fprDPS0, fprDPS1);
873+
PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, gqrIndex, type, readPS1, gprA, imm, fprDPS0, fprDPS1);
843874
return true;
844875
}
845876

846-
void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1)
877+
void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1)
847878
{
848879
cemu_assert_debug(!storePS1 || fprDPS1.IsValid());
849880
if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_F32)
@@ -856,19 +887,27 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext,
856887
}
857888
else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16)
858889
{
890+
// get scale factor
891+
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
892+
IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2);
893+
PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, false);
894+
859895
bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16);
896+
IMLReg fprTmp = _GetFPRTemp(ppcImlGenContext, 0);
897+
860898
IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
861-
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0);
862-
// todo - scaling
899+
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS0, fprScaleReg);
900+
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp);
901+
863902
if (isSigned)
864903
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767);
865904
else
866905
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535);
867906
ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 16, true);
868907
if(storePS1)
869908
{
870-
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1);
871-
// todo - scaling
909+
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS1, fprScaleReg);
910+
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp);
872911
if (isSigned)
873912
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767);
874913
else
@@ -878,18 +917,25 @@ void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext,
878917
}
879918
else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8)
880919
{
920+
// get scale factor
921+
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
922+
IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2);
923+
PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, false);
924+
881925
bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8);
926+
IMLReg fprTmp = _GetFPRTemp(ppcImlGenContext, 0);
882927
IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
883-
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS0);
928+
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS0, fprScaleReg);
929+
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp);
884930
if (isSigned)
885931
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127);
886932
else
887933
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255);
888934
ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 8, true);
889935
if(storePS1)
890936
{
891-
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprDPS1);
892-
// todo - scaling
937+
ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS1, fprScaleReg);
938+
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp);
893939
if (isSigned)
894940
PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127);
895941
else
@@ -928,16 +974,15 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
928974
IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex);
929975
IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0);
930976
// extract the load type from the GQR register
931-
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16);
932-
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7);
977+
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7);
933978

934979
IMLSegment* caseSegment[5];
935980
sint32 compareValues[5] = {0, 4, 5, 6, 7};
936981
PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0);
937982
for (sint32 i=0; i<5; i++)
938983
{
939984
IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i]
940-
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1);
985+
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, gqrIndex, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1);
941986
ppcImlGenContext->emitInst().make_jump(); // finalize case
942987
}
943988
return true;
@@ -954,7 +999,7 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
954999
return false;
9551000
}
9561001

957-
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, type, storePS1, gprA, imm, fprDPS0, fprDPS1);
1002+
PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, gqrIndex, type, storePS1, gprA, imm, fprDPS0, fprDPS1);
9581003
return true;
9591004
}
9601005

0 commit comments

Comments
 (0)