13
13
#include " Cafe/HW/Latte/Core/LattePM4.h"
14
14
15
15
#include " Cafe/OS/libs/coreinit/coreinit_Time.h"
16
+ #include " Cafe/OS/libs/TCL/TCL.h" // TCL currently handles the GPU command ringbuffer
16
17
17
18
#include " Cafe/CafeSystem.h"
18
19
@@ -28,11 +29,6 @@ typedef uint32be* LatteCMDPtr;
28
29
#define LatteReadCMD () ((uint32)*(cmd++))
29
30
#define LatteSkipCMD (_nWords ) cmd += (_nWords)
30
31
31
- uint8* gxRingBufferReadPtr; // currently active read pointer (gx2 ring buffer or display list)
32
- uint8* gx2CPParserDisplayListPtr;
33
- uint8* gx2CPParserDisplayListStart; // used for debugging
34
- uint8* gx2CPParserDisplayListEnd;
35
-
36
32
void LatteThread_HandleOSScreen ();
37
33
38
34
void LatteThread_Exit ();
@@ -155,16 +151,12 @@ void LatteCP_signalEnterWait()
155
151
*/
156
152
uint32 LatteCP_readU32Deprc ()
157
153
{
158
- uint32 v;
159
- uint8* gxRingBufferWritePtr;
160
- sint32 readDistance;
161
154
// no display list active
162
155
while (true )
163
156
{
164
- gxRingBufferWritePtr = gx2WriteGatherPipe.writeGatherPtrGxBuffer [GX2::sGX2MainCoreIndex ];
165
- readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr);
166
- if (readDistance != 0 )
167
- break ;
157
+ uint32 cmdWord;
158
+ if ( TCL::TCLGPUReadRBWord (cmdWord) )
159
+ return cmdWord;
168
160
169
161
g_renderer->NotifyLatteCommandProcessorIdle (); // let the renderer know in case it wants to flush any commands
170
162
performanceMonitor.gpuTime_idleTime .beginMeasuring ();
@@ -175,56 +167,8 @@ uint32 LatteCP_readU32Deprc()
175
167
}
176
168
LatteThread_HandleOSScreen (); // check if new frame was presented via OSScreen API
177
169
178
- readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr);
179
- if (readDistance != 0 )
180
- break ;
181
- if (Latte_GetStopSignal ())
182
- LatteThread_Exit ();
183
-
184
- // still no command data available, do some other tasks
185
- LatteTiming_HandleTimedVsync ();
186
- LatteAsyncCommands_checkAndExecute ();
187
- std::this_thread::yield ();
188
- performanceMonitor.gpuTime_idleTime .endMeasuring ();
189
- }
190
- v = *(uint32*)gxRingBufferReadPtr;
191
- gxRingBufferReadPtr += 4 ;
192
- #ifdef CEMU_DEBUG_ASSERT
193
- if (v == 0xcdcdcdcd )
194
- assert_dbg ();
195
- #endif
196
- v = _swapEndianU32 (v);
197
- return v;
198
- }
199
-
200
- void LatteCP_waitForNWords (uint32 numWords)
201
- {
202
- uint8* gxRingBufferWritePtr;
203
- sint32 readDistance;
204
- bool isFlushed = false ;
205
- sint32 waitDistance = numWords * sizeof (uint32be);
206
- // no display list active
207
- while (true )
208
- {
209
- gxRingBufferWritePtr = gx2WriteGatherPipe.writeGatherPtrGxBuffer [GX2::sGX2MainCoreIndex ];
210
- readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr);
211
- if (readDistance < 0 )
212
- return ; // wrap around means there is at least one full command queued after this
213
- if (readDistance >= waitDistance)
214
- break ;
215
- g_renderer->NotifyLatteCommandProcessorIdle (); // let the renderer know in case it wants to flush any commands
216
- performanceMonitor.gpuTime_idleTime .beginMeasuring ();
217
- // no command data available, spin in a busy loop for a while then check again
218
- for (sint32 busy = 0 ; busy < 80 ; busy++)
219
- {
220
- _mm_pause ();
221
- }
222
- readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr);
223
- if (readDistance < 0 )
224
- return ; // wrap around means there is at least one full command queued after this
225
- if (readDistance >= waitDistance)
226
- break ;
227
-
170
+ if ( TCL::TCLGPUReadRBWord (cmdWord) )
171
+ return cmdWord;
228
172
if (Latte_GetStopSignal ())
229
173
LatteThread_Exit ();
230
174
@@ -234,6 +178,7 @@ void LatteCP_waitForNWords(uint32 numWords)
234
178
std::this_thread::yield ();
235
179
performanceMonitor.gpuTime_idleTime .endMeasuring ();
236
180
}
181
+ UNREACHABLE;
237
182
}
238
183
239
184
template <uint32 readU32 ()>
@@ -270,21 +215,23 @@ void LatteCP_itIndirectBufferDepr(LatteCMDPtr cmd, uint32 nWords)
270
215
cemu_assert_debug (nWords == 3 );
271
216
uint32 physicalAddress = LatteReadCMD ();
272
217
uint32 physicalAddressHigh = LatteReadCMD (); // unused
273
- uint32 sizeInDWords = LatteReadCMD ();
274
- uint32 displayListSize = sizeInDWords * 4 ;
275
- DrawPassContext drawPassCtx;
218
+ uint32 sizeInU32s = LatteReadCMD ();
276
219
277
220
#ifdef LATTE_CP_LOGGING
278
221
if (GetAsyncKeyState (' A' ))
279
222
LatteCP_DebugPrintCmdBuffer (MEMPTR<uint32be>(physicalAddress), displayListSize);
280
223
#endif
281
224
282
- uint32be* buf = MEMPTR<uint32be>(physicalAddress).GetPtr ();
283
- drawPassCtx.PushCurrentCommandQueuePos (buf, buf, buf + sizeInDWords);
225
+ if (sizeInU32s > 0 )
226
+ {
227
+ DrawPassContext drawPassCtx;
228
+ uint32be* buf = MEMPTR<uint32be>(physicalAddress).GetPtr ();
229
+ drawPassCtx.PushCurrentCommandQueuePos (buf, buf, buf + sizeInU32s);
284
230
285
- LatteCP_processCommandBuffer (drawPassCtx);
286
- if (drawPassCtx.isWithinDrawPass ())
287
- drawPassCtx.endDrawPass ();
231
+ LatteCP_processCommandBuffer (drawPassCtx);
232
+ if (drawPassCtx.isWithinDrawPass ())
233
+ drawPassCtx.endDrawPass ();
234
+ }
288
235
}
289
236
290
237
// pushes the command buffer to the stack
@@ -294,11 +241,12 @@ void LatteCP_itIndirectBuffer(LatteCMDPtr cmd, uint32 nWords, DrawPassContext& d
294
241
uint32 physicalAddress = LatteReadCMD ();
295
242
uint32 physicalAddressHigh = LatteReadCMD (); // unused
296
243
uint32 sizeInDWords = LatteReadCMD ();
297
- uint32 displayListSize = sizeInDWords * 4 ;
298
- cemu_assert_debug (displayListSize >= 4 );
299
-
300
- uint32be* buf = MEMPTR<uint32be>(physicalAddress).GetPtr ();
301
- drawPassCtx.PushCurrentCommandQueuePos (buf, buf, buf + sizeInDWords);
244
+ if (sizeInDWords > 0 )
245
+ {
246
+ uint32 displayListSize = sizeInDWords * 4 ;
247
+ uint32be* buf = MEMPTR<uint32be>(physicalAddress).GetPtr ();
248
+ drawPassCtx.PushCurrentCommandQueuePos (buf, buf, buf + sizeInDWords);
249
+ }
302
250
}
303
251
304
252
LatteCMDPtr LatteCP_itStreamoutBufferUpdate (LatteCMDPtr cmd, uint32 nWords)
@@ -565,26 +513,55 @@ LatteCMDPtr LatteCP_itMemWrite(LatteCMDPtr cmd, uint32 nWords)
565
513
if (word1 == 0x40000 )
566
514
{
567
515
// write U32
568
- *memPtr = word2;
516
+ stdx::atomic_ref<uint32be> atomicRef (*memPtr);
517
+ atomicRef.store (word2);
569
518
}
570
519
else if (word1 == 0x00000 )
571
520
{
572
- // write U64 (as two U32)
573
- // note: The U32s are swapped
574
- memPtr[ 0 ] = word2 ;
575
- memPtr[ 1 ] = word3;
521
+ // write U64
522
+ // note: The U32s are swapped here, but this needs verification. Also, it seems like the two U32 halves are written independently and the U64 as a whole is not atomic -> investigate
523
+ stdx::atomic_ref<uint64be> atomicRef (*(uint64be*)memPtr) ;
524
+ atomicRef. store (((uint64le)word2 << 32 ) | word3) ;
576
525
}
577
526
else if (word1 == 0x20000 )
578
527
{
579
528
// write U64 (little endian)
580
- memPtr[ 0 ] = _swapEndianU32 (word2 );
581
- memPtr[ 1 ] = _swapEndianU32 ( word3);
529
+ stdx::atomic_ref<uint64le> atomicRef (*(uint64le*)memPtr );
530
+ atomicRef. store (((uint64le) word3 << 32 ) | word2 );
582
531
}
583
532
else
584
533
cemu_assert_unimplemented ();
585
534
return cmd;
586
535
}
587
536
537
+ LatteCMDPtr LatteCP_itEventWriteEOP (LatteCMDPtr cmd, uint32 nWords)
538
+ {
539
+ cemu_assert_debug (nWords == 5 );
540
+ uint32 word0 = LatteReadCMD ();
541
+ uint32 word1 = LatteReadCMD ();
542
+ uint32 word2 = LatteReadCMD ();
543
+ uint32 word3 = LatteReadCMD (); // value low bits
544
+ uint32 word4 = LatteReadCMD (); // value high bits
545
+
546
+ cemu_assert_debug (word2 == 0x40000000 || word2 == 0x42000000 );
547
+
548
+ if (word0 == 0x504 && (word2&0x40000000 )) // todo - figure out the flags
549
+ {
550
+ stdx::atomic_ref<uint64be> atomicRef (*(uint64be*)memory_getPointerFromPhysicalOffset (word1));
551
+ uint64 val = ((uint64)word4 << 32 ) | word3;
552
+ atomicRef.store (val);
553
+ }
554
+ else
555
+ { cemu_assert_unimplemented ();
556
+ }
557
+ bool triggerInterrupt = (word2 & 0x2000000 ) != 0 ;
558
+ if (triggerInterrupt)
559
+ {
560
+ // todo - timestamp interrupt
561
+ }
562
+ TCL::TCLGPUNotifyNewRetirementTimestamp ();
563
+ return cmd;
564
+ }
588
565
589
566
LatteCMDPtr LatteCP_itMemSemaphore (LatteCMDPtr cmd, uint32 nWords)
590
567
{
@@ -783,16 +760,6 @@ LatteCMDPtr LatteCP_itDrawImmediate(LatteCMDPtr cmd, uint32 nWords, DrawPassCont
783
760
784
761
drawPassCtx.executeDraw (count, false , _tempIndexArrayMPTR);
785
762
return cmd;
786
-
787
- }
788
-
789
- LatteCMDPtr LatteCP_itHLEFifoWrapAround (LatteCMDPtr cmd, uint32 nWords)
790
- {
791
- cemu_assert_debug (nWords == 1 );
792
- uint32 unused = LatteReadCMD ();
793
- gxRingBufferReadPtr = gx2WriteGatherPipe.gxRingBuffer ;
794
- cmd = (LatteCMDPtr)gxRingBufferReadPtr;
795
- return cmd;
796
763
}
797
764
798
765
LatteCMDPtr LatteCP_itHLESampleTimer (LatteCMDPtr cmd, uint32 nWords)
@@ -819,16 +786,6 @@ LatteCMDPtr LatteCP_itHLESpecialState(LatteCMDPtr cmd, uint32 nWords)
819
786
return cmd;
820
787
}
821
788
822
- LatteCMDPtr LatteCP_itHLESetRetirementTimestamp (LatteCMDPtr cmd, uint32 nWords)
823
- {
824
- cemu_assert_debug (nWords == 2 );
825
- uint32 timestampHigh = (uint32)LatteReadCMD ();
826
- uint32 timestampLow = (uint32)LatteReadCMD ();
827
- uint64 timestamp = ((uint64)timestampHigh << 32ULL ) | (uint64)timestampLow;
828
- GX2::__GX2NotifyNewRetirementTimestamp (timestamp);
829
- return cmd;
830
- }
831
-
832
789
LatteCMDPtr LatteCP_itHLEBeginOcclusionQuery (LatteCMDPtr cmd, uint32 nWords)
833
790
{
834
791
cemu_assert_debug (nWords == 1 );
@@ -1145,9 +1102,10 @@ void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx)
1145
1102
LatteCMDPtr cmd, cmdStart, cmdEnd;
1146
1103
if (!drawPassCtx.PopCurrentCommandQueuePos (cmd, cmdStart, cmdEnd))
1147
1104
break ;
1105
+ uint32 itHeader;
1148
1106
while (cmd < cmdEnd)
1149
1107
{
1150
- uint32 itHeader = LatteReadCMD ();
1108
+ itHeader = LatteReadCMD ();
1151
1109
uint32 itHeaderType = (itHeader >> 30 ) & 3 ;
1152
1110
if (itHeaderType == 3 )
1153
1111
{
@@ -1361,11 +1319,6 @@ void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx)
1361
1319
LatteCP_itHLEEndOcclusionQuery (cmdData, nWords);
1362
1320
break ;
1363
1321
}
1364
- case IT_HLE_SET_CB_RETIREMENT_TIMESTAMP:
1365
- {
1366
- LatteCP_itHLESetRetirementTimestamp (cmdData, nWords);
1367
- break ;
1368
- }
1369
1322
case IT_HLE_BOTTOM_OF_PIPE_CB:
1370
1323
{
1371
1324
LatteCP_itHLEBottomOfPipeCB (cmdData, nWords);
@@ -1421,6 +1374,7 @@ void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx)
1421
1374
void LatteCP_ProcessRingbuffer ()
1422
1375
{
1423
1376
sint32 timerRecheck = 0 ; // estimates how much CP processing time has elapsed based on the executed commands, if the value exceeds CP_TIMER_RECHECK then _handleTimers() is called
1377
+ uint32be tmpBuffer[128 ];
1424
1378
while (true )
1425
1379
{
1426
1380
uint32 itHeader = LatteCP_readU32Deprc ();
@@ -1429,10 +1383,13 @@ void LatteCP_ProcessRingbuffer()
1429
1383
{
1430
1384
uint32 itCode = (itHeader >> 8 ) & 0xFF ;
1431
1385
uint32 nWords = ((itHeader >> 16 ) & 0x3FFF ) + 1 ;
1432
- LatteCP_waitForNWords (nWords);
1433
- LatteCMDPtr cmd = (LatteCMDPtr)gxRingBufferReadPtr;
1434
- uint8* cmdEnd = gxRingBufferReadPtr + nWords * 4 ;
1435
- gxRingBufferReadPtr = cmdEnd;
1386
+ cemu_assert (nWords < 128 );
1387
+ for (sint32 i=0 ; i<nWords; i++)
1388
+ {
1389
+ uint32 word = LatteCP_readU32Deprc ();
1390
+ tmpBuffer[i] = word;
1391
+ }
1392
+ LatteCMDPtr cmd = (LatteCMDPtr)tmpBuffer;
1436
1393
switch (itCode)
1437
1394
{
1438
1395
case IT_SURFACE_SYNC:
@@ -1599,6 +1556,11 @@ void LatteCP_ProcessRingbuffer()
1599
1556
timerRecheck += CP_TIMER_RECHECK / 512 ;
1600
1557
break ;
1601
1558
}
1559
+ case IT_EVENT_WRITE_EOP:
1560
+ {
1561
+ LatteCP_itEventWriteEOP (cmd, nWords);
1562
+ break ;
1563
+ }
1602
1564
case IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER:
1603
1565
{
1604
1566
LatteCP_itHLECopyColorBufferToScanBuffer (cmd, nWords);
@@ -1637,12 +1599,6 @@ void LatteCP_ProcessRingbuffer()
1637
1599
timerRecheck += CP_TIMER_RECHECK / 128 ;
1638
1600
break ;
1639
1601
}
1640
- case IT_HLE_FIFO_WRAP_AROUND:
1641
- {
1642
- LatteCP_itHLEFifoWrapAround (cmd, nWords);
1643
- timerRecheck += CP_TIMER_RECHECK / 512 ;
1644
- break ;
1645
- }
1646
1602
case IT_HLE_SAMPLE_TIMER:
1647
1603
{
1648
1604
LatteCP_itHLESampleTimer (cmd, nWords);
@@ -1667,12 +1623,6 @@ void LatteCP_ProcessRingbuffer()
1667
1623
timerRecheck += CP_TIMER_RECHECK / 512 ;
1668
1624
break ;
1669
1625
}
1670
- case IT_HLE_SET_CB_RETIREMENT_TIMESTAMP:
1671
- {
1672
- LatteCP_itHLESetRetirementTimestamp (cmd, nWords);
1673
- timerRecheck += CP_TIMER_RECHECK / 512 ;
1674
- break ;
1675
- }
1676
1626
case IT_HLE_BOTTOM_OF_PIPE_CB:
1677
1627
{
1678
1628
LatteCP_itHLEBottomOfPipeCB (cmd, nWords);
@@ -1933,11 +1883,6 @@ void LatteCP_DebugPrintCmdBuffer(uint32be* bufferPtr, uint32 size)
1933
1883
cemuLog_log (LogType::Force, " {} IT_HLE_COPY_SURFACE_NEW" , strPrefix);
1934
1884
break ;
1935
1885
}
1936
- case IT_HLE_FIFO_WRAP_AROUND:
1937
- {
1938
- cemuLog_log (LogType::Force, " {} IT_HLE_FIFO_WRAP_AROUND" , strPrefix);
1939
- break ;
1940
- }
1941
1886
case IT_HLE_SAMPLE_TIMER:
1942
1887
{
1943
1888
cemuLog_log (LogType::Force, " {} IT_HLE_SAMPLE_TIMER" , strPrefix);
@@ -1958,11 +1903,6 @@ void LatteCP_DebugPrintCmdBuffer(uint32be* bufferPtr, uint32 size)
1958
1903
cemuLog_log (LogType::Force, " {} IT_HLE_END_OCCLUSION_QUERY" , strPrefix);
1959
1904
break ;
1960
1905
}
1961
- case IT_HLE_SET_CB_RETIREMENT_TIMESTAMP:
1962
- {
1963
- cemuLog_log (LogType::Force, " {} IT_HLE_SET_CB_RETIREMENT_TIMESTAMP" , strPrefix);
1964
- break ;
1965
- }
1966
1906
case IT_HLE_BOTTOM_OF_PIPE_CB:
1967
1907
{
1968
1908
cemuLog_log (LogType::Force, " {} IT_HLE_BOTTOM_OF_PIPE_CB" , strPrefix);
0 commit comments