Skip to content

Commit 997550f

Browse files
author
Niko
committed
optimization
1 parent 1ac39e6 commit 997550f

File tree

1 file changed

+131
-128
lines changed

1 file changed

+131
-128
lines changed

rebuild_PS1/TEST_DrawInstances.c

Lines changed: 131 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,18 @@ VECTOR* ApplyMatrixLV(MATRIX* m, VECTOR* v0, VECTOR* v1)
115115
}
116116
#endif
117117

118+
// Helper type for a "compressed vertex": three unsigned-byte components,
// laid out much like an RGB triple. Each component sits on a 256-step grid
// where 0 stands for 0.0 and 255 stands for 1.0.
// NOTE(review): presumably the element type of the vertex buffer that
// DrawOneInst reads X/Y/Z from -- confirm against the declaration of ptrVerts.
typedef struct CompVertex {
119+
u_char X;
120+
u_char Y;
121+
u_char Z;
122+
} CompVertex;
123+
124+
// Four-byte vertex: the same X/Y/Z unsigned-byte components as CompVertex
// plus a fourth byte, W. DrawOneInst declares its tempCoords window and its
// vertex stack with this type; the copy from the vertex buffer into the stack
// assigns X, Y and Z individually and does not write W.
// NOTE(review): W looks like padding that rounds the element up to 4 bytes
// (one word per vertex) -- confirm that nothing reads it.
typedef struct V4 {
125+
u_char X;
126+
u_char Y;
127+
u_char Z;
128+
u_char W;
129+
} V4;
118130

119131
void DrawOneInst(struct Instance* curr)
120132
{
@@ -228,14 +240,6 @@ void DrawOneInst(struct Instance* curr)
228240
// 3FF is background, 0x0 is minimum depth
229241
void* ot = &pb->ptrOT[0];
230242

231-
//helper type, kinda same as RGB
232-
//a 255 grid "compressed vertex" 0 = 0.0 and 255 = 1.0. 256 steps only.
233-
typedef struct CompVertex {
234-
u_char X;
235-
u_char Y;
236-
u_char Z;
237-
} CompVertex;
238-
239243
//flag values and end of list
240244
#define END_OF_LIST 0xFFFFFFFF
241245
#define DRAW_CMD_FLAG_NEW_STRIP (1 << 7)
@@ -256,7 +260,7 @@ void DrawOneInst(struct Instance* curr)
256260

257261
//a "shifting window", here we update the vertices and read triangle once it's ready
258262
//you need same cache for both colors and texture layouts
259-
CompVertex tempCoords[4] = {0};
263+
V4 tempCoords[4] = {0};
260264
int tempColor[4] = {0};
261265
struct TextureLayout* tempTex = 0;
262266

@@ -265,9 +269,9 @@ void DrawOneInst(struct Instance* curr)
265269
//you can draw many triangles of the list with minimum additional loads
266270
//then once you don't need vertex data, you can overwrite same indices with new data
267271
#ifdef REBUILD_PC
268-
CompVertex stack[256] = { 0 };
272+
V4 stack[256] = { 0 };
269273
#else
270-
CompVertex* stack = 0x1f800000;
274+
V4* stack = 0x1f800000;
271275
#endif
272276

273277
// pCmd[0] is number of commands
@@ -280,7 +284,12 @@ void DrawOneInst(struct Instance* curr)
280284
bi = 0;
281285

282286
//loop commands until we hit the end marker
283-
while (*pCmd != END_OF_LIST)
287+
for (
288+
/* */;
289+
*pCmd != END_OF_LIST;
290+
291+
pCmd++, stripLength++
292+
)
284293
{
285294
//extract individual values from the command
286295
//refactor to a set of inline macros?
@@ -355,7 +364,9 @@ void DrawOneInst(struct Instance* curr)
355364
else
356365
{
357366
//copy from vertex buffer to stack index
358-
stack[stackIndex] = ptrVerts[vertexIndex];
367+
stack[stackIndex].X = ptrVerts[vertexIndex].X;
368+
stack[stackIndex].Y = ptrVerts[vertexIndex].Y;
369+
stack[stackIndex].Z = ptrVerts[vertexIndex].Z;
359370
}
360371

361372
//and point to next vertex
@@ -376,8 +387,6 @@ void DrawOneInst(struct Instance* curr)
376387
tempColor[2] = tempColor[3];
377388
tempColor[3] = mh->ptrColors[colorIndex];
378389

379-
tempTex = (texIndex == 0 ? 0 : mh->ptrTexLayout[texIndex - 1]);
380-
381390
//this is probably some tristrip optimization, so we can reuse vertex from the last triangle
382391
//and only spend 1 command
383392
if ((flags & DRAW_CMD_FLAG_SWAP_VERTEX) != 0)
@@ -392,127 +401,121 @@ void DrawOneInst(struct Instance* curr)
392401
stripLength = 0;
393402
}
394403

395-
//enough data to add prim
396-
if (stripLength >= 2)
404+
if (stripLength < 2)
405+
continue;
406+
407+
void* pCurr;
408+
void* pNext;
409+
410+
// The X, Z, Y, is not a typo
411+
posWorld1[0] = ((((mf->pos[0] + tempCoords[1].X) * mh->scale[0]) >> 8) * curr->scale[0]) >> 12;
412+
posWorld1[1] = ((((mf->pos[1] + tempCoords[1].Z) * mh->scale[1]) >> 8) * curr->scale[1]) >> 12;
413+
posWorld1[2] = ((((mf->pos[2] + tempCoords[1].Y) * mh->scale[2]) >> 8) * curr->scale[2]) >> 12;
414+
posWorld1[3] = 0;
415+
gte_ldv0(&posWorld1[0]);
416+
417+
// The X, Z, Y, is not a typo
418+
posWorld2[0] = ((((mf->pos[0] + tempCoords[2].X) * mh->scale[0]) >> 8) * curr->scale[0]) >> 12;
419+
posWorld2[1] = ((((mf->pos[1] + tempCoords[2].Z) * mh->scale[1]) >> 8) * curr->scale[1]) >> 12;
420+
posWorld2[2] = ((((mf->pos[2] + tempCoords[2].Y) * mh->scale[2]) >> 8) * curr->scale[2]) >> 12;
421+
posWorld2[3] = 0;
422+
gte_ldv1(&posWorld2[0]);
423+
424+
// The X, Z, Y, is not a typo
425+
posWorld3[0] = ((((mf->pos[0] + tempCoords[3].X) * mh->scale[0]) >> 8) * curr->scale[0]) >> 12;
426+
posWorld3[1] = ((((mf->pos[1] + tempCoords[3].Z) * mh->scale[1]) >> 8) * curr->scale[1]) >> 12;
427+
posWorld3[2] = ((((mf->pos[2] + tempCoords[3].Y) * mh->scale[2]) >> 8) * curr->scale[2]) >> 12;
428+
posWorld3[3] = 0;
429+
gte_ldv2(&posWorld3[0]);
430+
431+
gte_rtpt();
432+
433+
// automatic pass, if no frontface or backface culling
434+
int boolPassCull = ((flags & DRAW_CMD_FLAG_CULLING) == 0);
435+
436+
// if culling is required
437+
if (!boolPassCull)
397438
{
398-
void* pCurr;
399-
void* pNext;
400-
401-
// The X, Z, Y, is not a typo
402-
posWorld1[0] = ((((mf->pos[0] + tempCoords[1].X) * mh->scale[0]) >> 8) * curr->scale[0]) >> 12;
403-
posWorld1[1] = ((((mf->pos[1] + tempCoords[1].Z) * mh->scale[1]) >> 8) * curr->scale[1]) >> 12;
404-
posWorld1[2] = ((((mf->pos[2] + tempCoords[1].Y) * mh->scale[2]) >> 8) * curr->scale[2]) >> 12;
405-
posWorld1[3] = 0;
406-
gte_ldv0(&posWorld1[0]);
407-
408-
// The X, Z, Y, is not a typo
409-
posWorld2[0] = ((((mf->pos[0] + tempCoords[2].X) * mh->scale[0]) >> 8) * curr->scale[0]) >> 12;
410-
posWorld2[1] = ((((mf->pos[1] + tempCoords[2].Z) * mh->scale[1]) >> 8) * curr->scale[1]) >> 12;
411-
posWorld2[2] = ((((mf->pos[2] + tempCoords[2].Y) * mh->scale[2]) >> 8) * curr->scale[2]) >> 12;
412-
posWorld2[3] = 0;
413-
gte_ldv1(&posWorld2[0]);
414-
415-
// The X, Z, Y, is not a typo
416-
posWorld3[0] = ((((mf->pos[0] + tempCoords[3].X) * mh->scale[0]) >> 8) * curr->scale[0]) >> 12;
417-
posWorld3[1] = ((((mf->pos[1] + tempCoords[3].Z) * mh->scale[1]) >> 8) * curr->scale[1]) >> 12;
418-
posWorld3[2] = ((((mf->pos[2] + tempCoords[3].Y) * mh->scale[2]) >> 8) * curr->scale[2]) >> 12;
419-
posWorld3[3] = 0;
420-
gte_ldv2(&posWorld3[0]);
421-
422-
gte_rtpt();
423-
424-
if (tempTex == 0)
425-
{
426-
POLY_G3* p = primMem->curr;
427-
pNext = p + 1;
428-
pCurr = p;
429-
430-
*(int*)&p->r0 = tempColor[1];
431-
*(int*)&p->r1 = tempColor[2];
432-
*(int*)&p->r2 = tempColor[3];
433-
434-
setPolyG3(p);
435-
436-
gte_stsxy3(
437-
&p->x0,
438-
&p->x1,
439-
&p->x2);
440-
}
441-
else
442-
{
443-
POLY_GT3* p = primMem->curr;
444-
pNext = p + 1;
445-
pCurr = p;
446-
447-
*(int*)&p->r0 = tempColor[1];
448-
*(int*)&p->r1 = tempColor[2];
449-
*(int*)&p->r2 = tempColor[3];
450-
451-
*(int*)&p->u0 = *(int*)&tempTex->u0;
452-
*(int*)&p->u1 = *(int*)&tempTex->u1;
453-
*(short*)&p->u2 = *(short*)&tempTex->u2;
454-
455-
setPolyGT3(p);
456-
457-
gte_stsxy3(
458-
&p->x0,
459-
&p->x1,
460-
&p->x2);
461-
}
439+
// assume backface culling
440+
int opZ;
441+
gte_nclip();
442+
gte_stopz(&opZ);
443+
boolPassCull = (opZ >= 0);
444+
445+
// if polygon is flipped
446+
if ((flags & DRAW_CMD_FLAG_FLIP_NORMAL) != 0)
447+
boolPassCull = !boolPassCull;
448+
449+
// if instance is flipped
450+
if ((curr->flags & REVERSE_CULL_DIRECTION) != 0)
451+
boolPassCull = !boolPassCull;
452+
}
453+
454+
if (!boolPassCull)
455+
continue;
456+
457+
// sorting
458+
int otZ;
459+
gte_avsz3();
460+
gte_stotz(&otZ);
461+
462+
// near-range should be higher for instances
463+
// than for the level (not exact number)
464+
if (otZ <= 32) continue;
465+
466+
// make sure instances draw on top of the road,
467+
// reduce depth in the sorting table (not exact number)
468+
otZ -= 32;
469+
470+
if (otZ >= 4080) continue;
471+
472+
tempTex = (texIndex == 0)
473+
? 0 : // index=0 -> tempTex = nullptr
474+
mh->ptrTexLayout[texIndex - 1]; // can still be nullptr
475+
476+
if (tempTex == 0)
477+
{
478+
POLY_G3* p = primMem->curr;
479+
pNext = p + 1;
480+
pCurr = p;
462481

463-
// automatic pass, if no frontface or backface culling
464-
int boolPassCull = ((flags & DRAW_CMD_FLAG_CULLING) == 0);
482+
*(int*)&p->r0 = tempColor[1];
483+
*(int*)&p->r1 = tempColor[2];
484+
*(int*)&p->r2 = tempColor[3];
465485

466-
// if culling is required
467-
if (!boolPassCull)
468-
{
469-
// assume backface culling
470-
int opZ;
471-
gte_nclip();
472-
gte_stopz(&opZ);
473-
boolPassCull = (opZ >= 0);
474-
475-
// if polygon is flipped
476-
if ((flags & DRAW_CMD_FLAG_FLIP_NORMAL) != 0)
477-
boolPassCull = !boolPassCull;
478-
479-
// if instance is flipped
480-
if ((curr->flags & REVERSE_CULL_DIRECTION) != 0)
481-
boolPassCull = !boolPassCull;
482-
}
483-
484-
if (boolPassCull)
485-
{
486-
// sorting
487-
int otZ;
488-
gte_avsz3();
489-
gte_stotz(&otZ);
490-
491-
// near-range for instances should be higher
492-
// for instances than level (not exact number)
493-
if (otZ > 32)
494-
{
495-
// make sure instances draw on top of the road,
496-
// reduce depth in the sorting table (not exact number)
497-
otZ -= 32;
498-
499-
if (otZ < 4080)
500-
{
501-
AddPrim((u_long*)ot + (otZ >> 2), pCurr);
502-
primMem->curr = pNext;
503-
}
504-
}
505-
}
486+
setPolyG3(p);
487+
488+
gte_stsxy3(
489+
&p->x0,
490+
&p->x1,
491+
&p->x2);
506492
}
493+
else
494+
{
495+
POLY_GT3* p = primMem->curr;
496+
pNext = p + 1;
497+
pCurr = p;
507498

508-
//strip length increases
509-
stripLength++;
499+
*(int*)&p->r0 = tempColor[1];
500+
*(int*)&p->r1 = tempColor[2];
501+
*(int*)&p->r2 = tempColor[3];
502+
503+
*(int*)&p->u0 = *(int*)&tempTex->u0;
504+
*(int*)&p->u1 = *(int*)&tempTex->u1;
505+
*(short*)&p->u2 = *(short*)&tempTex->u2;
510506

511-
//proceed to the next command
512-
pCmd++;
507+
setPolyGT3(p);
508+
509+
gte_stsxy3(
510+
&p->x0,
511+
&p->x1,
512+
&p->x2);
513+
}
514+
515+
AddPrim((u_long*)ot + (otZ >> 2), pCurr);
516+
primMem->curr = pNext;
513517
}
514518
}
515-
516519
}
517520

518521
void TEST_DrawInstances(struct GameTracker* gGT)

0 commit comments

Comments
 (0)