Skip to content

Commit 5316f1c

Browse files
committed
GS: Add optimized right triangle check.
Co-authored-by: TellowKrinkle
1 parent 7728729 commit 5316f1c

File tree

3 files changed

+209
-87
lines changed

3 files changed

+209
-87
lines changed

pcsx2/GS/GSState.cpp

Lines changed: 13 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -4132,87 +4132,6 @@ GSState::TextureMinMaxResult GSState::GetTextureMinMax(GIFRegTEX0 TEX0, GIFRegCL
41324132
return GetTextureMinMaxApprox(TEX0, CLAMP, linear);
41334133
}
41344134

4135-
// TODO: Replace this with optimized SIMD code.
4136-
// Returns true if the 3 vertices selected by index form an axis-aligned right triangle (in both XY and ST/UV) and
4137-
// outputs the positions (0-2, relative to index) of the corner, vertical, and horizontal points in i_out
4138-
static bool IsAxisAlignedRightTriangle(const GSVertex* vertex, const u16* index, bool fst, u32* i_out)
4139-
{
4140-
bool same_coord_x[3] = {false, false, false};
4141-
bool same_coord_y[3] = {false, false, false};
4142-
4143-
// Find which vertices have the same X/Y coordinates as other vertices
4144-
for (u32 i0 = 0; i0 < 3; i0++)
4145-
{
4146-
const u32 i1 = (i0 + 1) % 3;
4147-
bool same_x = vertex[index[i0]].XYZ.X == vertex[index[i1]].XYZ.X;
4148-
bool same_y = vertex[index[i0]].XYZ.Y == vertex[index[i1]].XYZ.Y;
4149-
if (fst)
4150-
{
4151-
same_x &= vertex[index[i0]].U == vertex[index[i1]].U;
4152-
same_y &= vertex[index[i0]].V == vertex[index[i1]].V;
4153-
}
4154-
else
4155-
{
4156-
same_x &= vertex[index[i0]].ST.S == vertex[index[i1]].ST.S;
4157-
same_y &= vertex[index[i0]].ST.T == vertex[index[i1]].ST.T;
4158-
}
4159-
if (same_x)
4160-
{
4161-
same_coord_x[i0] = same_coord_x[i1] = true;
4162-
}
4163-
if (same_y)
4164-
{
4165-
same_coord_y[i0] = same_coord_y[i1] = true;
4166-
}
4167-
}
4168-
4169-
// Find the corner vertex: the one that shares its X with another vertex and its Y with another vertex
4170-
int i_corner = -1;
4171-
int i_vertical = -1;
4172-
int i_horizontal = -1;
4173-
for (int i = 0; i < 3; i++)
4174-
{
4175-
if (same_coord_x[i] && same_coord_y[i])
4176-
{
4177-
if (i_corner != -1) // There can only be one corner point
4178-
return false;
4179-
i_corner = i;
4180-
}
4181-
}
4182-
4183-
if (i_corner < 0)
4184-
return false;
4185-
4186-
// The vertical vertex is the one that has the same x coordinate as the corner vertex
4187-
for (int i = 1; i < 3; i++)
4188-
{
4189-
if (same_coord_x[(i_corner + i) % 3])
4190-
{
4191-
if (i_vertical != -1)
4192-
return false; // There can only be one vertical point
4193-
i_vertical = (i_corner + i) % 3;
4194-
}
4195-
}
4196-
4197-
// The horizontal vertex is the one that has the same y coordinate as the corner vertex
4198-
for (int i = 1; i < 3; i++)
4199-
{
4200-
if (same_coord_y[(i_corner + i) % 3])
4201-
{
4202-
if (i_horizontal != -1)
4203-
return false; // There can only be one horizontal point
4204-
i_horizontal = (i_corner + i) % 3;
4205-
}
4206-
}
4207-
4208-
pxAssertMsg(i_horizontal != i_vertical, "Impossible");
4209-
4210-
i_out[0] = (u32)i_corner;
4211-
i_out[1] = (u32)i_vertical;
4212-
i_out[2] = (u32)i_horizontal;
4213-
return true;
4214-
}
4215-
42164135
// TODO: Replace this with optimized SIMD code.
42174136
void GSState::GetTextureMinMaxAxisAlignedHelper(
42184137
GIFRegTEX0 TEX0, GSVector4i scissor, u32 fst, const GSVertex* vertex, u16 index0, u16 index1, GSVector4* minmax)
@@ -4282,7 +4201,6 @@ void GSState::GetTextureMinMaxAxisAlignedHelper(
42824201
minmax->bottom = std::max(minmax->bottom, v_max);
42834202
}
42844203

4285-
// TODO: Replace stuff with templates for efficiency
42864204
bool GSState::GetTextureMinMaxAxisAligned(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, GSVector4i scissor, bool linear, GSState::TextureMinMaxResult* result)
42874205
{
42884206
const GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM);
@@ -4295,13 +4213,19 @@ bool GSState::GetTextureMinMaxAxisAligned(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, GS
42954213

42964214
if (primclass == GS_PRIM_CLASS::GS_TRIANGLE_CLASS)
42974215
{
4216+
const auto IsTriangleRight = fst ? GSUtil::IsTriangleRight<1, 1> : GSUtil::IsTriangleRight<1, 0>;
4217+
42984218
for (u32 i = 0; i < m_index.tail; i += 3)
42994219
{
43004220
const u16* const idx = &index[i];
4301-
u32 i_tri[3]; // Vertices of triangle in order: corner, vertical, horizontal
4302-
if (!IsAxisAlignedRightTriangle(vertex, &idx[0], fst, &i_tri[0]))
4221+
4222+
GSUtil::TriangleOrdering order;
4223+
if (!IsTriangleRight(vertex, &idx[0], &order))
43034224
return false;
4304-
GetTextureMinMaxAxisAlignedHelper(TEX0, scissor, fst, vertex, idx[i_tri[1]], idx[i_tri[2]], &minmax);
4225+
4226+
// We ignore the right angle corner of the triangle (order.b) and just pass the acute angle corners,
4227+
// (order.a and order.c) since this is all we need to infer the limits of the UV coordinates.
4228+
GetTextureMinMaxAxisAlignedHelper(TEX0, scissor, fst, vertex, idx[order.a], idx[order.c], &minmax);
43054229
}
43064230
}
43074231
else if (primclass == GS_PRIM_CLASS::GS_SPRITE_CLASS)
@@ -4350,8 +4274,10 @@ bool GSState::GetTextureMinMaxAxisAligned(GIFRegTEX0 TEX0, GIFRegCLAMP CLAMP, GS
43504274
}
43514275

43524276
bool left_boundary, top_boundary, right_boundary, bottom_boundary;
4353-
GetClampWrapMinMaxUV(TEX0.TW, CLAMP.WMS, CLAMP.MINU, CLAMP.MAXU, minmaxi.left, minmaxi.right, &minmaxi.left, &minmaxi.right, &left_boundary, &right_boundary);
4354-
GetClampWrapMinMaxUV(TEX0.TH, CLAMP.WMT, CLAMP.MINV, CLAMP.MAXV, minmaxi.top, minmaxi.bottom, &minmaxi.top, &minmaxi.bottom, &top_boundary, &bottom_boundary);
4277+
GetClampWrapMinMaxUV(TEX0.TW, CLAMP.WMS, CLAMP.MINU, CLAMP.MAXU, minmaxi.left, minmaxi.right,
4278+
&minmaxi.left, &minmaxi.right, &left_boundary, &right_boundary);
4279+
GetClampWrapMinMaxUV(TEX0.TH, CLAMP.WMT, CLAMP.MINV, CLAMP.MAXV, minmaxi.top, minmaxi.bottom,
4280+
&minmaxi.top, &minmaxi.bottom, &top_boundary, &bottom_boundary);
43554281

43564282
result->coverage = GSVector4i(minmaxi.left, minmaxi.top, minmaxi.right + 1, minmaxi.bottom + 1); // use exclusive coordinates for right/bottom
43574283
result->uses_boundary = 0;

pcsx2/GS/GSUtil.cpp

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,180 @@ GSRendererType GSUtil::GetPreferredRenderer()
235235
return preferred_renderer;
236236
}
237237

238+
// Helper struct for IsTriangleRight and AreTrianglesRight.
// Packs two small values into one byte: the remaining comparison a triangle still has to pass
// (bits 0-1) and an index into the vertex-ordering table used by FinalOrder() (bits 2 and up).
239+
struct ComparisonResult
240+
{
241+
u8 value; // final_cmp | (final_order << 2)
242+
u8 FinalCmp() const { return value & 3; } // low two bits: the still-required comparison mask
243+
constexpr ComparisonResult(u8 final_cmp, u8 final_order)
244+
: value(final_cmp | (final_order << 2))
245+
{
246+
}
247+
GSUtil::TriangleOrdering FinalOrder() const // expands the packed order index into an {a, b, c} ordering
248+
{
249+
struct alignas(2) TriangleOrderingBC
250+
{
251+
u8 b;
252+
u8 c;
253+
};
254+
alignas(16) static constexpr TriangleOrderingBC order_lut[6] = // the six permutations of {0, 1, 2}; a is implied by the entry index
255+
{
256+
TriangleOrderingBC{/*a=0,*/ 1, 2},
257+
TriangleOrderingBC{/*a=0,*/ 2, 1},
258+
TriangleOrderingBC{/*a=1,*/ 0, 2},
259+
TriangleOrderingBC{/*a=1,*/ 2, 0},
260+
TriangleOrderingBC{/*a=2,*/ 0, 1},
261+
TriangleOrderingBC{/*a=2,*/ 1, 0},
262+
};
263+
u32 order = static_cast<u32>(value) >> 2; // drop the FinalCmp bits, leaving the table index
264+
TriangleOrderingBC bc = order_lut[order];
265+
return {order >> 1, bc.b, bc.c}; // two table entries per value of a, so a = order / 2
266+
}
267+
};
268+
269+
// Helper table for IsTriangleRight/AreTrianglesRight functions.
// Indexed by a 4-bit mask of vertex 0 compared against vertices 1 and 2:
//   bit 0: x0 = x1, bit 1: y0 = y1, bit 2: x0 = x2, bit 3: y0 = y2
// First argument: the comparison between vertices 1 and 2 that must additionally hold
//   (0 = nothing, 1 = x1 = x2, 2 = y1 = y2).
// Second argument: index into the ordering table in ComparisonResult::FinalOrder().
270+
static constexpr ComparisonResult comparison_lut[16] =
271+
{
272+
ComparisonResult(0, 0), // 0000 => None equal, no sprite possible
273+
ComparisonResult(2, 0), // 0001 => x0 = x1, requires y1 = y2
274+
ComparisonResult(1, 5), // 0010 => y0 = y1, requires x1 = x2
275+
ComparisonResult(2, 0), // 0011 => x0 = x1, y0 = y1, (no area) requires x1 = x2 or y1 = y2
276+
ComparisonResult(2, 1), // 0100 => x0 = x2, requires y1 = y2
277+
ComparisonResult(2, 0), // 0101 => x0 = x1, x0 = x2, (no area) requires y1 = y2
278+
ComparisonResult(0, 4), // 0110 => y0 = y1, x0 = x2, requires nothing
279+
ComparisonResult(0, 4), // 0111 => x0 = x1, y0 = y1, x0 = x2, (no area) requires nothing
280+
ComparisonResult(1, 3), // 1000 => y0 = y2, requires x1 = x2
281+
ComparisonResult(0, 2), // 1001 => x0 = x1, y0 = y2, requires nothing
282+
ComparisonResult(1, 3), // 1010 => y0 = y1, y0 = y2, (no area) requires x1 = x2
283+
ComparisonResult(0, 2), // 1011 => x0 = x1, y0 = y1, y0 = y2, (unlikely) requires nothing
284+
ComparisonResult(2, 1), // 1100 => x0 = x2, y0 = y2, (no area) requires x1 = x2 or y1 = y2
285+
ComparisonResult(0, 2), // 1101 => x0 = x1, x0 = x2, y0 = y2, (no area) requires nothing
286+
ComparisonResult(0, 4), // 1110 => y0 = y1, x0 = x2, y0 = y2, (no area) requires nothing
287+
ComparisonResult(0, 2), // 1111 => x0 = x1, y0 = y1, x0 = x2, y0 = y2, (no area) requires nothing
288+
};
289+
290+
// Checks two triangles at once. index0 and index1 each point at the 3 vertex indices (into vin)
// of one triangle. Returns true only when both triangles pass the checks below, in which case
// their vertex orderings are written to out_triangle0/out_triangle1; on failure the outputs
// are left untouched.
// Template arguments: with tme && fst the UV bytes are compared together with XY; with
// tme && !fst the ST values are compared separately as floats; otherwise only XY is compared.
template <u32 tme, u32 fst>
291+
bool GSUtil::AreTrianglesRight(const GSVertex* RESTRICT vin, const u16* index0, const u16* index1,
292+
TriangleOrdering* out_triangle0, TriangleOrdering* out_triangle1)
293+
{
294+
GSVector4i mask;
295+
if (tme && fst)
296+
{
297+
// Compare xy and uv together
298+
// NOTE(review): presumably bytes 0-3 of m[1] hold X/Y and bytes 8-11 hold U/V - confirm against GSVertex
299+
mask = GSVector4i::cxpr8(
299+
(s8)0, (s8)1, (s8)8, (s8)9,
300+
(s8)2, (s8)3, (s8)10, (s8)11,
301+
(s8)0, (s8)1, (s8)8, (s8)9,
302+
(s8)2, (s8)3, (s8)10, (s8)11);
303+
}
304+
else
305+
{
306+
// ignore uv; st is compared further down, but only when tme && !fst
307+
mask = GSVector4i::cxpr8(
308+
(s8)0, (s8)1, (s8)0x80, (s8)0x80,
309+
(s8)2, (s8)3, (s8)0x80, (s8)0x80,
310+
(s8)0, (s8)1, (s8)0x80, (s8)0x80,
311+
(s8)2, (s8)3, (s8)0x80, (s8)0x80);
312+
}
313+
GSVector4i xy0 = GSVector4i(vin[index0[0]].m[1]).shuffle8(mask); // Triangle 0 vertex 0
314+
GSVector4i xy1 = GSVector4i(vin[index0[1]].m[1]).shuffle8(mask); // Triangle 0 vertex 1
315+
GSVector4i xy2 = GSVector4i(vin[index0[2]].m[1]).shuffle8(mask); // Triangle 0 vertex 2
316+
GSVector4i xy3 = GSVector4i(vin[index1[0]].m[1]).shuffle8(mask); // Triangle 1 vertex 0
317+
GSVector4i xy4 = GSVector4i(vin[index1[1]].m[1]).shuffle8(mask); // Triangle 1 vertex 1
318+
GSVector4i xy5 = GSVector4i(vin[index1[2]].m[1]).shuffle8(mask); // Triangle 1 vertex 2
319+
GSVector4i vcmp0 = xy0.eq32(xy1.upl64(xy2)); // triangle 0: vertex 0 against vertices 1 and 2
320+
GSVector4i vcmp1 = xy3.eq32(xy4.upl64(xy5)); // triangle 1: vertex 0 against vertices 1 and 2
321+
GSVector4i vcmp2 = xy1.upl64(xy4).eq32(xy2.upl64(xy5)); // vertex 1 against vertex 2, for both triangles
322+
if (tme && !fst)
323+
{
324+
// do the st comparisons
325+
GSVector4 st0 = GSVector4::cast(GSVector4i(vin[index0[0]].m[0]));
326+
GSVector4 st1 = GSVector4::cast(GSVector4i(vin[index0[1]].m[0]));
327+
GSVector4 st2 = GSVector4::cast(GSVector4i(vin[index0[2]].m[0]));
328+
GSVector4 st3 = GSVector4::cast(GSVector4i(vin[index1[0]].m[0]));
329+
GSVector4 st4 = GSVector4::cast(GSVector4i(vin[index1[1]].m[0]));
330+
GSVector4 st5 = GSVector4::cast(GSVector4i(vin[index1[2]].m[0]));
331+
332+
vcmp0 = vcmp0 & GSVector4i::cast(st0.xyxy() == st1.upld(st2));
333+
vcmp1 = vcmp1 & GSVector4i::cast(st3.xyxy() == st4.upld(st5));
334+
vcmp2 = vcmp2 & GSVector4i::cast(st1.upld(st4) == st2.upld(st5));
335+
}
336+
int cmp0 = GSVector4::cast(vcmp0).mask(); // used as the comparison_lut index for triangle 0
337+
int cmp1 = GSVector4::cast(vcmp1).mask(); // used as the comparison_lut index for triangle 1
338+
int cmp2 = GSVector4::cast(vcmp2).mask(); // presumably bits 0-1 = triangle 0, bits 2-3 = triangle 1 - confirm upl64 lane order
339+
if (!cmp0 || !cmp1) // Either triangle 0 or triangle 1 isn't a right triangle
340+
return false;
341+
ComparisonResult triangle0cmp = comparison_lut[cmp0];
342+
ComparisonResult triangle1cmp = comparison_lut[cmp1];
343+
int required_cmp2 = triangle0cmp.FinalCmp() | (triangle1cmp.FinalCmp() << 2); // triangle 0 in bits 0-1, triangle 1 in bits 2-3
344+
if ((cmp2 & required_cmp2) != required_cmp2) // every required bit must be set; extra set bits are tolerated
345+
return false;
346+
// Both t0 and t1 are right triangles!
347+
*out_triangle0 = triangle0cmp.FinalOrder();
348+
*out_triangle1 = triangle1cmp.FinalOrder();
349+
return true;
350+
}
351+
352+
// Single-triangle variant of AreTrianglesRight. index points at the 3 vertex indices (into vin)
// of the triangle. Returns true and writes the vertex ordering to out_triangle when the triangle
// passes the checks below; on failure out_triangle is left untouched.
// Template arguments: with tme && fst the UV bytes are compared together with XY; with
// tme && !fst the ST values are compared separately as floats; otherwise only XY is compared.
template <u32 tme, u32 fst>
353+
bool GSUtil::IsTriangleRight(const GSVertex* RESTRICT vin, const u16* index, TriangleOrdering* out_triangle)
354+
{
355+
GSVector4i mask;
356+
if (tme && fst)
357+
{
358+
// Compare xy and uv together
359+
mask = GSVector4i::cxpr8(
360+
(s8)0, (s8)1, (s8) 8, (s8) 9,
361+
(s8)2, (s8)3, (s8)10, (s8)11,
362+
(s8)0, (s8)1, (s8) 8, (s8) 9,
363+
(s8)2, (s8)3, (s8)10, (s8)11);
364+
}
365+
else
366+
{
367+
// ignore uv; st is compared further down, but only when tme && !fst
368+
mask = GSVector4i::cxpr8(
369+
(s8)0, (s8)1, (s8)0x80, (s8)0x80,
370+
(s8)2, (s8)3, (s8)0x80, (s8)0x80,
371+
(s8)0, (s8)1, (s8)0x80, (s8)0x80,
372+
(s8)2, (s8)3, (s8)0x80, (s8)0x80);
373+
}
374+
GSVector4i xy0 = GSVector4i(vin[index[0]].m[1]).shuffle8(mask); // Vertex 0
375+
GSVector4i xy1 = GSVector4i(vin[index[1]].m[1]).shuffle8(mask); // Vertex 1
376+
GSVector4i xy2 = GSVector4i(vin[index[2]].m[1]).shuffle8(mask); // Vertex 2
377+
GSVector4i vcmp0 = xy0.eq32(xy1.upl64(xy2)); // vertex 0 against vertices 1 and 2
378+
GSVector4i vcmp1 = xy1.eq32(xy2); // ignore top 64 bits
379+
if (tme && !fst)
380+
{
381+
// do the st comparisons
382+
GSVector4 st0 = GSVector4::cast(GSVector4i(vin[index[0]].m[0]));
383+
GSVector4 st1 = GSVector4::cast(GSVector4i(vin[index[1]].m[0]));
384+
GSVector4 st2 = GSVector4::cast(GSVector4i(vin[index[2]].m[0]));
385+
386+
vcmp0 = vcmp0 & GSVector4i::cast(st0.xyxy() == st1.upld(st2));
387+
vcmp1 = vcmp1 & GSVector4i::cast(st1 == st2); // ignore top 64 bits
388+
}
389+
int cmp0 = GSVector4::cast(vcmp0).mask(); // used as the comparison_lut index
390+
int cmp1 = GSVector4::cast(vcmp1).mask() & 0x3; // keep only the two low bits (vertex 1 vs vertex 2)
391+
if (!cmp0) // Vertex 0 matches neither vertex 1 nor vertex 2 on any axis, so this isn't a right triangle
392+
return false;
393+
ComparisonResult trianglecmp = comparison_lut[cmp0];
394+
int required_cmp1 = trianglecmp.FinalCmp();
395+
// NOTE(review): this is an exact match, whereas AreTrianglesRight only requires the needed bits
396+
// to be set ((cmp & required) == required) - confirm the difference is intended.
395+
if (cmp1 != required_cmp1)
396+
return false;
397+
// The triangle is a right triangle!
398+
*out_triangle = trianglecmp.FinalOrder();
399+
return true;
400+
}
401+
402+
// Explicitly instantiate every <tme, fst> combination of Is/AreTrianglesRight. The template bodies are
// defined in this file, so these instantiations supply the definitions for code that only sees the
// declarations in GSUtil.h.
403+
template bool GSUtil::AreTrianglesRight<0, 0>(const GSVertex* RESTRICT, const u16*, const u16*, TriangleOrdering*, TriangleOrdering*);
404+
template bool GSUtil::AreTrianglesRight<1, 0>(const GSVertex* RESTRICT, const u16*, const u16*, TriangleOrdering*, TriangleOrdering*);
405+
template bool GSUtil::AreTrianglesRight<0, 1>(const GSVertex* RESTRICT, const u16*, const u16*, TriangleOrdering*, TriangleOrdering*);
406+
template bool GSUtil::AreTrianglesRight<1, 1>(const GSVertex* RESTRICT, const u16*, const u16*, TriangleOrdering*, TriangleOrdering*);
407+
template bool GSUtil::IsTriangleRight<0, 0>(const GSVertex* RESTRICT, const u16*, TriangleOrdering*);
408+
template bool GSUtil::IsTriangleRight<1, 0>(const GSVertex* RESTRICT, const u16*, TriangleOrdering*);
409+
template bool GSUtil::IsTriangleRight<0, 1>(const GSVertex* RESTRICT, const u16*, TriangleOrdering*);
410+
template bool GSUtil::IsTriangleRight<1, 1>(const GSVertex* RESTRICT, const u16*, TriangleOrdering*);
411+
238412
const char* GSUtil::GetPSMName(int psm)
239413
{
240414
switch (psm)

pcsx2/GS/GSUtil.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include "GS.h"
77
#include "GSRegs.h"
8+
#include "GS/Renderers/Common/GSVertex.h"
89

910
class GSUtil
1011
{
@@ -27,4 +28,25 @@ class GSUtil
2728
static u32 GetChannelMask(u32 spsm, u32 fbmsk);
2829

2930
static GSRendererType GetPreferredRenderer();
31+
32+
// For returning order of vertices to form a right triangle.
// Each field is a position (0, 1 or 2) within the triangle's three indices, not a vertex index itself.
33+
struct TriangleOrdering
34+
{
35+
// Describes a right triangle laid out in one of the following orientations
36+
// b c | c b | a   |   a
37+
// a   |   a | b c | c b
38+
u32 a; // Same x as b
39+
u32 b; // Same x as a, same y as c (the right-angle corner)
40+
u32 c; // Same y as b
41+
};
42+
43+
// Determines ordering of two triangles in parallel if both are right.
44+
// More efficient than calling IsTriangleRight twice.
45+
template <u32 tme, u32 fst>
46+
static bool AreTrianglesRight(const GSVertex* RESTRICT vin, const u16* index0, const u16* index1,
47+
TriangleOrdering* out_triangle0, TriangleOrdering* out_triangle1);
48+
49+
// Determines ordering of a single triangle
50+
template <u32 tme, u32 fst>
51+
static bool IsTriangleRight(const GSVertex* RESTRICT vin, const u16* index, TriangleOrdering* out_triangle);
3052
};

0 commit comments

Comments
 (0)