@@ -1586,106 +1586,146 @@ void GSRendererHW::RoundSpriteOffset()
15861586 for (u32 i = 0 ; i < count; i += 2 )
15871587 {
15881588 // Performance note: if it had any impact on perf, someone would port it to SSE (AKA GSVector)
1589-
1590- // Compute the coordinate of first and last texels (in native with a linear filtering)
1589+ // if the draw is page aligned, then don't round it.
1590+ const int tex_width = std::max ( 1 , (v[i + 1 ]. U - v[i]. U ) >> 4 );
15911591 const int ox = m_context->XYOFFSET .OFX ;
1592- const int X0 = v[i].XYZ .X - ox;
1593- const int X1 = v[i + 1 ].XYZ .X - ox;
15941592 const int Lx = (v[i + 1 ].XYZ .X - v[i].XYZ .X );
1595- const float ax0 = alpha0 (Lx, X0, X1);
1596- const float ax1 = alpha1 (Lx, X0, X1);
1597- const u16 tx0 = Interpolate_UV (ax0, v[i].U , v[i + 1 ].U );
1598- const u16 tx1 = Interpolate_UV (ax1, v[i].U , v[i + 1 ].U );
1599- #ifdef DEBUG_U
1600- if (debug)
1601- {
1602- fprintf (stderr, " u0:%d and u1:%d\n " , v[i].U , v[i + 1 ].U );
1603- fprintf (stderr, " a0:%f and a1:%f\n " , ax0, ax1);
1604- fprintf (stderr, " t0:%d and t1:%d\n " , tx0, tx1);
1605- }
1606- #endif
16071593
1608- const int oy = m_context->XYOFFSET .OFY ;
1609- const int Y0 = v[i].XYZ .Y - oy;
1610- const int Y1 = v[i + 1 ].XYZ .Y - oy;
1611- const int Ly = (v[i + 1 ].XYZ .Y - v[i].XYZ .Y );
1612- const float ay0 = alpha0 (Ly, Y0, Y1);
1613- const float ay1 = alpha1 (Ly, Y0, Y1);
1614- const u16 ty0 = Interpolate_UV (ay0, v[i].V , v[i + 1 ].V );
1615- const u16 ty1 = Interpolate_UV (ay1, v[i].V , v[i + 1 ].V );
1616- #ifdef DEBUG_V
1617- if (debug)
1594+ if ((((Lx - ox) >> 4 ) % tex_width) != 0 )
16181595 {
1619- fprintf (stderr, " v0:%d and v1:%d\n " , v[i].V , v[i + 1 ].V );
1620- fprintf (stderr, " a0:%f and a1:%f\n " , ay0, ay1);
1621- fprintf (stderr, " t0:%d and t1:%d\n " , ty0, ty1);
1622- }
1623- #endif
1624-
1596+ // Compute the coordinate of first and last texels (in native with a linear filtering)
1597+ const int X0 = v[i].XYZ .X - ox;
1598+ const int X1 = v[i + 1 ].XYZ .X - ox;
1599+ const float ax0 = alpha0 (Lx, X0, X1);
1600+ const float ax1 = alpha1 (Lx, X0, X1);
1601+ const u16 tx0 = Interpolate_UV (ax0, v[i].U , v[i + 1 ].U );
1602+ const u16 tx1 = Interpolate_UV (ax1, v[i].U , v[i + 1 ].U );
1603+ // DevCon.Warning("Tex width %d draw width %d (X %x -> %x U %x -> %x", tex_width, ((v[i + 1].XYZ.X - v[i].XYZ.X) >> 4), X0, X1, v[i].U, v[i+1].U);
16251604#ifdef DEBUG_U
1626- if (debug)
1627- fprintf (stderr, " GREP_BEFORE %d => %d \n " , v[i]. U , v[i + 1 ]. U );
1628- # endif
1629- # ifdef DEBUG_V
1630- if (debug)
1631- fprintf (stderr, " GREP_BEFORE %d => %d \n " , v[i]. V , v[i + 1 ]. V );
1605+ if (debug)
1606+ {
1607+ fprintf (stderr, " u0:%d and u1:%d \n " , v[i]. U , v[i + 1 ]. U );
1608+ fprintf (stderr, " a0:%f and a1:%f \n " , ax0, ax1);
1609+ fprintf (stderr, " t0:%d and t1:%d \n " , tx0, tx1);
1610+ }
16321611#endif
1633-
16341612#if 1
1635- // Use rounded value of the newly computed texture coordinate. It ensures
1636- // that sampling will remains inside texture boundary
1637- //
1638- // Note for bilinear: by definition it will never work correctly! A sligh modification
1639- // of interpolation migth trigger a discard (with alpha testing)
1640- // Let's use something simple that correct really bad case (for a couple of 2D games).
1641- // I hope it won't create too much glitches.
1642- if (linear)
1643- {
1644- const int Lu = v[i + 1 ].U - v[i].U ;
1645- // Note 32 is based on taisho-mononoke
1646- if ((Lu > 0 ) && (Lu <= (Lx + 32 )))
1613+ // Use rounded value of the newly computed texture coordinate. It ensures
1614+ // that sampling will remains inside texture boundary
1615+ //
1616+ // Note for bilinear: by definition it will never work correctly! A sligh modification
1617+ // of interpolation migth trigger a discard (with alpha testing)
1618+ // Let's use something simple that correct really bad case (for a couple of 2D games).
1619+ // I hope it won't create too much glitches.
1620+ if (linear)
16471621 {
1648- v[i + 1 ].U -= 8 ;
1622+ const int Lu = v[i + 1 ].U - v[i].U ;
1623+ // Note 32 is based on taisho-mononoke
1624+ if ((Lu > 0 ) && (Lu <= (Lx + 32 )))
1625+ {
1626+ v[i + 1 ].U -= 8 ;
1627+ }
16491628 }
1629+ else
1630+ {
1631+ if (tx0 <= tx1)
1632+ {
1633+ v[i].U = tx0;
1634+ v[i + 1 ].U = tx1 + 16 ;
1635+ }
1636+ else
1637+ {
1638+ v[i].U = tx0 + 15 ;
1639+ v[i + 1 ].U = tx1;
1640+ }
1641+ }
1642+ #endif
16501643 }
16511644 else
16521645 {
1653- if (tx0 <= tx1 )
1646+ if (((v[i + 1 ]. U & 0xf ) ^ ((v[i + 1 ]. XYZ . X - ox) & 0xf )) && ((v[i + 1 ]. U - v[i]. U ) >> 4 ) == tex_width && (Lx >> 4 ) / tex_width <= 2 )
16541647 {
1655- v[i].U = tx0 ;
1656- v[i + 1 ].U = tx1 + 16 ;
1648+ v[i].U &= ~ 0xf ;
1649+ v[i + 1 ].U -= 8 ;
16571650 }
16581651 else
16591652 {
1660- v[i].U = tx0 + 15 ;
1661- v[i + 1 ].U = tx1;
1653+ v[i].U &= ~0xf ;
1654+ v[i + 1 ].U &= ~0xf ;
1655+ v[i].U |= (v[i].XYZ .X - ox) & 0xf ;
1656+ v[i + 1 ].U |= (v[i + 1 ].XYZ .X - ox) & 0xf ;
16621657 }
16631658 }
1659+
1660+ const int tex_height = std::max (1 , (v[i + 1 ].V - v[i].V ) >> 4 );
1661+ const int oy = m_context->XYOFFSET .OFY ;
1662+ const int Ly = (v[i + 1 ].XYZ .Y - v[i].XYZ .Y );
1663+
1664+ if ((((Ly - oy) >> 4 ) % tex_height) != 0 )
1665+ {
1666+ const int Y0 = v[i].XYZ .Y - oy;
1667+ const int Y1 = v[i + 1 ].XYZ .Y - oy;
1668+ const float ay0 = alpha0 (Ly, Y0, Y1);
1669+ const float ay1 = alpha1 (Ly, Y0, Y1);
1670+ const u16 ty0 = Interpolate_UV (ay0, v[i].V , v[i + 1 ].V );
1671+ const u16 ty1 = Interpolate_UV (ay1, v[i].V , v[i + 1 ].V );
1672+ #ifdef DEBUG_V
1673+ if (debug)
1674+ {
1675+ fprintf (stderr, " v0:%d and v1:%d\n " , v[i].V , v[i + 1 ].V );
1676+ fprintf (stderr, " a0:%f and a1:%f\n " , ay0, ay1);
1677+ fprintf (stderr, " t0:%d and t1:%d\n " , ty0, ty1);
1678+ }
16641679#endif
16651680#if 1
1666- if (linear)
1667- {
1668- const int Lv = v[i + 1 ].V - v[i].V ;
1669- if ((Lv > 0 ) && (Lv <= (Ly + 32 )))
1681+ if (linear)
16701682 {
1671- v[i + 1 ].V -= 8 ;
1683+ const int Lv = v[i + 1 ].V - v[i].V ;
1684+ if ((Lv > 0 ) && (Lv <= (Ly + 32 )))
1685+ {
1686+ v[i + 1 ].V -= 8 ;
1687+ }
1688+ }
1689+ else
1690+ {
1691+ if (ty0 <= ty1)
1692+ {
1693+ v[i].V = ty0;
1694+ v[i + 1 ].V = ty1 + 16 ;
1695+ }
1696+ else
1697+ {
1698+ v[i].V = ty0 + 15 ;
1699+ v[i + 1 ].V = ty1;
1700+ }
16721701 }
1702+ #endif
16731703 }
16741704 else
16751705 {
1676- if (ty0 <= ty1 )
1706+ if (((v[i + 1 ]. V & 0xf ) ^ ((v[i + 1 ]. XYZ . Y - oy) & 0xf )) && ((v[i + 1 ]. V - v[i]. V ) >> 4 ) == tex_height && (Ly >> 4 ) / tex_height <= 2 )
16771707 {
1678- v[i].V = ty0 ;
1679- v[i + 1 ].V = ty1 + 16 ;
1708+ v[i].V &= ~ 0xf ;
1709+ v[i + 1 ].V -= 8 ;
16801710 }
16811711 else
16821712 {
1683- v[i].V = ty0 + 15 ;
1684- v[i + 1 ].V = ty1;
1713+ v[i].V &= ~0xf ;
1714+ v[i + 1 ].V &= ~0xf ;
1715+ v[i].V |= (v[i].XYZ .Y - oy) & 0xf ;
1716+ v[i + 1 ].V |= (v[i + 1 ].XYZ .Y - oy) & 0xf ;
16851717 }
16861718 }
1719+ #ifdef DEBUG_U
1720+ if (debug)
1721+ fprintf (stderr, " GREP_BEFORE %d => %d\n " , v[i].U , v[i + 1 ].U );
1722+ #endif
1723+ #ifdef DEBUG_V
1724+ if (debug)
1725+ fprintf (stderr, " GREP_BEFORE %d => %d\n " , v[i].V , v[i + 1 ].V );
16871726#endif
16881727
1728+
16891729#ifdef DEBUG_U
16901730 if (debug)
16911731 fprintf (stderr, " GREP_AFTER %d => %d\n\n " , v[i].U , v[i + 1 ].U );
0 commit comments