Skip to content

Commit 7086366

Browse files
committed
Renderers fixes
1 parent 941a284 commit 7086366

File tree

8 files changed

+135
-93
lines changed

8 files changed

+135
-93
lines changed

benchmarks/benchmarkRenderers.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,30 +78,36 @@ static void benchmarkRendererCursor(std::string_view name)
7878
);
7979
}
8080

81+
#if LIBCEDIMU_ENABLE_RENDERERSIMD
82+
#define IF_SIMD(code) code
83+
#else
84+
#define IF_SIMD(code)
85+
#endif
86+
8187
int main()
8288
{
8389
constexpr Video::Renderer::BitsPerPixel NORMAL_8 = Video::Renderer::BitsPerPixel::Normal8;
8490
constexpr Video::Renderer::BitsPerPixel DOUBLE_4 = Video::Renderer::BitsPerPixel::Double4;
8591
// constexpr Video::Renderer::BitsPerPixel HIGH_8 = Video::Renderer::BitsPerPixel::High8;
8692

8793
benchmarkRendererCursor<Video::RendererSoftware>("Cursor Soft");
88-
benchmarkRendererCursor<Video::RendererSIMD>("Cursor SIMD");
94+
IF_SIMD(benchmarkRendererCursor<Video::RendererSIMD>("Cursor SIMD"));
8995

9096
benchmarkRenderer<Video::RendererSoftware, NORMAL_8, ICM(OFF), ICM(RGB555)>("Normal Soft RGB555");
91-
benchmarkRenderer<Video::RendererSIMD, NORMAL_8, ICM(OFF), ICM(RGB555)>("Normal SIMD RGB555");
97+
IF_SIMD((benchmarkRenderer<Video::RendererSIMD, NORMAL_8, ICM(OFF), ICM(RGB555)>("Normal SIMD RGB555")));
9298

9399
benchmarkRenderer<Video::RendererSoftware, NORMAL_8, ICM(DYUV), ICM(DYUV)>("Normal Soft DYUV");
94-
benchmarkRenderer<Video::RendererSIMD, NORMAL_8, ICM(DYUV), ICM(DYUV)>("Normal SIMD DYUV");
100+
IF_SIMD((benchmarkRenderer<Video::RendererSIMD, NORMAL_8, ICM(DYUV), ICM(DYUV)>("Normal SIMD DYUV")));
95101

96102
benchmarkRenderer<Video::RendererSoftware, DOUBLE_4, ICM(DYUV), ICM(DYUV)>("Double Soft DYUV");
97-
benchmarkRenderer<Video::RendererSIMD, DOUBLE_4, ICM(DYUV), ICM(DYUV)>("Double SIMD DYUV");
103+
IF_SIMD((benchmarkRenderer<Video::RendererSIMD, DOUBLE_4, ICM(DYUV), ICM(DYUV)>("Double SIMD DYUV")));
98104

99105
benchmarkRenderer<Video::RendererSoftware, DOUBLE_4, ICM(CLUT4), ICM(CLUT4)>("Double Soft CLUT4");
100-
benchmarkRenderer<Video::RendererSIMD, DOUBLE_4, ICM(CLUT4), ICM(CLUT4)>("Double SIMD CLUT4");
106+
IF_SIMD((benchmarkRenderer<Video::RendererSIMD, DOUBLE_4, ICM(CLUT4), ICM(CLUT4)>("Double SIMD CLUT4")));
101107

102108
benchmarkRenderer<Video::RendererSoftware, NORMAL_8, ICM(CLUT8), ICM(CLUT7)>("Normal Soft CLUT8/CLUT7");
103-
benchmarkRenderer<Video::RendererSIMD, NORMAL_8, ICM(CLUT8), ICM(CLUT7)>("Normal SIMD CLUT8/CLUT7");
109+
IF_SIMD((benchmarkRenderer<Video::RendererSIMD, NORMAL_8, ICM(CLUT8), ICM(CLUT7)>("Normal SIMD CLUT8/CLUT7")));
104110

105111
benchmarkRenderer<Video::RendererSoftware, DOUBLE_4, ICM(CLUT8), ICM(CLUT7)>("Double Soft CLUT8/CLUT7");
106-
benchmarkRenderer<Video::RendererSIMD, DOUBLE_4, ICM(CLUT8), ICM(CLUT7)>("Double SIMD CLUT8/CLUT7");
112+
IF_SIMD((benchmarkRenderer<Video::RendererSIMD, DOUBLE_4, ICM(CLUT8), ICM(CLUT7)>("Double SIMD CLUT8/CLUT7")));
107113
}

benchmarks/benchmarkVideoDecoders.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,14 +138,20 @@ static void benchmarkRunLength(std::string_view name, const uint8_t* data)
138138
);
139139
}
140140

141+
#if LIBCEDIMU_ENABLE_RENDERERSIMD
142+
#define IF_SIMD(code) code
143+
#else
144+
#define IF_SIMD(code)
145+
#endif
146+
141147
int main()
142148
{
143149
benchmarkRGB555Line<Video::decodeRGB555Line<HALF_WIDTH>>("RGB555 Soft");
144-
benchmarkRGB555Line<Video::decodeRGB555LineSIMD<HALF_WIDTH>>("RGB555 SIMD");
150+
IF_SIMD(benchmarkRGB555Line<Video::decodeRGB555LineSIMD<HALF_WIDTH>>("RGB555 SIMD"));
145151

146152
benchmarkDYUVLine<Video::decodeDYUVLine<HALF_WIDTH>>("DYUV Soft");
147153
benchmarkDYUVLine<Video::decodeDYUVLineLUT<HALF_WIDTH>>("DYUV LUT");
148-
benchmarkDYUVLine<Video::decodeDYUVLineSIMD<HALF_WIDTH>>("DYUV SIMD");
154+
IF_SIMD(benchmarkDYUVLine<Video::decodeDYUVLineSIMD<HALF_WIDTH>>("DYUV SIMD"));
149155

150156
benchmarkCLUTLine<Video::decodeCLUTLine<WIDTH>>("CLUT Soft");
151157

src/CDI/Video/Renderer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ bool Renderer::isValidDisplayFormat(const DisplayFormat display) noexcept
3030
void Renderer::IncrementCursorTime(const double ns) noexcept
3131
{
3232
if(!m_cursorEnabled || m_cursorBlinkOff == 0)
33+
{
34+
m_cursorIsOn = true; // Reset to true if blink is disabled during the off/complement pattern.
3335
return; // OFF == 0 means ON indefinitely.
36+
}
3437

3538
double delta = m_60FPS ? DELTA_60FPS : DELTA_50FPS;
3639
if(m_cursorIsOn)
@@ -133,10 +136,10 @@ bool Renderer::isAllowedImageCodingCombination(ImageCodingMethod planeA, ImageCo
133136
if(planeB == RGB555 && planeA != OFF)
134137
return false;
135138

136-
if((planeA == CLUT8 || planeA == CLUT77) && planeB != DYUV)
139+
if((planeA == CLUT8 || planeA == CLUT77) && (planeB != DYUV && planeB != OFF))
137140
return false;
138141

139-
// If I implement QHY, if(planeB == QHY && planeA != DYUV) return false;
142+
// If I implement QHY, if(planeB == QHY && (planeA != DYUV && planeA != OFF)) return false;
140143
return true;
141144
}
142145

src/CDI/Video/RendererDCP.cpp

Lines changed: 75 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,26 @@ bool Renderer::ExecuteDCPInstruction(const uint32_t instruction) noexcept
117117
return false;
118118
}
119119

120-
if constexpr(PLANE == A) // Plane A-only instructions.
120+
// Common instructions.
121+
switch(code)
121122
{
122-
switch(code)
123+
case NoOperation:
124+
case LoadControlTableLineStartPointer: // Avoid going in the default case.
125+
case LoadDisplayLineStartPointer:
126+
break;
127+
128+
case SignalScanLine: // Signal when scan reaches this line.
129+
return true;
130+
131+
case LoadDisplayParameters: // Load display parameters.
132+
m_imageType[PLANE] = decodeImageType(bits<0, 1>(instruction));
133+
m_pixelRepeatFactor[PLANE] = 1 << (1 + bits<2, 3>(instruction));
134+
m_bps[PLANE] = decodeBitsPerPixel(bits<8, 9>(instruction));
135+
break;
136+
137+
case SelectImageCodingMethod: // Select image coding methods.
138+
if constexpr(PLANE == A)
123139
{
124-
case SelectImageCodingMethod: // Select image coding methods.
125140
m_clutSelectHigh = bit<22>(instruction);
126141
m_matteNumber = bit<19>(instruction);
127142
m_codingMethod[B] = decodeCodingMethod1(bits<8, 11>(instruction));
@@ -131,91 +146,87 @@ bool Renderer::ExecuteDCPInstruction(const uint32_t instruction) noexcept
131146
static_cast<int>(m_codingMethod[A]), static_cast<int>(m_codingMethod[B]));
132147
// TODO: what to do with external video enabled?
133148
m_externalVideo = bit<18>(instruction);
134-
return false;
149+
}
150+
return false;
135151

136-
case LoadTransparencyControl: // Load transparency control information.
152+
case LoadTransparencyControl: // Load transparency control information.
153+
if constexpr(PLANE == A)
154+
{
137155
m_mix = !bit<23>(instruction);
138156
m_transparencyControl[B] = bits<8, 11>(instruction);
139157
m_transparencyControl[A] = bits<0, 3>(instruction);
140-
return false;
158+
}
159+
return false;
141160

142-
case LoadPlaneOrder: // Load plane order.
161+
case LoadPlaneOrder: // Load plane order.
162+
if constexpr(PLANE == A)
143163
m_planeOrder = bit<0>(instruction);
144-
return false;
164+
return false;
165+
166+
case SetCLUTBank: // Set CLUT bank.
167+
m_clutBank = bits<0, 1>(instruction);
168+
break;
145169

146-
case LoadTransparentColorA: // Load transparent color for plane A.
170+
case LoadTransparentColorA: // Load transparent color for plane A.
171+
if constexpr(PLANE == A)
147172
m_transparentColorRgb[A] = dcpExtractCommand(instruction);
148-
return false;
173+
return false;
174+
175+
case LoadTransparentColorB: // Load transparent color for plane B.
176+
if constexpr(PLANE == B)
177+
m_transparentColorRgb[B] = dcpExtractCommand(instruction);
178+
return false;
149179

150-
case LoadMaskColorA: // Load mask color for plane A.
180+
case LoadMaskColorA: // Load mask color for plane A.
181+
if constexpr(PLANE == A)
151182
m_maskColorRgb[A] = dcpExtractCommand(instruction);
152-
return false;
183+
return false;
153184

154-
case LoadDYUVStartValueA: // Load DYUV start value for plane A.
185+
case LoadMaskColorB: // Load mask color for plane B.
186+
if constexpr(PLANE == B)
187+
m_maskColorRgb[B] = dcpExtractCommand(instruction);
188+
return false;
189+
190+
case LoadDYUVStartValueA: // Load DYUV start value for plane A.
191+
if constexpr(PLANE == A)
155192
m_dyuvInitialValue[A] = dcpExtractCommand(instruction);
156-
return false;
193+
return false;
157194

158-
case LoadBackdropColor: // Load backdrop color.
195+
case LoadDYUVStartValueB: // Load DYUV start value for plane B.
196+
if constexpr(PLANE == B)
197+
m_dyuvInitialValue[B] = dcpExtractCommand(instruction);
198+
return false;
199+
200+
case LoadBackdropColor: // Load backdrop color.
201+
if constexpr(PLANE == A)
159202
m_backdropColor = bits<0, 3>(instruction);
160-
return false;
203+
return false;
161204

162-
case LoadMosaicFactorA: // Load mosaic pixel hold factor for A.
205+
case LoadMosaicFactorA: // Load mosaic pixel hold factor for A.
206+
if constexpr(PLANE == A)
207+
{
163208
m_holdEnabled[A] = bit<23>(instruction);
164209
m_holdFactor[A] = instruction;
165-
return false;
166-
167-
case LoadImageContributionFactorA: // Load image contribution factor for A.
168-
m_icf[A] = bits<0, 5>(instruction);
169-
return false;
170210
}
171-
}
172-
else // Plane B-only instructions.
173-
{
174-
switch(code)
175-
{
176-
case LoadTransparentColorB: // Load transparent color for plane B.
177-
m_transparentColorRgb[B] = dcpExtractCommand(instruction);
178-
return false;
179-
180-
case LoadMaskColorB: // Load mask color for plane B.
181-
m_maskColorRgb[B] = dcpExtractCommand(instruction);
182-
return false;
183-
184-
case LoadDYUVStartValueB: // Load DYUV start value for plane B.
185-
m_dyuvInitialValue[B] = dcpExtractCommand(instruction);
186-
return false;
211+
return false;
187212

188-
case LoadMosaicFactorB: // Load mosaic pixel hold factor for B.
213+
case LoadMosaicFactorB: // Load mosaic pixel hold factor for B.
214+
if constexpr(PLANE == B)
215+
{
189216
m_holdEnabled[B] = bit<23>(instruction);
190217
m_holdFactor[B] = instruction;
191-
return false;
192-
193-
case LoadImageContributionFactorB: // Load image contribution factor for B.
194-
m_icf[B] = bits<0, 5>(instruction);
195-
return false;
196218
}
197-
}
198-
199-
// Common instructions.
200-
switch(code)
201-
{
202-
case NoOperation:
203-
case LoadControlTableLineStartPointer: // Avoid going in the default case.
204-
case LoadDisplayLineStartPointer:
205-
break;
206-
207-
case SignalScanLine: // Signal when scan reaches this line.
208-
return true;
219+
return false;
209220

210-
case LoadDisplayParameters: // Load display parameters.
211-
m_imageType[PLANE] = decodeImageType(bits<0, 1>(instruction));
212-
m_pixelRepeatFactor[PLANE] = 1 << (1 + bits<2, 3>(instruction));
213-
m_bps[PLANE] = decodeBitsPerPixel(bits<8, 9>(instruction));
214-
break;
221+
case LoadImageContributionFactorA: // Load image contribution factor for A.
222+
if constexpr(PLANE == A)
223+
m_icf[A] = bits<0, 5>(instruction);
224+
return false;
215225

216-
case SetCLUTBank: // Set CLUT bank.
217-
m_clutBank = bits<0, 1>(instruction);
218-
break;
226+
case LoadImageContributionFactorB: // Load image contribution factor for B.
227+
if constexpr(PLANE == B)
228+
m_icf[B] = bits<0, 5>(instruction);
229+
return false;
219230

220231
default:
221232
std::println(stderr, "Unknow DCP instruction {:#X}", instruction);

src/CDI/Video/RendererSIMD.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,15 @@ std::pair<uint16_t, uint16_t> RendererSIMD::DrawLine(const uint8_t* lineA, const
3939
uint16_t height = GetDisplayHeight();
4040

4141
m_screen.m_width = m_plane[A].m_width = m_plane[B].m_width = width * 2;
42-
m_screen.m_height = m_plane[A].m_height = m_plane[B].m_height = height;
42+
m_screen.m_height = m_plane[A].m_height = m_plane[B].m_height = m_backdropPlane.m_height = height;
4343
}
4444

4545
ResetMatte();
4646

47-
const uint16_t bytesA = DrawLinePlane<A>(lineA, nullptr); // nullptr because plane A can't decode RGB555.
47+
uint16_t bytesA = DrawLinePlane<A>(lineA, nullptr); // nullptr because plane A can't decode RGB555.
4848
const uint16_t bytesB = DrawLinePlane<B>(lineB, lineA);
49+
if(m_codingMethod[B] == ImageCodingMethod::RGB555)
50+
bytesA = bytesB;
4951

5052
DrawLineBackdrop();
5153

@@ -196,7 +198,7 @@ static inline constexpr SIMDNativePixel ALPHA_MASKK{0xFF'00'00'00};
196198
* \tparam SIMD The SIMD type holding signed 32 bits integers.
197199
*/
198200
template<typename SIMD>
199-
static constexpr void applyICFMixSIMDShift(Pixel* screen, const Pixel* planeFront, const Pixel* planeBack, const uint8_t* icfFront, const uint8_t* icfBack) noexcept
201+
static constexpr void applyICFMixSIMDShift(Pixel* screen, const Pixel* planeFront, const Pixel* planeBack, const uint8_t* icfFront, const uint8_t* icfBack, const uint32_t backdrop) noexcept
200202
{
201203
SIMD icfF{icfFront, stdx::element_aligned};
202204
SIMD icfB{icfBack, stdx::element_aligned};
@@ -267,6 +269,7 @@ static constexpr void applyICFMixSIMDShift(Pixel* screen, const Pixel* planeFron
267269
bfp = stdx::clamp(bfp, U8_MIN, U8_MAX);
268270

269271
const SIMD result = (rfp << 16) | (gfp << 8) | bfp;
272+
stdx::where(maskF && maskB, result) = backdrop;
270273

271274
result.copy_to(screen->AsU32Pointer(), stdx::element_aligned);
272275
}
@@ -276,7 +279,7 @@ static constexpr void applyICFMixSIMDShift(Pixel* screen, const Pixel* planeFron
276279
/** \brief Applies ICF and mixes using SIMD (algorithm that casts the registers to access RGB components).
277280
* This can't be used with fixed-size SIMD because fixed_size_simd is not trivially copyable.
278281
*/
279-
static constexpr void applyICFMixSIMDCast(Pixel* screen, const Pixel* planeFront, const Pixel* planeBack, const uint8_t* icfFront, const uint8_t* icfBack) noexcept
282+
static constexpr void applyICFMixSIMDCast(Pixel* screen, const Pixel* planeFront, const Pixel* planeBack, const uint8_t* icfFront, const uint8_t* icfBack, const uint32_t backdrop) noexcept
280283
{
281284
SIMDNativePixel icfF{icfFront, stdx::element_aligned};
282285
SIMDNativePixel icfB{icfBack, stdx::element_aligned};
@@ -322,13 +325,14 @@ static constexpr void applyICFMixSIMDCast(Pixel* screen, const Pixel* planeFront
322325
// rgbB16 >>= 6;
323326

324327
rgbF16 += 16;
325-
// rgbB16 += 16; Don't add 16 to back plane when applying ICF because the below mixing subtracts it.
328+
// Don't add 16 to the back plane when applying ICF because the mixing below subtracts it.
326329

327330
rgbF16 += rgbB16;
328331

329332
rgbF16 = stdx::clamp(rgbF16, U8_MINN, U8_MAXX);
330333

331-
const SIMDNativePixel result = std::bit_cast<SIMDNativePixel>(stdx::static_simd_cast<SIMDNativeU8>(rgbF16));
334+
SIMDNativePixel result = std::bit_cast<SIMDNativePixel>(stdx::static_simd_cast<SIMDNativeU8>(rgbF16));
335+
stdx::where(maskF && maskB, result) = backdrop;
332336

333337
result.copy_to(screen->AsU32Pointer(), stdx::element_aligned);
334338
}
@@ -506,7 +510,7 @@ void RendererSIMD::HandleOverlayMixSIMD() noexcept
506510
planeFront += SIMD_SIZE, planeBack += SIMD_SIZE, icfFront += SIMD_SIZE, icfBack += SIMD_SIZE, screen += SIMD_SIZE)
507511
{
508512
if constexpr(MIX) // Mixing.
509-
applyICFMixSIMDCast(screen, planeFront, planeBack, icfFront, icfBack);
513+
applyICFMixSIMDCast(screen, planeFront, planeBack, icfFront, icfBack, m_backdropPlane.GetLinePointer(m_lineNumber)->AsU32());
510514
// applyICFMixSIMDShift<SIMDNativePixelSigned>(screen, planeFront, planeBack, icfFront, icfBack);
511515
else // Overlay.
512516
applyICFOverlaySIMDCast(screen, planeFront, planeBack, icfFront, icfBack, m_backdropPlane.GetLinePointer(m_lineNumber)->AsU32());
@@ -517,7 +521,7 @@ void RendererSIMD::HandleOverlayMixSIMD() noexcept
517521
{
518522
// Now the remaining width is less than a SIMD register.
519523
if constexpr(MIX)
520-
applyICFMixSIMDShift<SIMDFixedPixelSigned<WIDTH_REMINDER>>(screen, planeFront, planeBack, icfFront, icfBack);
524+
applyICFMixSIMDShift<SIMDFixedPixelSigned<WIDTH_REMINDER>>(screen, planeFront, planeBack, icfFront, icfBack, m_backdropPlane.GetLinePointer(m_lineNumber)->AsU32());
521525
else
522526
applyICFOverlaySIMDShift<SIMDFixedPixelSigned<WIDTH_REMINDER>>(screen, planeFront, planeBack, icfFront, icfBack, m_backdropPlane.GetLinePointer(m_lineNumber)->AsU32());
523527
}

src/CDI/Video/RendererSoftware.cpp

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ std::pair<uint16_t, uint16_t> RendererSoftware::DrawLine(const uint8_t* lineA, c
2828
uint16_t height = GetDisplayHeight();
2929

3030
m_screen.m_width = m_plane[A].m_width = m_plane[B].m_width = width * 2;
31-
m_screen.m_height = m_plane[A].m_height = m_plane[B].m_height = height;
31+
m_screen.m_height = m_plane[A].m_height = m_plane[B].m_height = m_backdropPlane.m_height = height;
3232
}
3333

3434
ResetMatte();
3535

36-
const uint16_t bytesA = DrawLinePlane<A>(lineA, nullptr); // nullptr because plane A can't decode RGB555.
36+
uint16_t bytesA = DrawLinePlane<A>(lineA, nullptr); // nullptr because plane A can't decode RGB555.
3737
const uint16_t bytesB = DrawLinePlane<B>(lineB, lineA);
38+
if(m_codingMethod[B] == ImageCodingMethod::RGB555)
39+
bytesA = bytesB;
3840

3941
DrawLineBackdrop();
4042

@@ -202,17 +204,24 @@ void RendererSoftware::OverlayMix() noexcept
202204

203205
if constexpr(MIX)
204206
{
205-
if(fp.a == 0) // When mixing transparent pixels are black (V.5.9.1).
207+
if(fp.a == 0 && bp.a == 0) // Front and back plane transparent: only show background.
206208
{
207-
fp = BLACK_PIXEL;
209+
*screen++ = backdrop;
208210
}
209-
210-
if(bp.a == 0)
211+
else
211212
{
212-
bp = BLACK_PIXEL;
213-
}
213+
if(fp.a == 0) // When mixing transparent pixels are black (V.5.9.1).
214+
{
215+
fp = BLACK_PIXEL;
216+
}
217+
218+
if(bp.a == 0)
219+
{
220+
bp = BLACK_PIXEL;
221+
}
214222

215-
mix(*screen++, fp, bp);
223+
mix(*screen++, fp, bp);
224+
}
216225
}
217226
else // Overlay.
218227
{

0 commit comments

Comments
 (0)