Skip to content

Commit 2f817ba

Browse files
committed
Fix benchmarkRenderers, implement a real SIMD DrawCursor and benchmark it
1 parent 6557962 commit 2f817ba

File tree

2 files changed

+50
-34
lines changed

2 files changed

+50
-34
lines changed

benchmarks/benchmarkRenderers.cpp

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
#include <string_view>
88

99
static constexpr size_t WIDTH = 768;
10+
static constexpr size_t HEIGHT = 280;
1011
static constexpr size_t FRAMES = 10000;
12+
static constexpr size_t FRAMES_CURSOR = 10'000'000;
1113

1214
static constexpr std::array<uint8_t, WIDTH> LINEA{};
1315
static constexpr std::array<uint8_t, WIDTH> LINEB{};
@@ -23,15 +25,14 @@ static void benchmarkRenderer(std::string_view name)
2325
renderer.m_codingMethod[B] = CODINGB;
2426
renderer.m_bps[A] = BPS;
2527
renderer.m_bps[B] = BPS;
28+
renderer.SetDisplayFormat(Video::Renderer::DisplayFormat::PAL);
2629
renderer.m_mix = true;
2730

28-
const size_t height = renderer.m_plane[A].m_height;
29-
3031
// Benchmark
3132
const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
3233
for(size_t f = 0; f < FRAMES; ++f)
3334
{
34-
for(size_t y = 0; y < height; ++y)
35+
for(size_t y = 0; y < HEIGHT; ++y)
3536
{
3637
renderer.DrawLine(LINEA.data(), LINEB.data());
3738
}
@@ -41,21 +42,48 @@ static void benchmarkRenderer(std::string_view name)
4142
const std::chrono::high_resolution_clock::time_point finish = std::chrono::high_resolution_clock::now();
4243
const std::chrono::nanoseconds delta = finish - start;
4344

44-
std::print("{} ", name);
45-
std::println("{} {} {}/f {}",
45+
std::println("{} {} {} {}/f {}",
46+
name,
4647
delta,
4748
std::chrono::duration_cast<std::chrono::microseconds>(delta),
4849
std::chrono::duration_cast<std::chrono::microseconds>(delta / FRAMES),
4950
std::chrono::duration_cast<std::chrono::milliseconds>(delta)
5051
);
5152
}
5253

54+
/** \brief Times repeated DrawCursor() calls on a renderer and prints the result.
 * \tparam RENDERER Renderer type to benchmark; it is default-constructed here and must provide
 *                  SetDisplayFormat() and DrawCursor().
 * \param name Label printed at the start of the result line.
 *
 * Calls DrawCursor() FRAMES_CURSOR times back to back and prints, in order: the label, the total
 * elapsed time in nanoseconds, the same total in microseconds, the average time per call
 * (suffixed with "/f"), and the total in milliseconds.
 */
template<typename RENDERER>
55+
static void benchmarkRendererCursor(std::string_view name)
56+
{
57+
RENDERER renderer;
58+
// Configure the renderer for PAL before the timed section.
// NOTE(review): presumably this is what sizes the planes DrawCursor() writes to — confirm.
renderer.SetDisplayFormat(Video::Renderer::DisplayFormat::PAL);
59+
60+
// Benchmark: only the DrawCursor() loop sits between the two clock reads.
61+
const std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now();
62+
for(size_t f = 0; f < FRAMES_CURSOR; ++f)
63+
{
64+
renderer.DrawCursor();
65+
}
66+
const std::chrono::high_resolution_clock::time_point finish = std::chrono::high_resolution_clock::now();
67+
const std::chrono::nanoseconds delta = finish - start;
68+
69+
std::println("{} {} {} {}/f {}",
70+
name,
71+
delta,
72+
std::chrono::duration_cast<std::chrono::microseconds>(delta),
73+
// Average per DrawCursor() call; kept at nanosecond resolution (no duration_cast).
delta / FRAMES_CURSOR,
74+
std::chrono::duration_cast<std::chrono::milliseconds>(delta)
75+
);
76+
}
77+
5378
int main()
5479
{
5580
constexpr Video::Renderer::BitsPerPixel NORMAL_8 = Video::Renderer::BitsPerPixel::Normal8;
5681
constexpr Video::Renderer::BitsPerPixel DOUBLE_4 = Video::Renderer::BitsPerPixel::Double4;
5782
// constexpr Video::Renderer::BitsPerPixel HIGH_8 = Video::Renderer::BitsPerPixel::High8;
5883

84+
benchmarkRendererCursor<Video::RendererSoftware>("Cursor Soft");
85+
benchmarkRendererCursor<Video::RendererSIMD>("Cursor SIMD");
86+
5987
benchmarkRenderer<Video::RendererSoftware, NORMAL_8, ICM(OFF), ICM(RGB555)>("Normal Soft RGB555");
6088
benchmarkRenderer<Video::RendererSIMD, NORMAL_8, ICM(OFF), ICM(RGB555)>("Normal SIMD RGB555");
6189

src/CDI/Video/RendererSIMD.cpp

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -132,37 +132,25 @@ void RendererSIMD::DrawCursor() noexcept
132132

133133
const Pixel color = backdropCursorColorToPixel(m_cursorColor);
134134

135-
Plane::iterator it = m_cursorPlane.begin();
136-
for(size_t y = 0; y < m_cursorPlane.m_height; ++y)
135+
using SIMDCursorLine = stdx::fixed_size_simd<uint32_t, 16>;
136+
using SIMDCursorLineMask = SIMDCursorLine::mask_type;
137+
static constexpr SIMDCursorLine SHIFTER([] (uint32_t i) { return 15 - i; });
138+
139+
int patternIndex = 0;
140+
for(Plane::iterator dst = m_cursorPlane.begin(); dst < m_cursorPlane.end(); dst += SIMDCursorLine::size(), ++patternIndex)
137141
{
138-
for(int x = static_cast<int>(m_cursorPlane.m_width) - 1; x >= 0; --x)
139-
{
140-
const uint16_t mask = (1 << x);
141-
if(m_cursorPatterns[y] & mask)
142-
*it = color;
143-
else
144-
*it = BLACK_PIXEL;
145-
++it;
146-
}
147-
}
142+
// Convert pattern to a mask.
143+
SIMDCursorLine patternSimd = m_cursorPatterns[patternIndex];
144+
patternSimd >>= SHIFTER;
145+
patternSimd &= 1;
146+
const SIMDCursorLineMask patternMask = patternSimd == 1;
148147

149-
// TODO: try in SIMD.
150-
// using FixedPixelSIMD = stdx::fixed_size_simd<uint32_t, 16>;
151-
// int pattern = 0;
152-
// for(PlaneSIMD::iterator it = m_cursorPlane.begin(); it < m_cursorPlane.end(); it += FixedPixelSIMD::size(), ++pattern)
153-
// {
154-
// FixedPixelSIMD pixel{&*it, stdx::element_aligned};
155-
//
156-
// for(int x = m_cursorPlane.m_width - 1, pix = 0; --x >= 0; pix++)
157-
// {
158-
// const uint16_t mask = (1 << x);
159-
// if(m_cursorPatterns[pattern] & mask)
160-
// pixel[pix] = color;
161-
// else
162-
// pixel[pix] = black;
163-
// }
164-
// pixel.copy_to(&*it, stdx::element_aligned);
165-
// }
148+
const SIMDCursorLine colorSimd{color.AsU32()};
149+
150+
SIMDCursorLine cursorPixels = BLACK_PIXEL.AsU32();
151+
stdx::where(patternMask, cursorPixels) = colorSimd;
152+
cursorPixels.copy_to(dst->AsU32Pointer(), stdx::element_aligned);
153+
}
166154
}
167155

168156
/** \brief Overlays or mix all the planes to the final screen.

0 commit comments

Comments
 (0)