Skip to content

Commit a4b9937

Browse files
committed
1.4.0 ig
1 parent 431fd32 commit a4b9937

File tree

5 files changed

+392
-137
lines changed

5 files changed

+392
-137
lines changed

KeyLighting/CPUImageProcessor.cs

Lines changed: 100 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System.Drawing;
33
using System.Drawing.Drawing2D;
44
using System.Drawing.Imaging;
5+
using System.Numerics;
56
using System.Runtime.CompilerServices;
67
using System.Runtime.InteropServices;
78
using System.Threading.Tasks;
@@ -247,23 +248,44 @@ private void ExtractColumns24Bpp(int stride, int width, int height)
247248
{
248249
unchecked
249250
{
250-
uint totalR = 0, totalG = 0, totalB = 0;
251251
int pixelCount = height;
252252
int columnOffset = x * 3;
253-
254253
int y = 0;
255-
for (; y < height - 3; y += 4)
254+
255+
Vector<ulong> sumR = Vector<ulong>.Zero;
256+
Vector<ulong> sumG = Vector<ulong>.Zero;
257+
Vector<ulong> sumB = Vector<ulong>.Zero;
258+
int vectorSize = Vector<byte>.Count;
259+
260+
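// Block-wise sum: gather vectorSize rows of this column into a scratch buffer, then add the B, G, R bytes with a scalar loop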
261+
for (; y <= height - vectorSize; y += vectorSize)
256262
{
257-
int offset1 = y * stride + columnOffset;
258-
int offset2 = (y + 1) * stride + columnOffset;
259-
int offset3 = (y + 2) * stride + columnOffset;
260-
int offset4 = (y + 3) * stride + columnOffset;
261-
262-
totalB += (uint)pixelBuffer[offset1] + pixelBuffer[offset2] + pixelBuffer[offset3] + pixelBuffer[offset4];
263-
totalG += (uint)pixelBuffer[offset1 + 1] + pixelBuffer[offset2 + 1] + pixelBuffer[offset3 + 1] + pixelBuffer[offset4 + 1];
264-
totalR += (uint)pixelBuffer[offset1 + 2] + pixelBuffer[offset2 + 2] + pixelBuffer[offset3 + 2] + pixelBuffer[offset4 + 2];
263+
Span<byte> colBytes = stackalloc byte[vectorSize * 3];
264+
for (int v = 0; v < vectorSize; v++)
265+
{
266+
int offset = (y + v) * stride + columnOffset;
267+
colBytes[v * 3 + 0] = pixelBuffer[offset];
268+
colBytes[v * 3 + 1] = pixelBuffer[offset + 1];
269+
colBytes[v * 3 + 2] = pixelBuffer[offset + 2];
270+
}
271+
272+
var vec = new Vector<byte>(colBytes);
273+
274+
// Extract R, G, B channels and sum
275+
ulong r = 0, g = 0, b = 0;
276+
for (int v = 0; v < vectorSize; v++)
277+
{
278+
b += vec[v * 3 + 0];
279+
g += vec[v * 3 + 1];
280+
r += vec[v * 3 + 2];
281+
}
282+
sumR += new Vector<ulong>(r);
283+
sumG += new Vector<ulong>(g);
284+
sumB += new Vector<ulong>(b);
265285
}
266286

287+
// Scalar sum for remaining pixels
288+
ulong totalR = 0, totalG = 0, totalB = 0;
267289
for (; y < height; y++)
268290
{
269291
int offset = y * stride + columnOffset;
@@ -272,56 +294,69 @@ private void ExtractColumns24Bpp(int stride, int width, int height)
272294
totalR += pixelBuffer[offset + 2];
273295
}
274296

275-
byte avgR = (byte)(totalR / pixelCount);
276-
byte avgG = (byte)(totalG / pixelCount);
277-
byte avgB = (byte)(totalB / pixelCount);
297+
// Add vectorized sums
298+
for (int i = 0; i < Vector<ulong>.Count; i++)
299+
{
300+
totalR += sumR[i];
301+
totalG += sumG[i];
302+
totalB += sumB[i];
303+
}
304+
305+
byte avgR = (byte)(totalR / (ulong)pixelCount);
306+
byte avgG = (byte)(totalG / (ulong)pixelCount);
307+
byte avgB = (byte)(totalB / (ulong)pixelCount);
278308

279309
rawColors[x] = new OpenRGB.NET.Color(avgR, avgG, avgB);
280310
}
281311
});
282312
}
283313

314+
315+
// TODO (ExtractColumns24Bpp, above): move its stackalloc and Vector<byte> creation outside the vectorized loop; the method below uses neither.
284316
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
285317
private void ExtractColumns32Bpp(int stride, int width, int height)
286318
{
287319
Parallel.For(0, width, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, x =>
288320
{
289321
unchecked
290322
{
291-
uint totalR = 0, totalG = 0, totalB = 0;
292323
int pixelCount = height;
293324
int columnOffset = x * 4;
325+
ulong totalR = 0, totalG = 0, totalB = 0;
294326

327+
// Process in blocks for cache efficiency
328+
int blockSize = 32; // Tune for your CPU cache
295329
int y = 0;
296-
for (; y < height - 3; y += 4)
330+
for (; y <= height - blockSize; y += blockSize)
297331
{
298-
int offset1 = y * stride + columnOffset;
299-
int offset2 = (y + 1) * stride + columnOffset;
300-
int offset3 = (y + 2) * stride + columnOffset;
301-
int offset4 = (y + 3) * stride + columnOffset;
302-
303-
totalB += (uint)(pixelBuffer[offset1] + pixelBuffer[offset2] + pixelBuffer[offset3] + pixelBuffer[offset4]);
304-
totalG += (uint)pixelBuffer[offset1 + 1] + pixelBuffer[offset2 + 1] + pixelBuffer[offset3 + 1] + pixelBuffer[offset4 + 1];
305-
totalR += (uint)pixelBuffer[offset1 + 2] + pixelBuffer[offset2 + 2] + pixelBuffer[offset3 + 2] + pixelBuffer[offset4 + 2];
332+
for (int b = 0; b < blockSize; b++)
333+
{
334+
int offset = (y + b) * stride + columnOffset;
335+
totalB += pixelBuffer![offset];
336+
totalG += pixelBuffer![offset + 1];
337+
totalR += pixelBuffer![offset + 2];
338+
}
306339
}
307-
340+
// Process remaining pixels
308341
for (; y < height; y++)
309342
{
310343
int offset = y * stride + columnOffset;
311-
totalB += pixelBuffer[offset];
312-
totalG += pixelBuffer[offset + 1];
313-
totalR += pixelBuffer[offset + 2];
344+
totalB += pixelBuffer![offset];
345+
totalG += pixelBuffer![offset + 1];
346+
totalR += pixelBuffer![offset + 2];
314347
}
315348

316-
byte avgR = (byte)(totalR / pixelCount);
317-
byte avgG = (byte)(totalG / pixelCount);
318-
byte avgB = (byte)(totalB / pixelCount);
349+
byte avgR = (byte)(totalR / (ulong)pixelCount);
350+
byte avgG = (byte)(totalG / (ulong)pixelCount);
351+
byte avgB = (byte)(totalB / (ulong)pixelCount);
319352

320353
rawColors[x] = new OpenRGB.NET.Color(avgR, avgG, avgB);
321354
}
322355
});
323356
}
324357

358+
359+
325360
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
326361
private void ProcessColumnsWithEffects(int width, double brightness, double vibrance, double contrast, int darkThreshold, double darkFactor)
327362
{
@@ -394,19 +429,48 @@ private OpenRGB.NET.Color FastApplyEffects(byte r, byte g, byte b, double bright
394429
}
395430

396431
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
432+
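// LUT initialization: the brightness LUT takes a Vector<float> path when Vector.IsHardwareAccelerated is true; the contrast LUT is filled by a scalar loop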
397433
private void InitializeLuts(double brightness, double contrast)
398434
{
399-
for (int i = 0; i < 256; i++)
435+
// Vectorized brightness LUT
436+
if (Vector.IsHardwareAccelerated)
400437
{
438+
var brightnessVec = new Vector<float>((float)brightness);
439+
int vecSize = Vector<float>.Count;
440+
int i = 0;
441+
for (; i <= 256 - vecSize; i += vecSize)
442+
{
443+
var indices = new Vector<float>(Enumerable.Range(i, vecSize).Select(x => (float)x).ToArray());
444+
var result = Vector.Multiply(indices, brightnessVec);
445+
for (int j = 0; j < vecSize; j++)
446+
{
447+
brightnessLut[i + j] = (byte)Math.Clamp((int)result[j], 0, 255);
448+
}
449+
}
450+
// Handle any remaining elements
451+
for (; i < 256; i++)
452+
{
453+
int brightVal = (int)(i * brightness);
454+
brightnessLut[i] = (byte)Math.Clamp(brightVal, 0, 255);
455+
}
456+
}
457+
else
458+
{
459+
for (int i = 0; i < 256; i++)
460+
{
461+
int brightVal = (int)(i * brightness);
462+
brightnessLut[i] = (byte)Math.Clamp(brightVal, 0, 255);
463+
}
464+
}
401465

402-
int brightVal = (int)(i * brightness);
403-
brightnessLut[i] = (byte)Math.Min(Math.Max(brightVal, 0), 255);
404-
466+
// Contrast LUT (not vectorized)
467+
for (int i = 0; i < 256; i++)
468+
{
405469
if (Math.Abs(contrast - 1.0) > 0.001)
406470
{
407471
double normalized = i / 255.0;
408472
double adjusted = Math.Pow(normalized, contrast) * 255.0;
409-
contrastLut[i] = (byte)Math.Min(Math.Max((int)adjusted, 0), 255);
473+
contrastLut[i] = (byte)Math.Clamp((int)adjusted, 0, 255);
410474
}
411475
else
412476
{

KeyLighting/KeyLighting.csproj

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,6 @@
1717
<PackageReference Include="System.Drawing.Common" Version="9.0.4" />
1818
<PackageReference Include="System.Linq" Version="4.3.0" />
1919
<PackageReference Include="System.Threading" Version="4.3.0" />
20-
<PackageReference Include="Topshelf" Version="4.3.0" />
21-
<PackageReference Include="Vortice.D3DCompiler" Version="3.6.2" />
22-
<PackageReference Include="Vortice.Direct3D11" Version="3.6.2" />
23-
<PackageReference Include="Vortice.DXGI" Version="3.6.2" />
24-
<PackageReference Include="Vortice.Mathematics" Version="1.9.3" />
2520
</ItemGroup>
2621

2722
<ItemGroup>

KeyLighting/Program.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,14 @@ static void Main(string[] args)
5656
var devices = client.GetAllControllerData();
5757
int keyboardIndex = -1;
5858
int ledCount = 0;
59+
//foreach (var device in devices)
60+
//{
61+
// Console.WriteLine($"Name: {device.Name}, Type: {device.Type}, LEDs: {device.Leds.Length}");
62+
//}
5963

6064
for (int i = 0; i < devices.Length; i++)
6165
{
62-
if (devices[i].Type == DeviceType.Keyboard && devices[i].Name.Contains("Scope"))
66+
if (devices[i].Type == DeviceType.Keyboard)
6367
{
6468
keyboardIndex = i;
6569
ledCount = devices[i].Leds.Length;

0 commit comments

Comments
 (0)