-
Notifications
You must be signed in to change notification settings - Fork 2
Description
Hi!
We have had contact on Reddit. I am pretty sure that your SIMD code doesnt work correctly and it also has design fault.
I have rewritten both the Scalar and the SIMD code, mainly to make it simpler. Take what you think useful.
I very much like your buffer size to x-y-dimensions calculations but I think it is an unnecessary overkill. So I just take the Y-resolution (number of vertical pixels) and calculate the X-resolution (horizontal pixels). Instead of resolution correction, I only accept y-resolution which results in whole-number X-resolution.
SIMD:
Where I think your implementation is problematic are the following items:
-
You have in-memory arrays for x and y values which are read into Vectors. That is unnecessary and thus suboptimal as SIMD code can easily get memory-bound.
You can calculate the running X and Y vectors purely from registers (setting initial value and increment with step size) without touching memory. -
I also get at
testSpan[resVectorNumber] = Avx.Add(xSquVec, ySquVec);an index out of range exception
I have used only SSE4.2 as my machine is not AVX2 capable.
If you add SixLabors.ImageSharp; from NuGet you can actually get an image of the calculated fractal!
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
namespace MandelBrot2
{
public static unsafe class Mandelbrot
{
const float TOP_Y = 1f;
const float BOTTOM_Y = -1f;
const float RIGHT_X = 1f;
const float LEFT_X = -2.5f;
public static int[] ScalarMandel(int resolutionY, int maxIterations)
{
if (resolutionY % 4 != 0)
throw new ArgumentException("resolutionY must be divisible by four");
int resolutionX = resolutionY / 4 * 7;
int[] result = new int[resolutionX * resolutionY];
float step = 2f / resolutionY;
float currentY = 1f;
for (int i = 0; i < resolutionY; i++)
{
float currentX = -2.5f;
for (int j = 0; j < resolutionX; j++)
{
var iteration = 0;
var xSquare = 0f;
var ySquare = 0f;
var sumSquare = 0f;
float x, y;
while (xSquare + ySquare <= 4f && iteration < maxIterations)
{
x = xSquare - ySquare + currentX;
y = sumSquare - xSquare - ySquare + currentY;
xSquare = x * x;
ySquare = y * y;
sumSquare = (x + y) * (x + y);
iteration++;
}
result[i * resolutionX + j] = iteration;
currentX += step;
}
currentY -= step;
}
return result;
}
/// <summary>
/// Computes an array representation of the Mandelbrot shape
/// </summary>
/// <returns>result is an array with values indicating for each point on the X-Y plane whether
/// the point is inside or outside the Mandelbrot shape.
/// result[i] equals the number of the iterations performed on the particalar element.
/// If it has a value below maxIterations, than it is outside the mandelbrot shape
/// </returns>
public static unsafe int[] VectorMandel(int resolutionY, int maxIterations)
{
const int VECTOR_SIZE = 4;
// ensure that resolutionX is divisible by SIMD-width
// ensure that resolutionY / 2.0 * 3.5 is integer
// this simplifies calculations
if (resolutionY % (4 * VECTOR_SIZE) != 0)
throw new ArgumentException($"resolutionY must be divisible by {VECTOR_SIZE * 4}");
int resolutionX = resolutionY / 4 * 7;
// distance between neighbouring points on the X-Y plane
float step = 2f / resolutionY;
// stepper vectors to easily step in Y and X directions
var yStepVec = Vector128.Create(-step);
var xStepVec = Vector128.Create(VECTOR_SIZE*step);
// always contains the current Y-axis position. We begin at the top most horizontal line
// will be updated inside loop. All SIMD-components of currentYvec are always the same
var currentYvec = Vector128.Create(1f);
var result = new int[resolutionX * resolutionY];
fixed (int* resultFixedPtr = result)
{
int* resultPtr = resultFixedPtr;
// outer loop: iterate along Y axis from top to bottom, one step at a time
for (int i = 0; i < resolutionY; i++)
{
// initialize the starting X vector to the leftmost X positions
var currentXvec = Vector128.Create(-2.5f, -2.5f + step, -2.5f + 2 * step, -2.5f + 3 * step);
// iterate along the X axis with "VECTOR_SIZE" number of elements at a time
// the value of "currentXvec" is updated at the end of the loop to always represent the working elements
for (int j = 0; j <= resolutionX - VECTOR_SIZE; j += VECTOR_SIZE)
{
int iteration = 0;
var iterationCounter = Vector128.Create(0);
Vector128<float> xVec, yVec;
var xVecSquare = Vector128.Create(0f);
var yVecSquare = Vector128.Create(0f);
var sumVecSquare = Vector128.Create(0f);
var fourVec = Vector128.Create(4f);
var oneVec = Vector128.Create(1);
while (iteration < maxIterations)
{
var test = Sse42.CompareLessThanOrEqual(Sse42.Add(xVecSquare, yVecSquare), fourVec);
// no components inside tolerance, no work left to do
if (Sse42.MoveMask(test) == 0)
break;
// mask the incrementer vector to increment only the positions still inside Mandelbrot-tolerance
var selectiveAdd = Sse42.And(oneVec, test.AsInt32());
iterationCounter = Sse42.Add(iterationCounter, selectiveAdd);
iteration++;
// set up values for next iteration
xVec = Sse42.Add(Sse42.Subtract(xVecSquare, yVecSquare), currentXvec);
yVec = Sse42.Add(Sse42.Subtract(sumVecSquare, Sse42.Add(xVecSquare, yVecSquare)), currentYvec);
xVecSquare = Sse42.Multiply(xVec, xVec);
yVecSquare = Sse42.Multiply(yVec, yVec);
sumVecSquare = Sse.Multiply(Sse42.Add(xVec, yVec), Sse42.Add(xVec, yVec));
}
// at this point the iterations counts for a whole vector has been calculated
// resulting values are stored
Sse42.Store(resultPtr, iterationCounter);
resultPtr += VECTOR_SIZE;
// move along the X axis by four step-s
currentXvec = Sse42.Add(currentXvec, xStepVec);
}
// move along the X axis by one step
currentYvec = Sse42.Add(currentYvec, yStepVec);
}
}
return result;
}
}
class Program
{
static void Main(string[] args)
{
int yLen = 1600;
int maxIteration = 10000;
var result1 = Mandelbrot.VectorMandel(yLen, maxIteration);
var xLen = result1.Length / yLen;
using (var image = new Image<Rgba32>(xLen, yLen))
{
for (int i = 0; i < result1.Length; i++)
{
image[i % xLen, i / xLen] =
result1[i] == maxIteration ? Rgba32.Black : Rgba32.White;
}
image.Save("mandel1.bmp");
}
}
}
}