Skip to content

Sse4.2 implementation #1

@jungjanos

Description

@jungjanos

Hi!

We have had contact on Reddit. I am pretty sure that your SIMD code doesnt work correctly and it also has design fault.

I have rewritten both the Scalar and the SIMD code, mainly to make it simpler. Take what you think useful.

I very much like your buffer size to x-y-dimensions calculations but I think it is an unnecessary overkill. So I just take the Y-resolution (number of vertical pixels) and calculate the X-resolution (horizontal pixels). Instead of resolution correction, I only accept y-resolution which results in whole-number X-resolution.

SIMD:
Where I think your implementation is problematic are the following items:

  1. You have in-memory arrays for x and y values which are read into Vectors. That is unnecessary and thus suboptimal as SIMD code can easily get memory-bound.
    You can calculate the running X and Y vectors purely from registers (setting initial value and increment with step size) without touching memory.

  2. I also get at testSpan[resVectorNumber] = Avx.Add(xSquVec, ySquVec); an index out of range exception

I have used only SSE4.2 as my machine is not AVX2 capable.
If you add SixLabors.ImageSharp; from NuGet you can actually get an image of the calculated fractal!

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;

namespace MandelBrot2
{
    public static unsafe class Mandelbrot
    {
        const float TOP_Y = 1f;
        const float BOTTOM_Y = -1f;
        const float RIGHT_X = 1f;
        const float LEFT_X = -2.5f;

        public static int[] ScalarMandel(int resolutionY, int maxIterations)
        {
            if (resolutionY % 4 != 0)
                throw new ArgumentException("resolutionY must be divisible by four");

            int resolutionX = resolutionY / 4 * 7;

            int[] result = new int[resolutionX * resolutionY];

            float step = 2f / resolutionY;

            float currentY = 1f;
            for (int i = 0; i < resolutionY; i++)
            {
                float currentX = -2.5f;
                for (int j = 0; j < resolutionX; j++)
                {
                    var iteration = 0;
                    var xSquare = 0f;
                    var ySquare = 0f;
                    var sumSquare = 0f;

                    float x, y;

                    while (xSquare + ySquare <= 4f && iteration < maxIterations)
                    {
                        x = xSquare - ySquare + currentX;
                        y = sumSquare - xSquare - ySquare + currentY;

                        xSquare = x * x;
                        ySquare = y * y;
                        sumSquare = (x + y) * (x + y);
                        iteration++;
                    }
                    result[i * resolutionX + j] = iteration;

                    currentX += step;
                }
                currentY -= step;
            }

            return result;
        }


        /// <summary>
        /// Computes an array representation of the Mandelbrot shape
        /// </summary>
        /// <returns>result is an array with values indicating for each point on the X-Y plane whether 
        /// the point is inside or outside the Mandelbrot shape. 
        /// result[i] equals the number of the iterations performed on the particalar element. 
        /// If it has a value below maxIterations, than it is outside the mandelbrot shape
        /// </returns>
        public static unsafe int[] VectorMandel(int resolutionY, int maxIterations)
        {
            const int VECTOR_SIZE = 4;

            // ensure that resolutionX is divisible by SIMD-width
            // ensure that resolutionY / 2.0 * 3.5 is integer
            // this simplifies calculations
            if (resolutionY % (4 * VECTOR_SIZE) != 0)
                throw new ArgumentException($"resolutionY must be divisible by {VECTOR_SIZE * 4}");            

            int resolutionX = resolutionY / 4 * 7;

            // distance between neighbouring points on the X-Y plane
            float step = 2f / resolutionY;

            // stepper vectors to easily step in Y and X directions
            var yStepVec = Vector128.Create(-step);
            var xStepVec = Vector128.Create(VECTOR_SIZE*step);

            // always contains the current Y-axis position. We begin at the top most horizontal line 
            // will be updated inside loop. All SIMD-components of currentYvec are always the same 
            var currentYvec = Vector128.Create(1f);

            var result = new int[resolutionX * resolutionY];

            fixed (int* resultFixedPtr = result)
            {
                int* resultPtr = resultFixedPtr;

                // outer loop: iterate along Y axis from top to bottom, one step at a time 
                for (int i = 0; i < resolutionY; i++)
                {
                    // initialize the starting X vector to the leftmost X positions
                    var currentXvec = Vector128.Create(-2.5f, -2.5f + step, -2.5f + 2 * step, -2.5f + 3 * step);

                    // iterate along the X axis with "VECTOR_SIZE" number of elements at a time
                    // the value of "currentXvec" is updated at the end of the loop to always represent the working elements
                    for (int j = 0; j <= resolutionX - VECTOR_SIZE; j += VECTOR_SIZE)
                    {
                        int iteration = 0;
                        var iterationCounter = Vector128.Create(0);
                        Vector128<float> xVec, yVec;
                        var xVecSquare = Vector128.Create(0f);
                        var yVecSquare = Vector128.Create(0f);
                        var sumVecSquare = Vector128.Create(0f);
                        var fourVec = Vector128.Create(4f);
                        var oneVec = Vector128.Create(1);

                        while (iteration < maxIterations)
                        {
                            var test = Sse42.CompareLessThanOrEqual(Sse42.Add(xVecSquare, yVecSquare), fourVec);

                            // no components inside tolerance, no work left to do
                            if (Sse42.MoveMask(test) == 0)
                                break;

                            // mask the incrementer vector to increment only the positions still inside Mandelbrot-tolerance
                            var selectiveAdd = Sse42.And(oneVec, test.AsInt32());
                            iterationCounter = Sse42.Add(iterationCounter, selectiveAdd);
                            iteration++;

                            // set up values for next iteration
                            xVec = Sse42.Add(Sse42.Subtract(xVecSquare, yVecSquare), currentXvec);
                            yVec = Sse42.Add(Sse42.Subtract(sumVecSquare, Sse42.Add(xVecSquare, yVecSquare)), currentYvec);

                            xVecSquare = Sse42.Multiply(xVec, xVec);
                            yVecSquare = Sse42.Multiply(yVec, yVec);

                            sumVecSquare = Sse.Multiply(Sse42.Add(xVec, yVec), Sse42.Add(xVec, yVec));
                        }
                        // at this point the iterations counts for a whole vector has been calculated
                        // resulting values are stored
                        Sse42.Store(resultPtr, iterationCounter);                        
                        resultPtr += VECTOR_SIZE;

                        // move along the X axis by four step-s 
                        currentXvec = Sse42.Add(currentXvec, xStepVec);
                    }
                    // move along the X axis by one step 
                    currentYvec = Sse42.Add(currentYvec, yStepVec);
                }
            }

            return result;
        }
    }


    class Program
    {
        static void Main(string[] args)
        {
         
            int yLen = 1600;
            int maxIteration = 10000;
            var result1 = Mandelbrot.VectorMandel(yLen, maxIteration);

            var xLen = result1.Length / yLen;
            using (var image = new Image<Rgba32>(xLen, yLen))
            {
                for (int i = 0; i < result1.Length; i++)
                {
                    image[i % xLen, i / xLen] =
                        result1[i] == maxIteration ? Rgba32.Black : Rgba32.White;
                }                
                image.Save("mandel1.bmp");                
            }
        }
    }
}

Metadata

Metadata

Assignees

Labels

No labels
No labels

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions