Skip to content

Create Benchmark to compare FKL with BVF and FKL without BVF #178

@morousg

Description

@morousg

We want to measure the performance improvement achieved by using Backwards Vertical Fusion (BVF) compared with not using it.

For that purpose, we want to execute the following two sequences 100 times:

// Stream handed to every executeOperations call in both sequences.
Stream stream;

// We set all outputs to the same size
const Size outputSize(60, 60);

// BATCH_10 is the number of base crop rectangles listed in crops_10.
// BATCH is the total number of crops per run; it sizes the crops array,
// the cropedPtrs array and the third dimension of the output Tensor.
constexpr int BATCH_10 = 10;
constexpr int BATCH = 100;

// We have a 4K source image
Image<PixelFormat::NV12> inputImage(3840, 2160);

// Intermediate RGB image after YUV to RGB conversion.
// Only the sequence without BVF writes to and reads from this buffer.
Ptr2D<float3> rgbImg(3840, 2160);

// We want a Tensor of contiguous memory for all images
Tensor<float3> output(outputSize.width, outputSize.height, BATCH);

// Crops can be of different sizes.
// crops_10 holds five distinct rectangles, each listed twice.
// NOTE(review): Rect arguments are presumably (x, y, width, height) — confirm against the Rect constructor.
constexpr std::array<Rect, BATCH_10> crops_10{ Rect(0, 0, 34, 25),      Rect(40, 40, 70, 15),     Rect(100, 200, 60, 59),
                                     Rect(300, 1000, 20, 23), Rect(3000, 2000, 12, 11), Rect(0, 0, 34, 25),
                                     Rect(40, 40, 70, 15),    Rect(100, 200, 60, 59),   Rect(300, 1000, 20, 23),
                                     Rect(3000, 2000, 12, 11) };
// Full list of BATCH crop rectangles; value-initialized here and filled from crops_10 afterwards.
std::array<Rect, BATCH> crops{};
// One crop of rgbImg per rectangle in crops; filled by the sequence without BVF.
std::array<Ptr2D<float3>, BATCH> cropedPtrs;

for (int i = 0; i < BATCH_10; ++i) {
    int j{ 0 };
    for (auto&& elem : crops_10) {
        crops[i + j] = elem;
        j++;
    }
}

// NOTE(review): input initialization is commented out, so this snippet never
// explicitly fills inputImage with pixel values.
// initImageValues(inputImage);
// Passed to Resize::build together with outputSize.
// NOTE(review): presumably the fill color for the area left uncovered when the
// aspect ratio is preserved — confirm against the Resize documentation.
const float3 backgroundColor{ 0.f, 0.f, 0.f };
// Operands for the Mul, Sub and Div operations built below.
// NOTE(review): make_set<float3>(v) presumably sets every channel to v — confirm.
const float3 mulValue = make_set<float3>(1.4f);
const float3 subValue = make_set<float3>(0.5f);
const float3 divValue = make_set<float3>(255.f);

// Create the operation instances once, and use them multiple times
// Reads from the NV12 input image.
const auto readIOp = ReadYUV<PixelFormat::NV12>::build(inputImage);
// NV12 to float3 RGB conversion, configured for full range and bt2020 primitives.
// NOTE(review): the meaning of the `false` template argument is not visible here — confirm.
const auto yuvToRGB = ConvertYUVToRGB<PixelFormat::NV12,
    ColorRange::Full,
    ColorPrimitives::bt2020,
    false, float3>::build();
// Crop operation built from the whole array of BATCH rectangles.
const auto cropIOp = Crop<>::build(crops);
// Linear-interpolation resize to outputSize, preserving the aspect ratio.
const auto resizeIOp =
    Resize<InterpolationType::INTER_LINEAR, AspectRatio::PRESERVE_AR>::build(outputSize, backgroundColor);
// Per-pixel arithmetic with the constants defined above.
const auto mulIOp = Mul<float3>::build(mulValue);
const auto subIOp = Sub<float3>::build(subValue);
const auto divIOp = Div<float3>::build(divValue);
// Writes the results into the output Tensor.
const auto tensorWriteIOp = TensorWrite<float3>::build(output);

// Sequence 1, without BVF
// Step 1: read the NV12 input, convert it to RGB and write the full-size
// result into the intermediate rgbImg buffer.
executeOperations<TransformDPP<>>(
    stream, readIOp, yuvToRGB, WriteOp::build(rgbImg));

// Step 2: take one crop of rgbImg per rectangle in crops.
for (int i = 0; i < BATCH; ++i) {
    cropedPtrs[i] = rgbImg.crop2D(Point(crops[i].x, crops[i].y), PtrDims<ND::_2D>(crops[i].width, crops[i].height));
}

// Step 3: read the crops, then resize, multiply, subtract, divide and write
// into the output Tensor.
executeOperations<TransformDPP<>>(
    stream, ReadOp::build(cropedPtrs), resizeIOp, mulIOp,
                                  subIOp, divIOp, tensorWriteIOp);

// Sequence 2, with BVF
// A single executeOperations call chains read, YUV to RGB conversion, crop,
// resize, multiply, subtract, divide and the Tensor write; rgbImg is not used.
executeOperations<TransformDPP<>>(stream, readIOp, yuvToRGB, cropIOp,
                                  resizeIOp, mulIOp, subIOp, divIOp, tensorWriteIOp);

// NOTE(review): presumably blocks until all work submitted on stream has
// finished. It is called once after both sequences, so timing each sequence
// separately would need its own synchronization point — confirm.
stream.sync();

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions