Skip to content

bitonic sort sample #209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions 13_BitonicSort/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build script for the bitonic sort example.
#
# OPTIONAL is required for the check below to be reachable: without it CMake
# itself aborts when the module cannot be found and the custom message is
# never printed. With OPTIONAL, RES is set to NOTFOUND on failure.
include(common OPTIONAL RESULT_VARIABLE RES)
if(NOT RES)
	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Creates the executable target for this example; the first four arguments are left empty.
nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")

# When built-in resources are enabled, embed everything under app_resources/
# into a dedicated target and link it to the example executable.
if(NBL_EMBED_BUILTIN_RESOURCES)
	set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
	set(RESOURCE_DIR "app_resources")

	# Absolute locations of the resource search root and of the generated sources/headers.
	get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
	get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
	get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)

	# Collect every file below the resource directory, as paths relative to it.
	file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
	foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
		LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
	endforeach()

	ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")

	LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
112 changes: 112 additions & 0 deletions 13_BitonicSort/app_resources/bitonic_sort_shader.comp.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl"

// Push-constant payload read by the bitonic sort compute shader.
// NOTE(review): app_resources/common.hlsl declares a struct with the same name and
// fields; the two definitions must be kept in sync (or this one replaced by an include).
struct BitonicPushData
{
// Address the shader reads the uint32_t keys from.
uint64_t inputKeyAddress;
// Address the shader reads the uint32_t values paired with the keys from.
uint64_t inputValueAddress;
// Address the shader writes the keys to after sorting.
uint64_t outputKeyAddress;
// Address the shader writes the values to after sorting.
uint64_t outputValueAddress;
// Number of key/value pairs to process.
uint32_t dataElementCount;
};

// Bring nbl::hlsl names (bda::__ptr, BdaAccessor) into scope for the aliases below.
using namespace nbl::hlsl;

// Buffer addresses and element count, delivered through the push-constant block.
[[vk::push_constant]] BitonicPushData pushData;

// Keys and values are both accessed as 32-bit unsigned integers through these types.
using DataPtr = bda::__ptr<uint32_t>;
using DataAccessor = BdaAccessor<uint32_t>;

// Workgroup-shared working copies of the keys and of the values paired with them.
// ElementCount is not defined in this file - presumably supplied by the build or host code; TODO confirm.
groupshared uint32_t sharedKeys[ElementCount];
groupshared uint32_t sharedValues[ElementCount];

// Bitonic sort of key/value pairs performed entirely in groupshared memory.
//
// Every thread of the workgroup strides over the input with step WorkgroupSize:
//   1. keys and values are copied from the input addresses into sharedKeys / sharedValues,
//   2. the compare-exchange passes run, with a workgroup barrier after each pass,
//   3. the shared arrays are copied to the output addresses.
//
// dispatchId is unused; only the thread index within the workgroup (localId.x) matters.
//
// NOTE(review): the shader only uses the in-group thread index, so it presumably expects
// a dispatch of exactly one workgroup - confirm against the host code.
// NOTE(review): pairs whose partner index falls outside the data are skipped, so a
// fully sorted result is presumably only guaranteed for power-of-two element counts.
[numthreads(WorkgroupSize, 1, 1)]
[shader("compute")]
void main(uint32_t3 dispatchId : SV_DispatchThreadID, uint32_t3 localId : SV_GroupThreadID)
{
    const uint32_t threadId = localId.x;

    // Never index past the groupshared arrays, whatever count was pushed.
    const uint32_t requestedCount = pushData.dataElementCount;
    const uint32_t capacity = uint32_t(ElementCount);
    const uint32_t dataSize = requestedCount < capacity ? requestedCount : capacity;

    DataAccessor inputKeys = DataAccessor::create(DataPtr::create(pushData.inputKeyAddress));
    DataAccessor inputValues = DataAccessor::create(DataPtr::create(pushData.inputValueAddress));

    for (uint32_t i = threadId; i < dataSize; i += WorkgroupSize)
    {
        inputKeys.get(i, sharedKeys[i]);
        inputValues.get(i, sharedValues[i]);
    }

    // All loads must be visible to the whole workgroup before the first compare-exchange.
    GroupMemoryBarrierWithGroupSync();

    for (uint32_t stage = 0; stage < Log2ElementCount; stage++)
    {
        // Bit (stage + 1) of the element index selects the direction of its
        // 2^(stage + 1)-element sequence: clear = ascending, set = descending.
        const uint32_t directionShift = stage + 1;

        for (uint32_t pass = 0; pass <= stage; pass++)
        {
            const uint32_t compareDistance = 1u << (stage - pass);

            for (uint32_t i = threadId; i < dataSize; i += WorkgroupSize)
            {
                // Each pair is handled exclusively by the thread that owns its lower
                // element. The upper element's thread must not touch the pair at all:
                // reading it while the owner swaps would be an unsynchronized access.
                if ((i & compareDistance) != 0u)
                    continue;

                const uint32_t partnerId = i | compareDistance;
                if (partnerId >= dataSize)
                    continue;

                // Both keys come straight from shared memory. Exchanging them with wave
                // intrinsics would require element i to live in lane (i % wave size),
                // which nothing in this shader establishes, and yields the same values.
                const uint32_t lowKey = sharedKeys[i];
                const uint32_t highKey = sharedKeys[partnerId];

                const bool ascending = ((i >> directionShift) & 1u) == 0u;

                // Ascending sequences swap when low > high; descending ones swap otherwise.
                if ((lowKey > highKey) == ascending)
                {
                    const uint32_t lowValue = sharedValues[i];

                    sharedKeys[i] = highKey;
                    sharedKeys[partnerId] = lowKey;
                    sharedValues[i] = sharedValues[partnerId];
                    sharedValues[partnerId] = lowValue;
                }
            }

            // The next pass reads elements written by other threads in this one.
            GroupMemoryBarrierWithGroupSync();
        }
    }

    DataAccessor outputKeys = DataAccessor::create(DataPtr::create(pushData.outputKeyAddress));
    DataAccessor outputValues = DataAccessor::create(DataPtr::create(pushData.outputValueAddress));

    for (uint32_t i = threadId; i < dataSize; i += WorkgroupSize)
    {
        outputKeys.set(i, sharedKeys[i]);
        outputValues.set(i, sharedValues[i]);
    }
}
17 changes: 17 additions & 0 deletions 13_BitonicSort/app_resources/common.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _BITONIC_SORT_COMMON_INCLUDED_
#define _BITONIC_SORT_COMMON_INCLUDED_

// Push-constant payload for the bitonic sort example.
// NOTE(review): bitonic_sort_shader.comp.hlsl declares a struct with the same name and
// fields instead of including this header; keep the two definitions in sync.
struct BitonicPushData
{

// Address the uint32_t keys are read from.
uint64_t inputKeyAddress;
// Address the uint32_t values paired with the keys are read from.
uint64_t inputValueAddress;
// Address the keys are written to after sorting.
uint64_t outputKeyAddress;
// Address the values are written to after sorting.
uint64_t outputValueAddress;
// Number of key/value pairs to process.
uint32_t dataElementCount;
};

#endif
28 changes: 28 additions & 0 deletions 13_BitonicSort/config.json.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"enableParallelBuild": true,
"threadsPerBuildProcess" : 2,
"isExecuted": false,
"scriptPath": "",
"cmake": {
"configurations": [ "Release", "Debug", "RelWithDebInfo" ],
"buildModes": [],
"requiredOptions": []
},
"profiles": [
{
"backend": "vulkan", // should be none
"platform": "windows",
"buildModes": [],
"runConfiguration": "Release", // we also need to run in Debug and RWDI (RelWithDebInfo) because this is a foundational example
"gpuArchitectures": []
}
],
"dependencies": [],
"data": [
{
"dependencies": [],
"command": [""],
"outputs": []
}
]
}
Loading