Skip to content

Commit d612ede

Browse files
RichardGemeistdan
andauthored
Merge pull request #50 from GPUOpen-LibrariesAndSDKs/next-release-11
Update to 3.0.3.a1525e7 Co-authored-by: Daniel Meister <[email protected]>
2 parents 4baa98e + c53c4c4 commit d612ede

File tree

18 files changed

+84
-89
lines changed

18 files changed

+84
-89
lines changed

contrib/Orochi/contrib/bin/win64/amd_comgr0602.dll

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:face8405d08135bcf307687b5aedca3061396220f43a0c4bffe134662ff5851d
3+
size 121159576

contrib/Orochi/contrib/bin/win64/hiprtc-builtins0602.dll

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:c792d7940ad54166f116400f14d393c4e564eadf39428a0b5c56d0b4746accc0
3+
size 1113496

contrib/Orochi/contrib/bin/win64/hiprtc0602.dll

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:c0e5913442de957f3faa1133ceedd1a8000831cbc32347bf63f03915b58a6446
3+
size 1922968

contrib/Orochi/contrib/hipew/src/hipew.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,13 @@ void hipewInit( int* resultDriver, int* resultRtc, uint32_t flags, const char**
625625
"amdhip64.dll", // <- hip '5.x' DLL.
626626
NULL };
627627
const char* hiprtc_paths[] = {
628+
"hiprtc0707.dll",
629+
"hiprtc0706.dll",
630+
"hiprtc0705.dll",
631+
"hiprtc0704.dll",
632+
"hiprtc0703.dll",
633+
"hiprtc0702.dll",
634+
"hiprtc0701.dll",
628635
"hiprtc0700.dll",
629636
"hiprtc0605.dll",
630637
"hiprtc0604.dll",
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
rd /s /q cache
2-
..\dist\bin\Release\Unittest64.exe --gtest_filter=-*link_bundledBc* --gtest_output=xml:../result.xml
2+
..\dist\bin\Release\Unittest64.exe --gtest_filter=-*link_bundledBc*:*VulkanComputeSimple64* --gtest_output=xml:../result.xml

hiprt/hiprt_libpath.h

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,10 @@ const char* g_hip_paths[] = {
3737
"amdhip64.dll", // <- hip '5.x' DLL.
3838
NULL };
3939

40-
const char* g_hiprtc_paths[] = {
41-
"hiprtc0700.dll",
42-
"hiprtc0605.dll",
43-
"hiprtc0604.dll",
44-
"hiprtc0603.dll",
45-
"hiprtc0602.dll",
46-
"hiprtc0601.dll",
47-
"hiprtc0600.dll",
48-
"hiprtc0507.dll",
49-
"hiprtc0506.dll",
50-
"hiprtc0505.dll",
51-
"hiprtc0504.dll",
52-
"hiprtc0503.dll",
53-
NULL };
40+
const char* g_hiprtc_paths[] = { "hiprtc0707.dll", "hiprtc0706.dll", "hiprtc0705.dll", "hiprtc0704.dll", "hiprtc0703.dll",
41+
"hiprtc0702.dll", "hiprtc0701.dll", "hiprtc0700.dll", "hiprtc0605.dll", "hiprtc0604.dll",
42+
"hiprtc0603.dll", "hiprtc0602.dll", "hiprtc0601.dll", "hiprtc0600.dll", "hiprtc0507.dll",
43+
"hiprtc0506.dll", "hiprtc0505.dll", "hiprtc0504.dll", "hiprtc0503.dll", NULL };
5444
#elif defined( __APPLE__ )
5545

5646
const char** g_hip_paths = nullptr;

hiprt/impl/BvhBuilderKernels.h

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,19 @@ HIPRT_DEVICE HIPRT_INLINE Obb getNodeObb(
114114
if ( nodeType != BoxType )
115115
{
116116
if constexpr ( is_same<PrimitiveNode, TrianglePacketNode>::value )
117-
return primNodes[nodeAddr].obb( typeToTriPairIndex( nodeType ), matrixIndex, nodeBox );
117+
{
118+
Obb obb = primNodes[nodeAddr].obb( typeToTriPairIndex( nodeType ), matrixIndex, nodeBox );
119+
if ( !obb.aabb().valid() ) obb = Obb( matrixIndex ).grow( nodeBox );
120+
return obb;
121+
}
118122
else if constexpr ( is_same<PrimitiveNode, InstanceNode>::value )
123+
{
119124
return primitives.fetchObb( primNodes[nodeAddr].m_primIndex, matrixIndex, nodeBox );
125+
}
120126
else
127+
{
121128
return Obb( matrixIndex ).grow( nodeBox );
129+
}
122130
}
123131
else
124132
{
@@ -1467,6 +1475,7 @@ __device__ void PackLeaves(
14671475
}
14681476
}
14691477

1478+
// assuming that triangle packets are AMD specific, there are no warp syncs
14701479
template <>
14711480
__device__ void PackLeaves<TriangleMesh, TrianglePacketNode, GeomHeader>(
14721481
uint32_t index,
@@ -1754,6 +1763,7 @@ __device__ void PackLeaves<TriangleMesh, TrianglePacketNode, GeomHeader>(
17541763
}
17551764
}
17561765

1766+
// assuming that triangle packets are AMD specific, there are no warp syncs
17571767
__device__ void PackLeavesWarp(
17581768
uint32_t index,
17591769
uint32_t taskCount,
@@ -1813,15 +1823,13 @@ __device__ void PackLeavesWarp(
18131823
}
18141824

18151825
// find new vertices
1816-
const uint32_t sublaneVertIndex = laneIndex % LanesPerLeafPacketTask;
1817-
const uint32_t subwarpVertIndex = laneIndex / LanesPerLeafPacketTask;
1818-
const bool valid = sublaneVertIndex < 3 || pairIndices.x != pairIndices.y;
1826+
const bool valid = sublaneIndex < 3 || pairIndices.x != pairIndices.y;
18191827

18201828
bool contains = false;
18211829
uint32_t vertexIndexInPacket{};
18221830
for ( uint32_t k = 0; k < packet.m_vertCount; ++k )
18231831
{
1824-
if ( triPacketCache[subwarpIndex].m_vertexIndices[k] == ( &indices.x )[sublaneVertIndex] )
1832+
if ( triPacketCache[subwarpIndex].m_vertexIndices[k] == ( &indices.x )[sublaneIndex] )
18251833
{
18261834
vertexIndexInPacket = k;
18271835
contains = true;
@@ -1830,7 +1838,7 @@ __device__ void PackLeavesWarp(
18301838
}
18311839

18321840
const uint32_t newVertMask =
1833-
( hiprt::ballot( !contains && valid ) >> ( LanesPerLeafPacketTask * subwarpVertIndex ) ) & 0xf;
1841+
( hiprt::ballot( !contains && valid ) >> ( LanesPerLeafPacketTask * subwarpIndex ) ) & 0xf;
18341842
const uint32_t oldVertCount = packet.m_vertCount;
18351843
const uint32_t newVertCount = __popc( newVertMask );
18361844

@@ -1840,21 +1848,21 @@ __device__ void PackLeavesWarp(
18401848
// store new vertices
18411849
if ( !contains )
18421850
{
1843-
const uint32_t vertexMask = ( 1 << sublaneVertIndex ) - 1;
1851+
const uint32_t vertexMask = ( 1 << sublaneIndex ) - 1;
18441852
vertexIndexInPacket = oldVertCount + __popc( newVertMask & vertexMask );
1845-
triPacketCache[subwarpIndex].m_vertexIndices[vertexIndexInPacket] = ( &indices.x )[sublaneVertIndex];
1853+
triPacketCache[subwarpIndex].m_vertexIndices[vertexIndexInPacket] = ( &indices.x )[sublaneIndex];
18461854
}
18471855

18481856
rangeOffset++;
1857+
1858+
// not sure why but this fence is needed on linux
1859+
__threadfence_block();
18491860
}
1850-
sync_warp();
18511861
__threadfence_block();
18521862

18531863
// count packets
18541864
if ( taskIndex < taskCount && packet.m_triPairCount > 0 ) primNodeCount++;
1855-
sync_warp();
18561865
}
1857-
sync_warp();
18581866

18591867
const uint32_t primNodeBase =
18601868
warpOffset( sublaneIndex == LanesPerLeafPacketTask - 1 ? primNodeCount : 0u, &header->m_primNodeCount );
@@ -1865,11 +1873,9 @@ __device__ void PackLeavesWarp(
18651873
uint32_t leafIndex = 0;
18661874

18671875
rangeOffset = rangeBase;
1868-
sync_warp();
18691876

18701877
while ( hiprt::ballot( taskIndex < taskCount && rangeOffset < rangeBase + rangeSize ) )
18711878
{
1872-
sync_warp();
18731879
TrianglePacketData packet{};
18741880

18751881
while ( rangeOffset < rangeBase + rangeSize && packet.m_triPairCount < MaxTrianglePairsPerTrianglePacket )
@@ -1900,23 +1906,22 @@ __device__ void PackLeavesWarp(
19001906
}
19011907

19021908
// find new vertices
1903-
const uint32_t sublaneVertIndex = laneIndex % LanesPerLeafPacketTask;
1904-
const uint32_t subwarpVertIndex = laneIndex / LanesPerLeafPacketTask;
1905-
const bool valid = sublaneVertIndex < 3 || pairIndices.x != pairIndices.y;
1909+
const bool valid = sublaneIndex < 3 || pairIndices.x != pairIndices.y;
19061910

19071911
bool contains = false;
19081912
uint32_t vertexIndexInPacket{};
19091913
for ( uint32_t k = 0; k < packet.m_vertCount; ++k )
19101914
{
1911-
if ( triPacketCache[subwarpIndex].m_vertexIndices[k] == ( &indices.x )[sublaneVertIndex] )
1915+
if ( triPacketCache[subwarpIndex].m_vertexIndices[k] == ( &indices.x )[sublaneIndex] )
19121916
{
19131917
vertexIndexInPacket = k;
19141918
contains = true;
19151919
break;
19161920
}
19171921
}
19181922

1919-
const uint32_t newVertMask = ( hiprt::ballot( !contains && valid ) >> ( 4 * subwarpVertIndex ) ) & 0xf;
1923+
const uint32_t newVertMask =
1924+
( hiprt::ballot( !contains && valid ) >> ( LanesPerLeafPacketTask * subwarpIndex ) ) & 0xf;
19201925
const uint32_t oldVertCount = packet.m_vertCount;
19211926
const uint32_t newVertCount = __popc( newVertMask );
19221927

@@ -1937,17 +1942,17 @@ __device__ void PackLeavesWarp(
19371942
// store new vertices
19381943
if ( !contains )
19391944
{
1940-
const uint32_t vertexMask = ( 1 << sublaneVertIndex ) - 1;
1945+
const uint32_t vertexMask = ( 1 << sublaneIndex ) - 1;
19411946
vertexIndexInPacket = oldVertCount + __popc( newVertMask & vertexMask );
1942-
triPacketCache[subwarpIndex].m_vertexIndices[vertexIndexInPacket] = ( &indices.x )[sublaneVertIndex];
1947+
triPacketCache[subwarpIndex].m_vertexIndices[vertexIndexInPacket] = ( &indices.x )[sublaneIndex];
19431948
}
19441949

19451950
// shuffle vertex indices in packet
19461951
uint4 vertexIndicesInPacket;
1947-
vertexIndicesInPacket.x = shfl( vertexIndexInPacket, subwarpVertIndex * LanesPerLeafPacketTask + 0 );
1948-
vertexIndicesInPacket.y = shfl( vertexIndexInPacket, subwarpVertIndex * LanesPerLeafPacketTask + 1 );
1949-
vertexIndicesInPacket.z = shfl( vertexIndexInPacket, subwarpVertIndex * LanesPerLeafPacketTask + 2 );
1950-
vertexIndicesInPacket.w = shfl( vertexIndexInPacket, subwarpVertIndex * LanesPerLeafPacketTask + 3 );
1952+
vertexIndicesInPacket.x = shfl( vertexIndexInPacket, subwarpIndex * LanesPerLeafPacketTask + 0 );
1953+
vertexIndicesInPacket.y = shfl( vertexIndexInPacket, subwarpIndex * LanesPerLeafPacketTask + 1 );
1954+
vertexIndicesInPacket.z = shfl( vertexIndexInPacket, subwarpIndex * LanesPerLeafPacketTask + 2 );
1955+
vertexIndicesInPacket.w = shfl( vertexIndexInPacket, subwarpIndex * LanesPerLeafPacketTask + 3 );
19511956

19521957
uint3 triIndices0 = make_uint3( vertexIndicesInPacket );
19531958
uint3 triIndices1{};
@@ -1968,7 +1973,6 @@ __device__ void PackLeavesWarp(
19681973
// otherwise triIndices1 are not correcly written to the final packet
19691974
__threadfence_block();
19701975
}
1971-
sync_warp();
19721976
__threadfence_block();
19731977

19741978
// build packets
@@ -2000,7 +2004,6 @@ __device__ void PackLeavesWarp(
20002004
if ( halfLaneIndex < broadcastPacket.m_vertCount )
20012005
halfLaneVertexIndex = triPacketCache[broadcastSubwarpIndex].m_vertexIndices[halfLaneIndex];
20022006
}
2003-
sync_warp();
20042007

20052008
// reuse shared memory
20062009
TrianglePacketNode& triPacketNode =
@@ -2010,7 +2013,6 @@ __device__ void PackLeavesWarp(
20102013
triPacketNode.m_data[halfLaneIndex + 0 * 16] = 0;
20112014
triPacketNode.m_data[halfLaneIndex + 1 * 16] = 0;
20122015
}
2013-
sync_warp();
20142016

20152017
// build two packets at once
20162018
if ( halfWarpIndex == 0 || secondValid )
@@ -2041,7 +2043,6 @@ __device__ void PackLeavesWarp(
20412043
triPacketNode.writeVertex<true>( halfLaneIndex, vertex );
20422044
}
20432045
}
2044-
sync_warp();
20452046

20462047
// write packet
20472048
if ( ( halfWarpIndex == 0 || secondValid ) )
@@ -2051,13 +2052,10 @@ __device__ void PackLeavesWarp(
20512052
primNodes[broadcastPrimNodeOffset].m_data[halfLaneIndex + 1 * 16] =
20522053
triPacketNode.m_data[halfLaneIndex + 1 * 16];
20532054
}
2054-
sync_warp();
20552055
}
2056-
sync_warp();
20572056

20582057
if ( taskIndex < taskCount && packet.m_triPairCount > 0 ) primNodeOffset++;
20592058
}
2060-
sync_warp();
20612059

20622060
// patch children
20632061
if ( taskIndex < taskCount )

0 commit comments

Comments
 (0)