@@ -95,6 +95,8 @@ RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(u0, AmdExtD3DShaderI
9595#define AmdExtD3DShaderIntrinsicsOpcode_GetWaveSize 0x19
9696#define AmdExtD3DShaderIntrinsicsOpcode_BaseInstance 0x1a
9797#define AmdExtD3DShaderIntrinsicsOpcode_BaseVertex 0x1b
98+ #define AmdExtD3DShaderIntrinsicsOpcode_FloatConversion 0x1c
99+ #define AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt 0x1d
98100
99101/**
100102***********************************************************************************************************************
@@ -253,6 +255,17 @@ RWByteAddressBuffer AmdExtD3DShaderIntrinsicsUAV : register(u0, AmdExtD3DShaderI
253255#define AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64 0x07
254256#define AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64 0x08
255257
258+ /**
259+ ***********************************************************************************************************************
260+ * AmdExtD3DShaderIntrinsicsFloatConversionOp defines the supported rounding modes for float to float16 conversions.
261+ * To be used as an input to the AmdExtD3DShaderIntrinsicsOpcode_FloatConversion instruction.
262+ ***********************************************************************************************************************
263+ */
264+ #define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near 0x01
265+ #define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf 0x02
266+ #define AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf 0x03
267+
268+
256269/**
257270***********************************************************************************************************************
258271* MakeAmdShaderIntrinsicsInstruction
@@ -1315,6 +1328,133 @@ uint AmdExtD3DShaderIntrinsics_GetBaseVertex()
13151328 return retVal;
13161329}
13171330
1331+
1332+
/**
***********************************************************************************************************************
*   AmdExtD3DShaderIntrinsics_ReadlaneAt : uint
*
*   Available when CheckSupport(AmdExtD3DShaderIntrinsicsSupport_ReadlaneAt) returned S_OK.
*
*   Reads the value that src holds in the lane selected by laneId and returns it. The lane index is allowed to be
*   non-uniform across the wave.
*
***********************************************************************************************************************
*/
uint AmdExtD3DShaderIntrinsics_ReadlaneAt(uint src, uint laneId)
{
    // Encode the ReadlaneAt opcode (phase 0, third argument left at 0); it is used as the UAV access address.
    uint encodedOp = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt,
                                                        AmdExtD3DShaderIntrinsicsOpcodePhase_0,
                                                        0);

    // src and laneId are passed as the compare/value operands; the result arrives through the out parameter.
    uint laneValue;
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(encodedOp, src, laneId, laneValue);

    return laneValue;
}
1356+
/**
***********************************************************************************************************************
*   AmdExtD3DShaderIntrinsics_ReadlaneAt : int
*
*   Signed-integer flavour of ReadlaneAt: returns the value of src held by the lane selected by laneId.
***********************************************************************************************************************
*/
int AmdExtD3DShaderIntrinsics_ReadlaneAt(int src, uint laneId)
{
    // The intrinsic moves raw 32-bit values, so reinterpret the bits as uint, reuse the uint overload and
    // reinterpret the result back to int.
    uint srcBits = asuint(src);

    return asint(AmdExtD3DShaderIntrinsics_ReadlaneAt(srcBits, laneId));
}
1374+
/**
***********************************************************************************************************************
*   AmdExtD3DShaderIntrinsics_ReadlaneAt : float
*
*   Floating-point flavour of ReadlaneAt: returns the value of src held by the lane selected by laneId.
***********************************************************************************************************************
*/
float AmdExtD3DShaderIntrinsics_ReadlaneAt(float src, uint laneId)
{
    // The intrinsic moves raw 32-bit values, so reinterpret the bits as uint, reuse the uint overload and
    // reinterpret the result back to float.
    uint srcBits = asuint(src);

    return asfloat(AmdExtD3DShaderIntrinsics_ReadlaneAt(srcBits, laneId));
}
1392+
1393+ /**
1394+ ***********************************************************************************************************************
1395+ * AmdExtD3DShaderIntrinsics_ConvertF32toF16
1396+ *
1397+ * The following functions are available if CheckSupport(AmdExtD3DShaderIntrinsicsSupport_FloatConversion) returned
1398+ * S_OK.
1399+ *
1400+ * Converts 32-bit floating point numbers into 16-bit floating point numbers using a specified rounding mode.
1401+ *
1402+ * Available in all shader stages.
1403+ *
1404+ ***********************************************************************************************************************
1405+ */
1406+
/**
***********************************************************************************************************************
*   AmdExtD3DShaderIntrinsics_ConvertF32toF16 - shared helper behind the f32 to f16 conversion entry points
*
*   convOp selects the conversion/rounding mode (one of the AmdExtD3DShaderIntrinsicsFloatConversionOp_* values) and
*   val supplies the three 32-bit floats to convert. Returns one uint per input component.
***********************************************************************************************************************
*/
uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16(in uint convOp, in float3 val)
{
    // A single encoded instruction is shared by all three components; convOp is passed as its third argument.
    uint encodedOp = MakeAmdShaderIntrinsicsInstruction(AmdExtD3DShaderIntrinsicsOpcode_FloatConversion,
                                                        AmdExtD3DShaderIntrinsicsOpcodePhase_0,
                                                        convOp);

    // Issue the conversion once per component, in x, y, z order.
    uint convertedX;
    uint convertedY;
    uint convertedZ;
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(encodedOp, asuint(val.x), 0, convertedX);
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(encodedOp, asuint(val.y), 0, convertedY);
    AmdExtD3DShaderIntrinsicsUAV.InterlockedCompareExchange(encodedOp, asuint(val.z), 0, convertedZ);

    return uint3(convertedX, convertedY, convertedZ);
}
1425+
/**
***********************************************************************************************************************
*   AmdExtD3DShaderIntrinsics_ConvertF32toF16Near - convert f32 to f16 numbers using the nearest rounding mode
***********************************************************************************************************************
*/
uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16Near(in float3 inVec)
{
    // Forward to the shared helper with the round-to-nearest conversion op.
    uint3 converted = AmdExtD3DShaderIntrinsics_ConvertF32toF16(AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near,
                                                                inVec);

    return converted;
}
1435+
/**
***********************************************************************************************************************
*   AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf - convert f32 to f16 numbers using the -inf rounding mode
*
*   inVec holds the three 32-bit floats to convert. Returns one uint per input component, as produced by
*   AmdExtD3DShaderIntrinsics_ConvertF32toF16.
***********************************************************************************************************************
*/
uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16NegInf(in float3 inVec)
{
    // Forward to the shared helper with the round-toward-negative-infinity conversion op.
    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf, inVec);
}
1445+
/**
***********************************************************************************************************************
*   AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf - convert f32 to f16 numbers using the +inf rounding mode
*
*   inVec holds the three 32-bit floats to convert. Returns one uint per input component, as produced by
*   AmdExtD3DShaderIntrinsics_ConvertF32toF16.
***********************************************************************************************************************
*/
uint3 AmdExtD3DShaderIntrinsics_ConvertF32toF16PosInf(in float3 inVec)
{
    // Forward to the shared helper with the round-toward-positive-infinity conversion op.
    return AmdExtD3DShaderIntrinsics_ConvertF32toF16(AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf, inVec);
}
1455+
1456+
1457+
13181458/**
13191459***********************************************************************************************************************
13201460* AmdExtD3DShaderIntrinsics_MakeAtomicInstructions
@@ -3756,4 +3896,98 @@ uint4 AmdExtD3DShaderIntrinsics_WavePostfixMax(uint4 src)
37563896}
37573897
37583898
3899+ #if defined (AGS_RAY_HIT_TOKEN)
3900+
//=====================================================================================================================
// Hit token storage: two dwords. Base type of AmdExtRtHitTokenIn and AmdExtRtHitTokenOut.
struct AmdExtRtHitToken
{
    uint dword[2];
};
3906+
/**
***********************************************************************************************************************
* @brief
*    AmdExtD3DShaderIntrinsicsRT structure. Including it in a ray tracing payload tells the driver that the hit token
*    dwords are already supplied in AmdExtRtHitTokenIn, so only a call to intersect ray is required and the
*    traversal of the acceleration structure is bypassed.
***********************************************************************************************************************
*/
struct AmdExtRtHitTokenIn : AmdExtRtHitToken
{
};
3916+
/**
***********************************************************************************************************************
* @brief
*    AmdExtD3DShaderIntrinsicsRT structure. Including it in a ray tracing payload tells the driver that the hit token
*    dwords must be patched into the payload after traversal. The application can store this data in a buffer and
*    later use it for hit group sorting, so that shading divergence can be avoided.
***********************************************************************************************************************
*/
struct AmdExtRtHitTokenOut : AmdExtRtHitToken
{
};
3926+
3927+ /**
3928+ ***********************************************************************************************************************
3929+ * @brief
3930+ * Group shared memory reserved for temporary storage of hit tokens. Not intended to be touched by the app shader.
3931+ * Application shader must only use the extension functions defined below to access the hit tokens
3932+ *
3933+ ***********************************************************************************************************************
3934+ */
3935+ groupshared AmdExtRtHitToken AmdHitToken;
3936+
/**
***********************************************************************************************************************
* @brief
*    Accessor for the hit token produced by the last call to TraceRays(). The returned data is only guaranteed to be
*    valid for the last TraceRays() call issued before this function is called.
*
*    Returns the two token dwords packed as uint2 (dword[0] in x, dword[1] in y).
*
***********************************************************************************************************************
*/
uint2 AmdGetLastHitToken()
{
    uint2 lastToken;
    lastToken.x = AmdHitToken.dword[0];
    lastToken.y = AmdHitToken.dword[1];

    return lastToken;
}
3949+
/**
***********************************************************************************************************************
* @brief
*    Initialises the hit token for a subsequent TraceRays() call. Any TraceRay() that intends to use this hit token
*    must have this function call in the same basic block; the AmdTraceRay convenience macro can be used to enforce
*    that.
*
*    token.x is stored into dword[0] and token.y into dword[1] of the reserved hit token storage.
*
***********************************************************************************************************************
*/
void AmdSetHitToken(uint2 token)
{
    // Component 0 is token.x and component 1 is token.y.
    AmdHitToken.dword[0] = token[0];
    AmdHitToken.dword[1] = token[1];
}
3964+
/**
***********************************************************************************************************************
* @brief
*    Convenience macro for calling TraceRay() with a hit token. It expands to AmdSetHitToken(token) immediately
*    followed by TraceRay() with the remaining arguments forwarded unchanged.
*
*    Both calls are wrapped in a single brace-enclosed compound statement so that they stay together when the macro
*    is used as the body of an unbraced if/for/while. A trailing ';' after the macro invocation is harmless. When
*    the invocation is directly followed by 'else', put it inside braces at the call site.
*
***********************************************************************************************************************
*/
#define AmdTraceRay(accelStruct,                        \
                    rayFlags,                           \
                    instanceInclusionMask,              \
                    rayContributionToHitGroupIndex,     \
                    geometryMultiplier,                 \
                    missShaderIndex,                    \
                    ray,                                \
                    payload,                            \
                    token)                              \
{                                                       \
    AmdSetHitToken(token);                              \
    TraceRay(accelStruct,                               \
             rayFlags,                                  \
             instanceInclusionMask,                     \
             rayContributionToHitGroupIndex,            \
             geometryMultiplier,                        \
             missShaderIndex,                           \
             ray,                                       \
             payload);                                  \
}
3990+
3991+ #endif // AGS_RAY_HIT_TOKEN
3992+
37593993#endif // _AMDEXTD3DSHADERINTRINICS_HLSL
0 commit comments