Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
2652be6
NFC: Infrastructure changes for DXIL op vector and multi-dim-overloads
Feb 13, 2025
9002f71
Allow lowering of vector load stores
Mar 24, 2025
9ec8bcc
Enable select native vector intrinsics
Dec 3, 2024
51635fd
Use internal validator for execution test that requires SM 6.8 (#7309)
bob80905 Apr 4, 2025
73c4208
Add staging branches to azure pipeline testing (#7285)
Apr 7, 2025
aca1035
Merge branch 'main' into staging-sm6.9
damyanp Apr 9, 2025
f69f281
chore: autopublish 2025-04-09T15:48:17Z
github-actions[bot] Apr 9, 2025
d478b86
Merge branch 'main' to staging-sm6.9 (#7328)
damyanp Apr 10, 2025
32c8357
Merge branch 'main' into staging-sm6.9
damyanp Apr 10, 2025
3e18075
chore: autopublish 2025-04-10T00:53:54Z
github-actions[bot] Apr 10, 2025
4ce225f
Merge main to staging-sm6.9 (#7330)
damyanp Apr 10, 2025
519b6e5
Merge remote-tracking branch 'ms/main' into staging-sm6.9
tex3d Apr 15, 2025
bf961d9
Merge main into staging-sm6.9 (#7345)
damyanp Apr 15, 2025
a3af055
Merge remote-tracking branch 'ms/main' into update-staging-sm6.9
tex3d Apr 15, 2025
969168d
merge main into staging-sm6.9 (#7352)
damyanp Apr 16, 2025
1db8c5b
Implementation of the CoopVec Inference and Training builtin intrinis…
anupamachandra Apr 18, 2025
629a402
Revert ADO pipelines to Ubuntu 22.04 temporarily (#7365) (#7366)
damyanp Apr 22, 2025
9ef5d8a
Merge remote-tracking branch 'ms/main' into staging-sm6.9
tex3d Apr 22, 2025
86dd84d
Basic implementation of priority long vector exec tests. (#7320)
alsepkow Apr 22, 2025
70ee672
Merge main to staging-sm6.9 (#7368)
damyanp Apr 22, 2025
26ca0d5
Move most long vector preview test utility logic to its own file (#7375)
alsepkow Apr 24, 2025
6cb6843
[SER] Basic execution tests
simoll Apr 25, 2025
e617910
[CoopVec] Add Linear Algebra common header with tests (#7350)
bob80905 Apr 28, 2025
6bcb151
Add SetShaderTableIndex+LoadLocalRootConstant tests / host code for l…
simoll Apr 28, 2025
144a083
Uppercased vars for coding standards / added CreateDXRDevice helper /…
simoll Apr 30, 2025
7539042
Merge branch 'main' of https://github.com/microsoft/DirectXShaderComp…
damyanp Apr 30, 2025
98c9a93
Merge main into staging-sm6.9 (#7404)
damyanp Apr 30, 2025
5a31d33
Merge remote-tracking branch 'msft/staging-sm6.9' into ser_exectest_p…
simoll Apr 30, 2025
c2e2dee
Test all MaybeReorderThread variants (in wave-incoherent execution)
simoll May 2, 2025
6ab79fe
DynamicHitObjectArrayTest
simoll May 5, 2025
d31a0ad
Cleanup / fixes and update expected values based on changed geometry
simoll May 6, 2025
8f1c598
WaveIncoherentHitTest
simoll May 5, 2025
e1df92b
SERReorderCoherentTest
simoll May 6, 2025
8ab7045
[CoopVec] Initial CoopVec ExecutionTest support for Mul[Add] and Oute…
jholewinski-nv May 6, 2025
c6fce45
Fixed SERShaderTableIndexTest (not relying on is/ah any longer)
simoll May 8, 2025
c3c399f
Turn ShaderTable::Init into a ctor with initializer list
simoll May 8, 2025
15140b1
Fix GetAttributesTest: procedural not contained in AABB / use integer…
simoll May 8, 2025
6e42757
Fix SERWaveIncoherentHitTest: Use ray_flags to be independent of aabb…
simoll May 8, 2025
0aee7ff
nfc: formatting
simoll May 8, 2025
03e013a
Merge remote-tracking branch 'msft/staging-sm6.9' into ser_exectest_p…
simoll May 8, 2025
57a36b3
[SER] Execution test update
simoll Aug 27, 2025
c9ff450
[SER] HitObject::GetAttributes change (off by default)
simoll Sep 3, 2025
156d9c5
Set AABB/Tri exclusive ray flags to fix AH sequencing issues in SERMu…
simoll Sep 10, 2025
876b2b4
Remove dbg messages, restore PAQ variations
simoll Sep 10, 2025
4df10b9
SERMultiPayloadTest: Add SKIP_TRIANGLES ray flag to remaining TraceCall
simoll Sep 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
trigger:
- main
- release*
- staging*

pr:
- main
- release*
- staging*

resources:
- repo: self
Expand Down
50 changes: 44 additions & 6 deletions include/dxc/Test/HlslTestUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,17 @@ inline void LogErrorFmt(const wchar_t *fmt, ...) {
WEX::Logging::Log::Error(buf.data());
}

inline void LogErrorFmtThrow(const wchar_t *fmt, ...) {
va_list args;
va_start(args, fmt);
std::wstring buf(vFormatToWString(fmt, args));
va_end(args);
WEX::Logging::Log::Error(buf.data());

// Throws an exception to abort the test.
VERIFY_FAIL(L"Test error");
}

inline std::wstring
GetPathToHlslDataFile(const wchar_t *relative,
LPCWSTR paramName = HLSLDATAFILEPARAM,
Expand Down Expand Up @@ -459,15 +470,17 @@ inline bool GetTestParamUseWARP(bool defaultVal) {

#ifdef FP_SUBNORMAL

// Reports whether std::fpclassify places the given float in the subnormal
// category.
inline bool isdenorm(float f) {
  const int Category = std::fpclassify(f);
  return Category == FP_SUBNORMAL;
}
// Reports whether std::fpclassify places the given value in the subnormal
// category. Templated so the same check serves more than one value type.
template <typename T> inline bool isdenorm(T f) {
  const int Category = std::fpclassify(f);
  return Category == FP_SUBNORMAL;
}

#else

inline bool isdenorm(float f) {
return (std::numeric_limits<float>::denorm_min() <= f &&
f < std::numeric_limits<float>::min()) ||
(-std::numeric_limits<float>::min() < f &&
f <= -std::numeric_limits<float>::denorm_min());
// Range-based subnormal test: a value is subnormal when its magnitude lies in
// [denorm_min, min) for its type, on either side of zero.
template <typename T> inline bool isdenorm(T f) {
  const T Smallest = std::numeric_limits<T>::denorm_min();
  const T MinNormal = std::numeric_limits<T>::min();

  // Positive subnormal range.
  if (Smallest <= f && f < MinNormal)
    return true;

  // Negative subnormal range.
  return -MinNormal < f && f <= -Smallest;
}

#endif // FP_SUBNORMAL
Expand Down Expand Up @@ -515,6 +528,31 @@ inline bool isnanFloat16(uint16_t val) {
uint16_t ConvertFloat32ToFloat16(float val) throw();
float ConvertFloat16ToFloat32(uint16_t val) throw();

// Compares two doubles for equality within a tolerance expressed in units in
// the last place (ULPs), measured as the distance between their bit patterns.
//
// Src          - value under test.
// Ref          - expected value.
// ULPTolerance - largest accepted distance between the two bit patterns.
// Mode         - denorm handling; in Any mode a denormal Ref additionally
//                accepts a zero Src that carries the same sign.
//
// Returns true when Src is an acceptable match for Ref.
inline bool CompareDoubleULP(
    const double &Src, const double &Ref, int64_t ULPTolerance,
    hlsl::DXIL::Float32DenormMode Mode = hlsl::DXIL::Float32DenormMode::Any) {
  if (Src == Ref) {
    return true;
  }

  // NaN only matches NaN. Both operands are checked so that a NaN reference
  // never reaches the bit-pattern comparison below, where +inf sits one step
  // away from the smallest NaN encoding and would otherwise be accepted.
  if (std::isnan(Src) || std::isnan(Ref)) {
    return std::isnan(Src) && std::isnan(Ref);
  }

  if (Mode == hlsl::DXIL::Float32DenormMode::Any) {
    // If denorm expected, output can be sign preserved zero. Otherwise output
    // should pass the regular ulp testing.
    if (isdenorm(Ref) && Src == 0 && std::signbit(Src) == std::signbit(Ref))
      return true;
  }

  // For FTZ or Preserve mode, we should get the expected number within
  // ULPTolerance for any operations.
  //
  // The distance is computed entirely in unsigned arithmetic: negating a
  // signed difference overflows when the bit patterns differ by exactly 2^63
  // (for example -denorm_min against +denorm_min).
  const uint64_t RawDiff =
      *((const uint64_t *)&Src) - *((const uint64_t *)&Ref);
  const bool IsNegative = (RawDiff >> 63) != 0;
  const uint64_t AbsoluteDiff = IsNegative ? (uint64_t(0) - RawDiff) : RawDiff;
  return AbsoluteDiff <= (uint64_t)ULPTolerance;
}

inline bool CompareFloatULP(
const float &fsrc, const float &fref, int ULPTolerance,
hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {
Expand Down
4 changes: 2 additions & 2 deletions include/dxc/Test/WEXAdapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ inline void EndGroup(const wchar_t *name) {
wprintf(L"END TEST(S): <%ls>\n", name);
}
// Writes msg followed by a newline to a standard stream.
// NOTE(review): the body below contains both a stdout pair and a stderr pair
// of calls. This looks like the old and new sides of a diff hunk captured
// together - confirm which stream is intended before relying on it.
inline void Comment(const wchar_t *msg) {
fputws(msg, stdout);
fputwc(L'\n', stdout);
fputws(msg, stderr);
fputwc(L'\n', stderr);
}
inline void Error(const wchar_t *msg) {
fputws(msg, stderr);
Expand Down
4 changes: 4 additions & 0 deletions lib/HLSL/DxilLinker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1276,6 +1276,10 @@ void DxilLinkJob::RunPreparePass(Module &M) {

// Clean up vectors, and run mem2reg again
PM.add(createScalarizerPass());

// Need dxilelimvector for pre 6.9
// PM.add(createDxilEliminateVectorPass());

PM.add(createPromoteMemoryToRegisterPass());

PM.add(createSimplifyInstPass());
Expand Down
2 changes: 1 addition & 1 deletion tools/clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ enum ArBasicKind {
#define IS_BPROP_UNSIGNABLE(_Props) \
(IS_BPROP_AINT(_Props) && GET_BPROP_BITS(_Props) != BPROP_BITS12)

#define IS_BPROP_ENUM(_Props) (((_Props)&BPROP_ENUM) != 0)
#define IS_BPROP_ENUM(_Props) (((_Props) & BPROP_ENUM) != 0)

const UINT g_uBasicKindProps[] = {
BPROP_PRIMITIVE | BPROP_BOOLEAN | BPROP_INTEGER | BPROP_NUMERIC |
Expand Down
154 changes: 154 additions & 0 deletions tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// RUN: %dxc -Wno-conversion -T cs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F32
// RUN: %dxc -Wno-conversion -T cs_6_9 -DF64 %s | FileCheck %s --check-prefixes=CHECK,F64

RWByteAddressBuffer buf;

// "TYPE" is the primary element type under test.
// "UNTYPE" is the other type used for mixed precision testing.
#ifdef F64
typedef double TYPE;
typedef float UNTYPE;
#else
typedef float TYPE;
typedef double UNTYPE;
#endif

// Two main test function overloads. One expects matching element types.
// The other uses different types to test ops and overload resolution.
template <typename T, int N> vector<T, N> dostuff(vector<T, N> thing1, vector<T, N> thing2, vector<T, N> thing3);
vector<TYPE, 8> dostuff(vector<TYPE, 8> thing1, vector<UNTYPE, 8> thing2, vector<TYPE, 8> thing3);

// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
// F32-DAG: %dx.types.ResRet.[[TY:v8f32]] = type { [[TYPE:<8 x float>]]
// F32-DAG: %dx.types.ResRet.[[UNTY:v8f64]] = type { [[UNTYPE:<8 x double>]]
// F64-DAG: %dx.types.ResRet.[[TY:v8f64]] = type { [[TYPE:<8 x double>]]
// F64-DAG: %dx.types.ResRet.[[UNTY:v8f32]] = type { [[UNTYPE:<8 x float>]]

// Verify that groupshared vectors are kept as aggregates
// CHECK: @"\01?gs_vec1@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]]
// CHECK: @"\01?gs_vec2@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]]
// CHECK: @"\01?gs_vec3@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]]
groupshared vector<TYPE, 8> gs_vec1, gs_vec2, gs_vec3;

// Compute shader entry point. Loads three TYPE vectors and one UNTYPE vector
// from buf, calls dostuff with matching element types, with mixed element
// types, and on the groupshared vectors, then stores a combined arithmetic
// result back to buf at offset 240.
[numthreads(8,1,1)]
void main() {
// CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer

// CHECK: [[vec1_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 0
// CHECK-DAG: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0
// F32-DAG: [[vec1_32:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0
// F64-DAG: [[vec1_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0
vector<TYPE, 8> vec1 = buf.Load<vector<TYPE, 8> >(0);

// CHECK: [[vec2_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 60
// CHECK-DAG: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0
// F32-DAG: [[vec2_32:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0
// F64-DAG: [[vec2_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0
vector<TYPE, 8> vec2 = buf.Load<vector<TYPE, 8> >(60);

// CHECK: [[vec3_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 120
// CHECK-DAG: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec3_res]], 0
// F64-DAG: [[vec3_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec3_res]], 0
vector<TYPE, 8> vec3 = buf.Load<vector<TYPE, 8> >(120);

// CHECK: [[unvec_res:%.*]] = call %dx.types.ResRet.[[UNTY]] @dx.op.rawBufferVectorLoad.[[UNTY]](i32 303, %dx.types.Handle [[buf]], i32 180
// CHECK-DAG: [[unvec:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0
// F32-DAG: [[unvec_64:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0
// F64-DAG: [[unvec_32:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0
vector<UNTYPE, 8> unvec = buf.Load<vector<UNTYPE, 8> >(180);

// All three arguments share the TYPE element type here.
vec1 = dostuff(vec1, vec2, vec3);

// Test mixed type operations
vec2 = dostuff(vec2, unvec, vec3);

// Same call shape, but reading and writing the groupshared vectors.
gs_vec2 = dostuff(gs_vec1, gs_vec2, gs_vec3);

// mix groupshared and non
//vec1 = dostuff(vec1, gs_vec2, vec3);

buf.Store<vector<TYPE, 8> >(240, vec1 * vec2 - vec3 * gs_vec1 + gs_vec2 / gs_vec3);
}

// Test the required ops on long vectors and confirm correct lowering.
// Matching-type overload: all three inputs share element type T and length N.
// Sums the results of min, max, clamp, fma (evaluated on double vectors and
// cast back), step, exp, log, tanh and atan into a single vector, which is
// returned. The comment directives between statements are matched in order
// against the compiler output, so statement order matters.
template <typename T, int N>
vector<T, N> dostuff(vector<T, N> thing1, vector<T, N> thing2, vector<T, N> thing3) {
vector<T, N> res = 0;

// CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 36, [[TYPE]] [[vec1]], [[TYPE]] [[vec2]]) ; FMin(a,b)
res += min(thing1, thing2);
// CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec1]], [[TYPE]] [[vec3]]) ; FMax(a,b)
res += max(thing1, thing3);

// CHECK: [[tmp:%.*]] = call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec1]], [[TYPE]] [[vec2]]) ; FMax(a,b)
// CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 36, [[TYPE]] [[tmp]], [[TYPE]] [[vec3]]) ; FMin(a,b)
res += clamp(thing1, thing2, thing3);

// F32: [[vec3_64:%.*]] = fpext <8 x float> [[vec3]] to <8 x double>
// F32: [[vec2_64:%.*]] = fpext <8 x float> [[vec2]] to <8 x double>
// F32: [[vec1_64:%.*]] = fpext <8 x float> [[vec1]] to <8 x double>
// CHECK: call <8 x double> @dx.op.tertiary.v8f64(i32 47, <8 x double> [[vec1_64]], <8 x double> [[vec2_64]], <8 x double> [[vec3_64]]) ; Fma(a,b,c)
res += (vector<T, N>)fma((vector<double, N>)thing1, (vector<double, N>)(thing2), (vector<double, N>)thing3);

// Even in the double test, these will be downconverted because these builtins only take floats.
// F64: [[vec2_32:%.*]] = fptrunc <8 x double> [[vec2]] to <8 x float>
// F64: [[vec1_32:%.*]] = fptrunc <8 x double> [[vec1]] to <8 x float>

// CHECK: [[tmp:%.*]] = fcmp fast olt <8 x float> [[vec2_32]], [[vec1_32]]
// CHECK: select <8 x i1> [[tmp]], [[TYPE]] zeroinitializer, [[TYPE]]
res += step(thing1, thing2);

// CHECK: [[tmp:%.*]] = fmul fast <8 x float> [[vec1_32]], <float 0x
// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> [[tmp]]) ; Exp(value)
res += exp(thing1);

// CHECK: [[tmp:%.*]] = call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> [[vec1_32]]) ; Log(value)
// CHECK: fmul fast <8 x float> [[tmp]], <float 0x
res += log(thing1);

// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 20, <8 x float> [[vec1_32]]) ; Htan(value)
res += tanh(thing1);
// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> [[vec1_32]]) ; Atan(value)
res += atan(thing1);

return res;
}

// A mixed-type overload to test overload resolution and mingle different vector element types in ops
// Mixed-type overload: thing1 and thing3 are TYPE vectors while thing2 is an
// UNTYPE vector, all of length 8. Sums the results of min, max, clamp, fma
// (evaluated on double vectors and cast back), step, exp, log, tanh and atan
// into a single TYPE vector, which is returned.
vector<TYPE, 8> dostuff(vector<TYPE, 8> thing1, vector<UNTYPE, 8> thing2, vector<TYPE, 8> thing3) {
vector<TYPE, 8> res = 0;

// F64: [[unvec_64:%.*]] = fpext <8 x float> [[unvec]] to <8 x double>
// CHECK: call <8 x double> @dx.op.binary.v8f64(i32 36, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]]) ; FMin(a,b)
res += min(thing1, thing2);

// CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec2]], [[TYPE]] [[vec3]]) ; FMax(a,b)
res += max(thing1, thing3);

// CHECK: [[tmp:%.*]] = call <8 x double> @dx.op.binary.v8f64(i32 35, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]]) ; FMax(a,b)
// CHECK: call <8 x double> @dx.op.binary.v8f64(i32 36, <8 x double> [[tmp]], <8 x double> [[vec3_64]]) ; FMin(a,b)
res += clamp(thing1, thing2, thing3);

// CHECK: call <8 x double> @dx.op.tertiary.v8f64(i32 47, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]], <8 x double> [[vec3_64]]) ; Fma(a,b,c)
res += (vector<TYPE, 8>)fma((vector<double,8>)thing1, (vector<double,8>)(thing2), (vector<double,8>)thing3);

// F32: [[unvec_32:%.*]] = fptrunc <8 x double> [[unvec]] to <8 x float>
// CHECK: [[tmp:%.*]] = fcmp fast olt <8 x float> [[unvec_32]], [[vec2_32]]
// CHECK: select <8 x i1> [[tmp]], [[TYPE]] zeroinitializer, [[TYPE]]
res += step(thing1, thing2);

// CHECK: [[tmp:%.*]] = fmul fast <8 x float> [[vec2_32]], <float 0x
// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> [[tmp]]) ; Exp(value)
res += exp(thing1);

// CHECK: [[tmp:%.*]] = call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> [[vec2_32]]) ; Log(value)
// CHECK: fmul fast <8 x float> [[tmp]], <float 0x
res += log(thing1);

// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 20, <8 x float> [[vec2_32]]) ; Htan(value)
res += tanh(thing1);
// CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> [[vec2_32]]) ; Atan(value)
res += atan(thing1);

return res;
}
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,7 @@ export float4 xform(float4 v) {

// Vertex shader entry point: transforms the input position with xform and
// scales the result by StructBuf[0].f.
// NOTE(review): the body holds a single-expression return followed by a
// three-statement form of the same computation. This looks like the old and
// new sides of a diff hunk captured together - confirm which form is intended.
[shader("vertex")]
float4 main(float3 pos : Position) : SV_Position {
return xform(float4(pos, 1)) * StructBuf[0].f;
float4 res = xform(float4(pos, 1));
res *=StructBuf[0].f;
return res ;
}
7 changes: 7 additions & 0 deletions tools/clang/unittests/HLSLExec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,10 @@ endif()
file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" DOS_STYLE_SOURCE_DIR)
file(TO_NATIVE_PATH "${TAEF_BIN_DIR}" DOS_TAEF_BIN_DIR)
configure_file(ExecHLSLTests.vcxproj.user.txt ExecHLSLTests.vcxproj.user)

# Copy the ShaderOpArith.xml file to the output directory. It's used by the exec
# tests and it's convenient to have it copied here if you want to easily copy
# the tests to another machine after building.
# NOTE(review): file(COPY) runs when CMake configures the project, not at build
# time, so later edits to the XML are presumably not re-copied until CMake
# re-runs - confirm this is acceptable.
# NOTE(review): CMAKE_BUILD_TYPE is typically empty with multi-config
# generators, which would drop the configuration directory from the
# destination path - verify against the generators this project supports.
set(XML_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/ShaderOpArith.xml)
set(XML_DESTINATION ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}/bin)
file(COPY ${XML_SOURCE} DESTINATION ${XML_DESTINATION})
Loading
Loading