microsoft · simoll · Feb 13, 2025 · Mar 24, 2025 · Dec 3, 2024 · Apr 4, 2025
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -1,10 +1,12 @@
 trigger:
   - main
   - release*
+  - staging*
 
 pr: 
   - main
   - release*
+  - staging*
 
 resources:
 - repo: self

diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h
@@ -258,6 +258,17 @@ inline void LogErrorFmt(const wchar_t *fmt, ...) {
   WEX::Logging::Log::Error(buf.data());
 }
 
+// Logs the text vFormatToWString builds from fmt and its arguments as an
+// error, then fails the test.
+inline void LogErrorFmtThrow(const wchar_t *fmt, ...) {
+  va_list varArgs;
+  va_start(varArgs, fmt);
+  const std::wstring message(vFormatToWString(fmt, varArgs));
+  va_end(varArgs);
+  WEX::Logging::Log::Error(message.c_str());
+  VERIFY_FAIL(L"Test error"); // Throws an exception to abort the test.
+}
+
 inline std::wstring
 GetPathToHlslDataFile(const wchar_t *relative,
                       LPCWSTR paramName = HLSLDATAFILEPARAM,
@@ -459,15 +470,17 @@ inline bool GetTestParamUseWARP(bool defaultVal) {
 
 #ifdef FP_SUBNORMAL
 
-inline bool isdenorm(float f) { return FP_SUBNORMAL == std::fpclassify(f); }
+// True when std::fpclassify reports f as subnormal (denormal).
+template <typename T>
+inline bool isdenorm(T f) { return std::fpclassify(f) == FP_SUBNORMAL; }
 
 #else
 
-inline bool isdenorm(float f) {
-  return (std::numeric_limits<float>::denorm_min() <= f &&
-          f < std::numeric_limits<float>::min()) ||
-         (-std::numeric_limits<float>::min() < f &&
-          f <= -std::numeric_limits<float>::denorm_min());
+template <typename T> inline bool isdenorm(T f) {
+  // Subnormal magnitudes lie in [denorm_min, min) on either side of zero.
+  const T Tiny = std::numeric_limits<T>::denorm_min();
+  const T Norm = std::numeric_limits<T>::min();
+  return (Tiny <= f && f < Norm) || (-Norm < f && f <= -Tiny);
 }
 
 #endif // FP_SUBNORMAL
@@ -515,6 +528,31 @@ inline bool isnanFloat16(uint16_t val) {
 uint16_t ConvertFloat32ToFloat16(float val) throw();
 float ConvertFloat16ToFloat32(uint16_t val) throw();
 
+// Returns true when Src is within ULPTolerance representable steps of Ref.
+// NaN matches only NaN; a negative ULPTolerance disables the ULP window.
+inline bool CompareDoubleULP(
+    const double &Src, const double &Ref, int64_t ULPTolerance,
+    hlsl::DXIL::Float32DenormMode Mode = hlsl::DXIL::Float32DenormMode::Any) {
+  if (Src == Ref)
+    return true;
+  // Handle NaN on either side so a NaN Ref never reaches the bit compare.
+  if (std::isnan(Src) || std::isnan(Ref))
+    return std::isnan(Src) && std::isnan(Ref);
+  // If denorm expected, output can be sign preserved zero. Otherwise output
+  // should pass the regular ulp testing.
+  if (Mode == hlsl::DXIL::Float32DenormMode::Any && isdenorm(Ref) &&
+      Src == 0 && std::signbit(Src) == std::signbit(Ref))
+    return true;
+  if (ULPTolerance < 0)
+    return false;
+  // For FTZ or Preserve mode, we should get the expected number within
+  // ULPTolerance for any operations. Unsigned math keeps this overflow-free.
+  const uint64_t SBits = *((const uint64_t *)&Src);
+  const uint64_t RBits = *((const uint64_t *)&Ref);
+  const uint64_t Gap = SBits > RBits ? SBits - RBits : RBits - SBits;
+  return Gap <= (uint64_t)ULPTolerance;
+}
+
 inline bool CompareFloatULP(
     const float &fsrc, const float &fref, int ULPTolerance,
     hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) {

diff --git a/include/dxc/Test/WEXAdapter.h b/include/dxc/Test/WEXAdapter.h
@@ -178,8 +178,8 @@ inline void EndGroup(const wchar_t *name) {
   wprintf(L"END TEST(S): <%ls>\n", name);
 }
 inline void Comment(const wchar_t *msg) {
-  fputws(msg, stdout);
-  fputwc(L'\n', stdout);
+  fputws(msg, stderr); // comments go to stderr, the stream Error also uses
+  fputwc(L'\n', stderr);
 }
 inline void Error(const wchar_t *msg) {
   fputws(msg, stderr);

diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp
@@ -1276,6 +1276,10 @@ void DxilLinkJob::RunPreparePass(Module &M) {
 
   // Clean up vectors, and run mem2reg again
   PM.add(createScalarizerPass());
+
+  // TODO: dxilelimvector is needed for pre 6.9; the pass stays disabled here.
+  // PM.add(createDxilEliminateVectorPass());
+
   PM.add(createPromoteMemoryToRegisterPass());
 
   PM.add(createSimplifyInstPass());

diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp
@@ -387,7 +387,7 @@ enum ArBasicKind {
 #define IS_BPROP_UNSIGNABLE(_Props)                                            \
   (IS_BPROP_AINT(_Props) && GET_BPROP_BITS(_Props) != BPROP_BITS12)
 
-#define IS_BPROP_ENUM(_Props) (((_Props)&BPROP_ENUM) != 0)
+#define IS_BPROP_ENUM(_Props) (((_Props) & BPROP_ENUM) != 0)
 
 const UINT g_uBasicKindProps[] = {
     BPROP_PRIMITIVE | BPROP_BOOLEAN | BPROP_INTEGER | BPROP_NUMERIC |

diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl
@@ -0,0 +1,154 @@
+// RUN: %dxc -Wno-conversion -T cs_6_9       %s | FileCheck %s --check-prefixes=CHECK,F32
+// RUN: %dxc -Wno-conversion -T cs_6_9 -DF64 %s | FileCheck %s --check-prefixes=CHECK,F64
+
+RWByteAddressBuffer buf;
+
+// "TYPE" is the primary element type under test.
+// "UNTYPE" is the other type used for mixed precision testing.
+#ifdef F64
+typedef double TYPE;
+typedef float UNTYPE;
+#else
+typedef float TYPE;
+typedef double UNTYPE;
+#endif
+
+// Two main test function overloads. One expects matching element types.
+// The other uses different types to test ops and overload resolution.
+template <typename T, int N> vector<T, N> dostuff(vector<T, N> thing1, vector<T, N> thing2, vector<T, N> thing3);
+vector<TYPE, 8> dostuff(vector<TYPE, 8> thing1, vector<UNTYPE, 8> thing2, vector<TYPE, 8> thing3);
+
+// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly.
+// F32-DAG: %dx.types.ResRet.[[TY:v8f32]] = type { [[TYPE:<8 x float>]]
+// F32-DAG: %dx.types.ResRet.[[UNTY:v8f64]] = type { [[UNTYPE:<8 x double>]]
+// F64-DAG: %dx.types.ResRet.[[TY:v8f64]] = type { [[TYPE:<8 x double>]]
+// F64-DAG: %dx.types.ResRet.[[UNTY:v8f32]] = type { [[UNTYPE:<8 x float>]]
+
+// Verify that groupshared vectors are kept as aggregates
+// CHECK: @"\01?gs_vec1@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]]
+// CHECK: @"\01?gs_vec2@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]]
+// CHECK: @"\01?gs_vec3@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]]
+groupshared vector<TYPE, 8> gs_vec1, gs_vec2, gs_vec3;
+
+[numthreads(8,1,1)]
+void main() { // loads four vectors from buf, calls dostuff, stores one result
+  // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })  ; AnnotateHandle(res,props)  resource: RWByteAddressBuffer
+
+  // CHECK: [[vec1_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 0
+  // CHECK-DAG: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0
+  // F32-DAG: [[vec1_32:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0
+  // F64-DAG: [[vec1_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0
+  vector<TYPE, 8> vec1 = buf.Load<vector<TYPE, 8> >(0);
+
+  // CHECK: [[vec2_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 60
+  // CHECK-DAG: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0
+  // F32-DAG: [[vec2_32:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0
+  // F64-DAG: [[vec2_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0
+  vector<TYPE, 8> vec2 = buf.Load<vector<TYPE, 8> >(60);
+
+  // CHECK: [[vec3_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 120
+  // CHECK-DAG: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec3_res]], 0
+  // F64-DAG: [[vec3_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec3_res]], 0
+  vector<TYPE, 8> vec3 = buf.Load<vector<TYPE, 8> >(120);
+
+  // CHECK: [[unvec_res:%.*]] = call %dx.types.ResRet.[[UNTY]] @dx.op.rawBufferVectorLoad.[[UNTY]](i32 303, %dx.types.Handle [[buf]], i32 180
+  // CHECK-DAG: [[unvec:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0
+  // F32-DAG: [[unvec_64:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0
+  // F64-DAG: [[unvec_32:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0
+  vector<UNTYPE, 8> unvec = buf.Load<vector<UNTYPE, 8> >(180);
+
+  vec1 = dostuff(vec1, vec2, vec3);
+
+  // Test mixed type operations (the UNTYPE argument picks the non-template overload)
+  vec2 = dostuff(vec2, unvec, vec3);
+
+  gs_vec2 = dostuff(gs_vec1, gs_vec2, gs_vec3);
+
+  // Mix groupshared and non-groupshared operands (call left disabled)
+  //vec1 = dostuff(vec1, gs_vec2, vec3);
+
+  buf.Store<vector<TYPE, 8> >(240, vec1 * vec2 - vec3 * gs_vec1 + gs_vec2 / gs_vec3);
+}
+
+// Test the required ops on long vectors and confirm correct lowering.
+template <typename T, int N>
+vector<T, N> dostuff(vector<T, N> thing1, vector<T, N> thing2, vector<T, N> thing3) {
+  vector<T, N> res = 0; // running sum of every op's result; returned at the end
+
+  // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 36, [[TYPE]] [[vec1]], [[TYPE]] [[vec2]])  ; FMin(a,b)
+  res += min(thing1, thing2);
+  // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec1]], [[TYPE]] [[vec3]])  ; FMax(a,b)
+  res += max(thing1, thing3);
+
+  // CHECK: [[tmp:%.*]] = call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec1]], [[TYPE]] [[vec2]])  ; FMax(a,b)
+  // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 36, [[TYPE]] [[tmp]], [[TYPE]] [[vec3]])  ; FMin(a,b)
+  res += clamp(thing1, thing2, thing3);
+
+  // F32: [[vec3_64:%.*]] = fpext <8 x float> [[vec3]] to <8 x double>
+  // F32: [[vec2_64:%.*]] = fpext <8 x float> [[vec2]] to <8 x double>
+  // F32: [[vec1_64:%.*]] = fpext <8 x float> [[vec1]] to <8 x double>
+  // CHECK: call <8 x double> @dx.op.tertiary.v8f64(i32 47, <8 x double> [[vec1_64]], <8 x double> [[vec2_64]], <8 x double> [[vec3_64]]) ; Fma(a,b,c)
+  res += (vector<T, N>)fma((vector<double, N>)thing1, (vector<double, N>)(thing2), (vector<double, N>)thing3);
+
+  // Even in the double test, these will be downconverted because these builtins only take floats.
+  // F64: [[vec2_32:%.*]] = fptrunc <8 x double> [[vec2]] to <8 x float>
+  // F64: [[vec1_32:%.*]] = fptrunc <8 x double> [[vec1]] to <8 x float>
+
+  // CHECK: [[tmp:%.*]] = fcmp fast olt <8 x float> [[vec2_32]], [[vec1_32]]
+  // CHECK: select <8 x i1> [[tmp]], [[TYPE]] zeroinitializer, [[TYPE]]
+  res += step(thing1, thing2);
+
+  // CHECK: [[tmp:%.*]] = fmul fast <8 x float> [[vec1_32]], <float 0x
+  // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> [[tmp]])  ; Exp(value)
+  res += exp(thing1);
+
+  // CHECK: [[tmp:%.*]] = call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> [[vec1_32]])  ; Log(value)
+  // CHECK: fmul fast <8 x float> [[tmp]], <float 0x
+  res += log(thing1);
+
+  // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 20, <8 x float> [[vec1_32]])  ; Htan(value)
+  res += tanh(thing1);
+  // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> [[vec1_32]])  ; Atan(value)
+  res += atan(thing1);
+
+  return res;
+}
+
+// Mixed-type overload: tests overload resolution and ops whose operands have different vector element types.
+vector<TYPE, 8> dostuff(vector<TYPE, 8> thing1, vector<UNTYPE, 8> thing2, vector<TYPE, 8> thing3) {
+  vector<TYPE, 8> res = 0; // running sum of every op's result; returned at the end
+
+  // F64: [[unvec_64:%.*]] = fpext <8 x float> [[unvec]] to <8 x double>
+  // CHECK: call <8 x double> @dx.op.binary.v8f64(i32 36, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]])  ; FMin(a,b)
+  res += min(thing1, thing2);
+
+  // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec2]], [[TYPE]] [[vec3]]) ; FMax(a,b)
+  res += max(thing1, thing3);
+
+  // CHECK: [[tmp:%.*]] = call <8 x double> @dx.op.binary.v8f64(i32 35, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]])  ; FMax(a,b)
+  // CHECK: call <8 x double> @dx.op.binary.v8f64(i32 36, <8 x double> [[tmp]], <8 x double> [[vec3_64]])  ; FMin(a,b)
+  res += clamp(thing1, thing2, thing3);
+
+  // CHECK: call <8 x double> @dx.op.tertiary.v8f64(i32 47, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]], <8 x double> [[vec3_64]]) ; Fma(a,b,c)
+  res += (vector<TYPE, 8>)fma((vector<double,8>)thing1, (vector<double,8>)(thing2), (vector<double,8>)thing3);
+
+  // F32: [[unvec_32:%.*]] = fptrunc <8 x double> [[unvec]] to <8 x float>
+  // CHECK: [[tmp:%.*]] = fcmp fast olt <8 x float> [[unvec_32]], [[vec2_32]]
+  // CHECK: select <8 x i1> [[tmp]], [[TYPE]] zeroinitializer, [[TYPE]]
+  res += step(thing1, thing2);
+
+  // CHECK: [[tmp:%.*]] = fmul fast <8 x float> [[vec2_32]], <float 0x
+  // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 21, <8 x float> [[tmp]])  ; Exp(value)
+  res += exp(thing1);
+
+  // CHECK: [[tmp:%.*]] = call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> [[vec2_32]])  ; Log(value)
+  // CHECK: fmul fast <8 x float> [[tmp]], <float 0x
+  res += log(thing1);
+
+  // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 20, <8 x float> [[vec2_32]])  ; Htan(value)
+  res += tanh(thing1);
+  // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> [[vec2_32]])  ; Atan(value)
+  res += atan(thing1);
+
+  return res;
+}
diff --git a/tools/clang/test/HLSLFileCheck/hlsl/linker/resources/preserve_sb_types.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/linker/resources/preserve_sb_types.hlsl
@@ -155,5 +155,7 @@ export float4 xform(float4 v) {
 
 [shader("vertex")]
 float4 main(float3 pos : Position) : SV_Position {
-  return xform(float4(pos, 1)) * StructBuf[0].f;
+  float4 res = xform(float4(pos, 1)); // transform first, in its own statement
+  res *= StructBuf[0].f; // then scale by the f field of StructBuf[0]
+  return res;
 }
diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt
@@ -39,3 +39,10 @@ endif()
 file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" DOS_STYLE_SOURCE_DIR)
 file(TO_NATIVE_PATH "${TAEF_BIN_DIR}" DOS_TAEF_BIN_DIR)
 configure_file(ExecHLSLTests.vcxproj.user.txt ExecHLSLTests.vcxproj.user)
+
+# Copy ShaderOpArith.xml, which the exec tests use, to the output directory so
+# the tests are easy to copy to another machine after building.
+# NOTE(review): CMAKE_BUILD_TYPE may be empty (multi-config generators) - confirm.
+set(XML_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/ShaderOpArith.xml)
+set(XML_DESTINATION ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}/bin)
+file(COPY ${XML_SOURCE} DESTINATION ${XML_DESTINATION}) # copies at configure time