llvm · V-FEXrt · Apr 9, 2025 · Mar 31, 2025 · Mar 31, 2025 · Apr 1, 2025
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -35,6 +35,12 @@ length_vec_impl(vector<T, N> X) {
 #endif
 }
 
+template <typename T> // T is the element type of both inputs and the result.
+constexpr vector<T, 4> dst_impl(vector<T, 4> Src0, vector<T, 4> Src1) {
+  vector<T, 4> Dest = {1, Src0[1] * Src1[1], Src0[2], Src1[3]};
+  return Dest; // For Src0={_,d*d,d*d,_}, Src1={_,1/d,_,1/d}: {1,d,d*d,1/d}.
+}
+
 template <typename T> constexpr T distance_impl(T X, T Y) {
   return length_impl(X - Y);
 }

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -174,6 +174,31 @@ const inline float distance(__detail::HLSL_FIXED_VECTOR<float, N> X,
                             __detail::HLSL_FIXED_VECTOR<float, N> Y) {
   return __detail::distance_vec_impl(X, Y);
 }
+
+//===----------------------------------------------------------------------===//
+// dst builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn vector<T, 4> dst(vector<T, 4> Src0, vector<T, 4> Src1)
+/// \brief Calculates a distance vector.
+/// \param Src0 [in] The first vector, containing {_, d*d, d*d, _}.
+/// \param Src1 [in] The second vector, containing {_, 1/d, _, 1/d}.
+///
+/// Returns {1, Src0[1] * Src1[1], Src0[2], Src1[3]}, i.e. {1, d, d*d, 1/d} for
+/// the inputs described above. T is half, float, or double.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half4 dst(half4 Src0, half4 Src1) {
+  return __detail::dst_impl(Src0, Src1);
+}
+
+const inline float4 dst(float4 Src0, float4 Src1) {
+  return __detail::dst_impl(Src0, Src1);
+}
+
+const inline double4 dst(double4 Src0, double4 Src1) {
+  return __detail::dst_impl(Src0, Src1);
+}
 
 //===----------------------------------------------------------------------===//
 // dot2add builtins

diff --git a/clang/test/CodeGenHLSL/builtins/dst.hlsl b/clang/test/CodeGenHLSL/builtins/dst.hlsl
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.2-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+
+// CHECK-LABEL: define linkonce_odr noundef nofpclass(nan inf) <4 x float> @_ZN4hlsl8__detail8dst_implIfEEDv4_T_S3_S3_(
+// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P:%.*]], <4 x float> noundef nofpclass(nan inf) [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK: [[VECEXT:%.*]] = extractelement <4 x float> [[PADDR:%.*]], i32 1
+// CHECK: [[VECEXT1:%.*]] = extractelement <4 x float> [[QADDR:%.*]], i32 1
+// CHECK: [[MULRES:%.*]] = fmul reassoc nnan ninf nsz arcp afn float [[VECEXT]], [[VECEXT1]]
+// CHECK: [[VECINIT:%.*]] = insertelement <4 x float> <float 1.000000e+00, float poison, float poison, float poison>, float [[MULRES]], i32 1
+// CHECK: [[VECINIT2:%.*]] = extractelement <4 x float> [[PADDR2:%.*]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <4 x float> [[VECINIT]], float [[VECINIT2]], i32 2
+// CHECK: [[VECINIT4:%.*]] = extractelement <4 x float> [[QADDR3:%.*]], i32 3
+// CHECK: [[VECINIT5:%.*]] = insertelement <4 x float> [[VECINIT3]], float [[VECINIT4]], i32 3
+// CHECK-NEXT: store <4 x float> [[VECINIT5]], ptr [[DEST:%.*]], align 16
+// CHECK-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[DEST]], align 16
+// CHECK-NEXT: ret <4 x float> [[RES]]
+float4 dstWithFloat(float4 p1, float4 p2) // Calls dst on float4 operands.
+{
+    return dst(p1, p2);
+}
+
+// CHECK-LABEL: define linkonce_odr noundef nofpclass(nan inf) <4 x half> @_ZN4hlsl8__detail8dst_implIDhEEDv4_T_S3_S3_(
+// CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P:%.*]], <4 x half> noundef nofpclass(nan inf) [[Q:%.*]]) #[[ATTR0]] {
+// CHECK: [[VECEXT:%.*]] = extractelement <4 x half> [[PADDR:%.*]], i32 1
+// CHECK: [[VECEXT1:%.*]] = extractelement <4 x half> [[QADDR:%.*]], i32 1
+// CHECK: [[MULRES:%.*]] = fmul reassoc nnan ninf nsz arcp afn half [[VECEXT]], [[VECEXT1]]
+// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> <half 0xH3C00, half poison, half poison, half poison>, half [[MULRES]], i32 1
+// CHECK: [[VECINIT2:%.*]] = extractelement <4 x half> [[PADDR2:%.*]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT]], half [[VECINIT2]], i32 2
+// CHECK: [[VECINIT4:%.*]] = extractelement <4 x half> [[QADDR3:%.*]], i32 3
+// CHECK: [[VECINIT5:%.*]] = insertelement <4 x half> [[VECINIT3]], half [[VECINIT4]], i32 3
+// CHECK-NEXT: store <4 x half> [[VECINIT5]], ptr [[DEST:%.*]], align 8
+// CHECK-NEXT: [[RES:%.*]] = load <4 x half>, ptr [[DEST]], align 8
+// CHECK-NEXT: ret <4 x half> [[RES]]
+half4 dstwithHalf(half4 p1, half4 p2) // Calls dst on half4 operands.
+{
+    return dst(p1, p2);
+}
+
+// CHECK-LABEL: define linkonce_odr noundef nofpclass(nan inf) <4 x double> @_ZN4hlsl8__detail8dst_implIdEEDv4_T_S3_S3_(
+// CHECK-SAME: <4 x double> noundef nofpclass(nan inf) [[P:%.*]], <4 x double> noundef nofpclass(nan inf) [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK: [[VECEXT:%.*]] = extractelement <4 x double> [[PADDR:%.*]], i32 1
+// CHECK: [[VECEXT1:%.*]] = extractelement <4 x double> [[QADDR:%.*]], i32 1
+// CHECK: [[MULRES:%.*]] = fmul reassoc nnan ninf nsz arcp afn double [[VECEXT]], [[VECEXT1]]
+// CHECK: [[VECINIT:%.*]] = insertelement <4 x double> <double 1.000000e+00, double poison, double poison, double poison>, double [[MULRES]], i32 1
+// CHECK: [[VECINIT2:%.*]] = extractelement <4 x double> [[PADDR2:%.*]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <4 x double> [[VECINIT]], double [[VECINIT2]], i32 2
+// CHECK: [[VECINIT4:%.*]] = extractelement <4 x double> [[QADDR3:%.*]], i32 3
+// CHECK: [[VECINIT5:%.*]] = insertelement <4 x double> [[VECINIT3]], double [[VECINIT4]], i32 3
+// CHECK-NEXT: store <4 x double> [[VECINIT5]], ptr [[DEST:%.*]], align 32
+// CHECK-NEXT: [[RES:%.*]] = load <4 x double>, ptr [[DEST]], align 32
+// CHECK-NEXT: ret <4 x double> [[RES]]
+double4 dstWithDouble(double4 p1, double4 p2) // Calls dst on double4 operands.
+{
+    return dst(p1, p2);
+}
+
diff --git a/clang/test/SemaHLSL/BuiltIns/dst-error.hlsl b/clang/test/SemaHLSL/BuiltIns/dst-error.hlsl
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+float4 test_too_many_arg(float4 p0) // Three arguments: no overload takes three.
+{
+    return dst(p0, p0, p0);
+  // expected-error@-1 {{no matching function for call to 'dst'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 3 were provided}}
+}
+
+float4 test_no_second_arg(float4 p0) // One argument: every overload needs two.
+{
+    return dst(p0);
+  // expected-error@-1 {{no matching function for call to 'dst'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 1 was provided}}
+}
+
+float4 test_no_args() // No arguments: every overload needs two.
+{
+    return dst();
+  // expected-error@-1 {{no matching function for call to 'dst'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 0 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 0 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 2 arguments, but 0 were provided}}
+}
+
+float4 test_3_components(float3 p0, float3 p1) // 3-element inputs: no overload is viable.
+{
+    return dst(p0, p1);
+  // expected-error@-1 {{no matching function for call to 'dst'}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: no known conversion from 'vector<[...], 3>' to 'vector<[...], 4>' for 1st argument}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: no known conversion from 'vector<float, 3>' to 'vector<half, 4>' for 1st argument}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: no known conversion from 'vector<float, 3>' to 'vector<double, 4>' for 1st argument}}
+}
+
+float4 test_with_ambiguous_inp(double4 p0, float4 p1) // Mixed double4/float4 operands: ambiguous call.
+{
+    return dst(p0, p1);
+  // expected-error@-1 {{call to 'dst' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}
+
+float2 test_with_return_float2(float4 p0, float4 p1) // Call is accepted; returning the 4-element result as float2 warns.
+{
+    return dst(p0, p1);
+  // expected-warning@-1 {{implicit conversion truncates vector: 'vector<float, 4>' (vector of 4 'float' values) to 'vector<float, 2>' (vector of 2 'float' values)}}
+}
+
+float4 test_with_ambigious_float4_double_inp(float4 p0, double p1) // float4 with a scalar double: ambiguous call.
+{
+    return dst(p0, p1);
+  // expected-error@-1 {{call to 'dst' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}
+
+float4 test_with_ambigious_double_float4_inp(double p0, float4 p1) // Scalar double with a float4: ambiguous call.
+{
+    return dst(p0, p1);
+  // expected-error@-1 {{call to 'dst' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}
+
+float4 test_with_ambigious_double4_float_inp(double4 p0, float p1) // double4 with a scalar float: ambiguous call.
+{
+    return dst(p0, p1);
+  // expected-error@-1 {{call to 'dst' is ambiguous}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}