Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ length_vec_impl(vector<T, N> X) {
#endif
}

// Builds the 4-component result of dst from two 4-component inputs:
//   [0] the constant 1
//   [1] Src0[1] * Src1[1]
//   [2] Src0[2], copied unchanged
//   [3] Src1[3], copied unchanged
// Src0[0], Src0[3], Src1[0] and Src1[2] are never read.
template <typename T>
constexpr vector<T, 4> dst_impl(vector<T, 4> Src0, vector<T, 4> Src1) {
vector<T, 4> Dest = {1, Src0[1] * Src1[1], Src0[2], Src1[3]};
return Dest;
}

// Returns length_impl applied to the difference X - Y.
template <typename T> constexpr T distance_impl(T X, T Y) {
return length_impl(X - Y);
}
Expand Down
26 changes: 26 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,32 @@ const inline float dot2add(half2 A, half2 B, float C) {
return __detail::dot2add_impl(A, B, C);
}

//===----------------------------------------------------------------------===//
// dst builtins
//===----------------------------------------------------------------------===//

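/// \fn fvector dst(fvector, fvector)
/// \brief Calculates a distance vector.
/// \param Src0 [in] The first vector. Only its y and z components are read.
/// Conventionally holds a squared distance, laid out as {_, d*d, d*d, _}.
/// \param Src1 [in] The second vector. Only its y and w components are read.
/// Conventionally holds a reciprocal distance, laid out as {_, 1/d, _, 1/d}.
///
/// Returns the vector {1, Src0.y * Src1.y, Src0.z, Src1.w}. With the
/// conventional inputs above, where d is a distance, the result is
/// {1, d, d*d, 1/d}.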
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

contain {1, d, d*d, 1/d} here is also confusing to me

Copy link
Contributor Author

@metkarpoonam metkarpoonam Apr 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The combination of squared distance (src0) and reciprocal distance (src1) enables the dst function to efficiently compute a distance vector.
dest = (1, d*d, d*d,1) * (1, 1/d, 1, 1/d)

dest = (1, d, d*d, 1/d)

https://stackoverflow.com/questions/8525803/what-is-the-hlsl-dst-instruction-for

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the algorithm's correctness is the problem. It's confusing because no one knows what d is. Your inputs are Src0 and Src1. You need to put your documentation in those terms.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that was the point I was making as well. Think about the comment header from the perspective of a random user who is being shown the comment as an editor popup. It needs to be useful in that context


// half4 overload: forwards both operands to __detail::dst_impl. Carries the
// 16-bit availability annotation for shader model 6.2.
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
const inline half4 dst(half4 Src0, half4 Src1) {
return __detail::dst_impl(Src0, Src1);
}

// float4 overload: forwards both operands to __detail::dst_impl.
const inline float4 dst(float4 Src0, float4 Src1) {
return __detail::dst_impl(Src0, Src1);
}

// double4 overload: forwards both operands to __detail::dst_impl.
const inline double4 dst(double4 Src0, double4 Src1) {
return __detail::dst_impl(Src0, Src1);
}

//===----------------------------------------------------------------------===//
// fmod builtins
//===----------------------------------------------------------------------===//
Expand Down
57 changes: 57 additions & 0 deletions clang/test/CodeGenHLSL/builtins/dst.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.2-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s


// CHECK-LABEL: define {{.*}} <4 x float> @{{[A-Za-z1-9_]+}}dst_impl{{[A-Za-z1-9_]*}}(
// CHECK-SAME: <4 x float> {{[A-Za-z )(]*}} [[P:%.*]], <4 x float> {{[A-Za-z )(]*}} [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK: [[VECEXT:%.*]] = extractelement <4 x float> [[PADDR:%.*]], i32 1
// CHECK: [[VECEXT1:%.*]] = extractelement <4 x float> [[QADDR:%.*]], i32 1
// CHECK: [[MULRES:%.*]] = fmul {{[A-Za-z ]*}} float [[VECEXT]], [[VECEXT1]]
// CHECK: [[VECINIT:%.*]] = insertelement <4 x float> <float 1.000000e+00, float poison, float poison, float poison>, float [[MULRES]], i32 1
// CHECK: [[VECINIT2:%.*]] = extractelement <4 x float> [[PADDR2:%.*]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x float> [[VECINIT]], float [[VECINIT2]], i32 2
// CHECK: [[VECINIT4:%.*]] = extractelement <4 x float> [[QADDR3:%.*]], i32 3
// CHECK: [[VECINIT5:%.*]] = insertelement <4 x float> [[VECINIT3]], float [[VECINIT4]], i32 3
// CHECK-NEXT: store <4 x float> [[VECINIT5]], ptr [[DEST:%.*]], align 16
// CHECK-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[DEST]], align 16
// CHECK-NEXT: ret <4 x float> [[RES]]
// Test driver: calls dst with float4 operands. The FileCheck directives above
// match the <4 x float> definition of dst_impl.
float4 dstWithFloat(float4 p1, float4 p2)
{
return dst(p1, p2);
}

// CHECK-LABEL: define {{.*}} <4 x half> @{{[A-Za-z1-9_]+}}dst_impl{{[A-Za-z1-9_]*}}(
// CHECK-SAME: <4 x half> {{[A-Za-z )(]*}} [[P:%.*]], <4 x half> {{[A-Za-z )(]*}} [[Q:%.*]]) #[[ATTR0]] {
// CHECK: [[VECEXT:%.*]] = extractelement <4 x half> [[PADDR:%.*]], i32 1
// CHECK: [[VECEXT1:%.*]] = extractelement <4 x half> [[QADDR:%.*]], i32 1
// CHECK: [[MULRES:%.*]] = fmul {{[A-Za-z ]*}} half [[VECEXT]], [[VECEXT1]]
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> <half 0xH3C00, half poison, half poison, half poison>, half [[MULRES]], i32 1
// CHECK: [[VECINIT2:%.*]] = extractelement <4 x half> [[PADDR2:%.*]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT]], half [[VECINIT2]], i32 2
// CHECK: [[VECINIT4:%.*]] = extractelement <4 x half> [[QADDR3:%.*]], i32 3
// CHECK: [[VECINIT5:%.*]] = insertelement <4 x half> [[VECINIT3]], half [[VECINIT4]], i32 3
// CHECK-NEXT: store <4 x half> [[VECINIT5]], ptr [[DEST:%.*]], align 8
// CHECK-NEXT: [[RES:%.*]] = load <4 x half>, ptr [[DEST]], align 8
// CHECK-NEXT: ret <4 x half> [[RES]]
// Test driver: calls dst with half4 operands. The FileCheck directives above
// match the <4 x half> definition of dst_impl.
half4 dstwithHalf(half4 p1, half4 p2)
{
return dst(p1, p2);
}

// CHECK-LABEL: define {{.*}} <4 x double> @{{[A-Za-z1-9_]+}}dst_impl{{[A-Za-z1-9_]*}}(
// CHECK-SAME: <4 x double> {{[A-Za-z )(]*}} [[P:%.*]], <4 x double> {{[A-Za-z )(]*}} [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK: [[VECEXT:%.*]] = extractelement <4 x double> [[PADDR:%.*]], i32 1
// CHECK: [[VECEXT1:%.*]] = extractelement <4 x double> [[QADDR:%.*]], i32 1
// CHECK: [[MULRES:%.*]] = fmul {{[A-Za-z ]*}} double [[VECEXT]], [[VECEXT1]]
// CHECK: [[VECINIT:%.*]] = insertelement <4 x double> <double 1.000000e+00, double poison, double poison, double poison>, double [[MULRES]], i32 1
// CHECK: [[VECINIT2:%.*]] = extractelement <4 x double> [[PADDR2:%.*]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x double> [[VECINIT]], double [[VECINIT2]], i32 2
// CHECK: [[VECINIT4:%.*]] = extractelement <4 x double> [[QADDR3:%.*]], i32 3
// CHECK: [[VECINIT5:%.*]] = insertelement <4 x double> [[VECINIT3]], double [[VECINIT4]], i32 3
// CHECK-NEXT: store <4 x double> [[VECINIT5]], ptr [[DEST:%.*]], align 32
// CHECK-NEXT: [[RES:%.*]] = load <4 x double>, ptr [[DEST]], align 32
// CHECK-NEXT: ret <4 x double> [[RES]]
// Test driver: calls dst with double4 operands. The FileCheck directives above
// match the <4 x double> definition of dst_impl.
double4 dstWithDouble(double4 p1, double4 p2)
{
return dst(p1, p2);
}