llvm
diff --git a/‎clang/include/clang/Basic/Builtins.td‎
Lines changed: 6 additions & 6 deletions b/‎clang/include/clang/Basic/Builtins.td‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎clang/lib/CodeGen/CGBuiltin.cpp‎
Lines changed: 15 additions & 14 deletions b/‎clang/lib/CodeGen/CGBuiltin.cpp‎
Lines changed: 15 additions & 14 deletions
diff --git a/‎clang/lib/Headers/hlsl/hlsl_detail.h‎
Lines changed: 20 additions & 2 deletions b/‎clang/lib/Headers/hlsl/hlsl_detail.h‎
Lines changed: 20 additions & 2 deletions
diff --git a/‎clang/lib/Headers/hlsl/hlsl_intrinsics.h‎
Lines changed: 13 additions & 20 deletions b/‎clang/lib/Headers/hlsl/hlsl_intrinsics.h‎
Lines changed: 13 additions & 20 deletions
diff --git a/‎clang/lib/Sema/SemaHLSL.cpp‎
Lines changed: 1 addition & 4 deletions b/‎clang/lib/Sema/SemaHLSL.cpp‎
Lines changed: 1 addition & 4 deletions
diff --git a/‎clang/test/CodeGenHLSL/builtins/distance.hlsl‎
Lines changed: 24 additions & 12 deletions b/‎clang/test/CodeGenHLSL/builtins/distance.hlsl‎
Lines changed: 24 additions & 12 deletions
@@ -4756,6 +4756,12 @@ def HLSLAsDouble : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLReduceAdd : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_reduce_add"];
+  let Attributes = [NoThrow, Const, Constexpr];
+  let Prototype = "void(...)";
+}
+
 def HLSLWaveActiveAnyTrue : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_wave_active_any_true"];
   let Attributes = [NoThrow, Const];
@@ -4840,12 +4846,6 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
-def HLSLLength : LangBuiltin<"HLSL_LANG"> {
-  let Spellings = ["__builtin_hlsl_length"];
-  let Attributes = [NoThrow, Const];
-  let Prototype = "void(...)";
-}
-
 def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_lerp"];
   let Attributes = [NoThrow, Const];
 
@@ -19174,20 +19174,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
         ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
   }
-  case Builtin::BI__builtin_hlsl_length: {
-    Value *X = EmitScalarExpr(E->getArg(0));
-
-    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
-           "length operand must have a float representation");
-    // if the operand is a scalar, we can use the fabs llvm intrinsic directly
-    if (!E->getArg(0)->getType()->isVectorType())
-      return EmitFAbs(*this, X);
-
-    return Builder.CreateIntrinsic(
-        /*ReturnType=*/X->getType()->getScalarType(),
-        CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
-        nullptr, "hlsl.length");
-  }
   case Builtin::BI__builtin_hlsl_normalize: {
     Value *X = EmitScalarExpr(E->getArg(0));
 
@@ -19289,6 +19275,21 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
         CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
         nullptr, "hlsl.saturate");
   }
+  case Builtin::BI__builtin_hlsl_reduce_add: {
+    Value *X = EmitScalarExpr(E->getArg(0));
+    auto EltTy = X->getType()->getScalarType();
+    if (E->getArg(0)->getType()->hasFloatingRepresentation()) {
+      Value *Seed = ConstantFP::get(EltTy, 0);
+      return Builder.CreateIntrinsic(
+          /*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_fadd,
+          ArrayRef<Value *>{Seed, X}, nullptr, "rdx.fadd");
+    } else {
+      assert(E->getArg(0)->getType()->hasIntegerRepresentation());
+      return Builder.CreateIntrinsic(
+          /*ReturnType=*/EltTy, llvm::Intrinsic::vector_reduce_add,
+          ArrayRef<Value *>{X}, nullptr, "rdx.add");
+    }
+  }
   case Builtin::BI__builtin_hlsl_select: {
     Value *OpCond = EmitScalarExpr(E->getArg(0));
     RValue RValTrue = EmitAnyExpr(E->getArg(1));
 
@@ -41,16 +41,34 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
   return __builtin_bit_cast(U, F);
 }
 
+template <typename T>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_impl(T X) {
+  return __builtin_elementwise_abs(X);
+}
+
+template <typename T, int N>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_vec_impl(vector<T, N> X) {
+  vector<T, N> XSquared = X * X;
+  T XSquaredSum = __builtin_hlsl_reduce_add(XSquared);
+  /*T  XSquaredSum = 0;
+  for(int I = 0; I < N; I++) {
+    XSquaredSum += XSquared[I];
+  }*/
+  return __builtin_elementwise_sqrt(XSquaredSum);
+}
+
 template <typename T>
 constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
 distance_impl(T X, T Y) {
-  return __builtin_elementwise_abs(X - Y);
+  return length_impl(X - Y);
 }
 
 template <typename T, int N>
 constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
 distance_vec_impl(vector<T, N> X, vector<T, N> Y) {
-  return __builtin_hlsl_length(X - Y);
+  return length_vec_impl(X - Y);
 }
 
 } // namespace __detail
 
@@ -1327,26 +1327,19 @@ float4 lerp(float4, float4, float4);
 /// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
 
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float4);
+const inline half length(half X) { return __detail::length_impl(X); }
+
+template <int N>
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+const inline half length(vector<half, N> X) {
+  return __detail::length_vec_impl(X);
+}
+
+const inline float length(float X) { return __detail::length_impl(X); }
+
+template <int N> const inline float length(vector<float, N> X) {
+  return __detail::length_vec_impl(X);
+}
 
 //===----------------------------------------------------------------------===//
 // log builtins
 
@@ -2063,12 +2063,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
-  case Builtin::BI__builtin_hlsl_length: {
-    if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
-      return true;
+  case Builtin::BI__builtin_hlsl_reduce_add: {
     if (SemaRef.checkArgCount(TheCall, 1))
       return true;
-
     ExprResult A = TheCall->getArg(0);
     QualType ArgTyA = A.get()->getType();
     QualType RetTy;
 
@@ -16,26 +16,32 @@ half test_distance_half(half X, half Y) { return distance(X, Y); }
 // CHECK-SAME: <2 x half> noundef [[X:%.*]], <2 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x half> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v2f16(<2 x half> [[SUB_I]])
-// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
 //
 half test_distance_half2(half2 X, half2 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef half @_Z19test_distance_half3Dv3_DhS_(
 // CHECK-SAME: <3 x half> noundef [[X:%.*]], <3 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <3 x half> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v3f16(<3 x half> [[SUB_I]])
-// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
 //
 half test_distance_half3(half3 X, half3 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef half @_Z19test_distance_half4Dv4_DhS_(
 // CHECK-SAME: <4 x half> noundef [[X:%.*]], <4 x half> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x half> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef half @llvm.dx.length.v4f16(<4 x half> [[SUB_I]])
-// CHECK-NEXT:    ret half [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x half> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[RDX_FADD_I]])
+// CHECK-NEXT:    ret half [[TMP0]]
 //
 half test_distance_half4(half4 X, half4 Y) { return distance(X, Y); }
 
@@ -52,25 +58,31 @@ float test_distance_float(float X, float Y) { return distance(X, Y); }
 // CHECK-SAME: <2 x float> noundef [[X:%.*]], <2 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x float> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v2f32(<2 x float> [[SUB_I]])
-// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
 //
 float test_distance_float2(float2 X, float2 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef float @_Z20test_distance_float3Dv3_fS_(
 // CHECK-SAME: <3 x float> noundef [[X:%.*]], <3 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <3 x float> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v3f32(<3 x float> [[SUB_I]])
-// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <3 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
 //
 float test_distance_float3(float3 X, float3 Y) { return distance(X, Y); }
 
 // CHECK-LABEL: define noundef float @_Z20test_distance_float4Dv4_fS_(
 // CHECK-SAME: <4 x float> noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x float> [[X]], [[Y]]
-// CHECK-NEXT:    [[HLSL_LENGTH_I:%.*]] = tail call noundef float @llvm.dx.length.v4f32(<4 x float> [[SUB_I]])
-// CHECK-NEXT:    ret float [[HLSL_LENGTH_I]]
+// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[SUB_I]], [[SUB_I]]
+// CHECK-NEXT:    [[RDX_FADD_I:%.*]] = tail call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[MUL_I]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
+// CHECK-NEXT:    ret float [[TMP0]]
 //
 float test_distance_float4(float4 X, float4 Y) { return distance(X, Y); }