llvm · Alexander-Johnston · Sep 29, 2025
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
@@ -5204,6 +5204,18 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
   let Prototype = "T(unsigned int, T)";
 }
 
+def HLSLDerivCoarseX: LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_deriv_coarse_x"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def HLSLDerivCoarseY: LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_deriv_coarse_y"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];

diff --git a/clang/include/clang/Basic/BuiltinsSPIRVCommon.td b/clang/include/clang/Basic/BuiltinsSPIRVCommon.td
@@ -21,3 +21,4 @@ def subgroup_local_invocation_id : SPIRVBuiltin<"uint32_t()", [NoThrow, Const]>;
 def distance : SPIRVBuiltin<"void(...)", [NoThrow, Const]>;
 def length : SPIRVBuiltin<"void(...)", [NoThrow, Const]>;
 def smoothstep : SPIRVBuiltin<"void(...)", [NoThrow, Const, CustomTypeChecking]>;
+def fwidth : SPIRVBuiltin<"void(...)", [NoThrow, Const, CustomTypeChecking]>;
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -532,6 +532,24 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
         ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_x: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable(
+          "deriv coarse x operand must have a float representation");
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/Op0->getType(), llvm::Intrinsic::dx_deriv_coarse_x,
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.deriv.coarse.x");
+  }
+  case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_y: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable(
+          "deriv coarse x operand must have a float representation");
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/Op0->getType(), llvm::Intrinsic::dx_deriv_coarse_y,
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.deriv.coarse.y");
+  }
   case Builtin::BI__builtin_hlsl_elementwise_isinf: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     llvm::Type *Xty = Op0->getType();

diff --git a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp
@@ -151,6 +151,11 @@ Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
         Intrinsic::spv_global_offset,
         ArrayRef<Value *>{EmitScalarExpr(E->getArg(0))}, nullptr,
         "spv.global.offset");
+  case SPIRV::BI__builtin_spirv_fwidth:
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/getTypes().ConvertType(E->getType()),
+        Intrinsic::spv_fwidth, ArrayRef<Value *>{EmitScalarExpr(E->getArg(0))},
+        nullptr, "spv.fwidth");
   }
   return nullptr;
 }
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,6 +148,31 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
   return exp2(Exp) * X;
 }
 
+template <typename T> constexpr T fwidth_impl(T input) {
+#if (__has_builtin(__builtin_spirv_fwidth))
+  return __builtin_spirv_fwidth(input);
+#else
+  T derivCoarseX = __builtin_hlsl_elementwise_deriv_coarse_x(input);
+  derivCoarseX = abs(derivCoarseX);
+  T derivCoarseY = __builtin_hlsl_elementwise_deriv_coarse_y(input);
+  derivCoarseY = abs(derivCoarseY);
+  return derivCoarseX + derivCoarseY;
+#endif
+}
+
+template <typename T, int N>
+constexpr vector<T, N> fwidth_vec_impl(vector<T, N> input) {
+#if (__has_builtin(__builtin_spirv_fwidth))
+  return __builtin_spirv_fwidth(input);
+#else
+  vector<T, N> derivCoarseX = __builtin_hlsl_elementwise_deriv_coarse_x(input);
+  derivCoarseX = abs(derivCoarseX);
+  vector<T, N> derivCoarseY = __builtin_hlsl_elementwise_deriv_coarse_y(input);
+  derivCoarseY = abs(derivCoarseY);
+  return derivCoarseX + derivCoarseY;
+#endif
+}
+
 } // namespace __detail
 } // namespace hlsl
 

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -605,5 +605,30 @@ smoothstep(__detail::HLSL_FIXED_VECTOR<float, N> Min,
   return __detail::smoothstep_vec_impl(Min, Max, X);
 }
 
+//===----------------------------------------------------------------------===//
+// fwidth builtin
+//===----------------------------------------------------------------------===//
+
+/// \fn T fwidth(T x)
+/// \brief Computes the sum of the absolute values of the partial derivatives
+/// with regard to the x and y screen space coordinates.
+/// \param x [in] The floating-point scalar or vector to process.
+///
+/// The return value is a floating-point scalar or vector where each element
+/// holds the computation of the matching element in the input.
+
+template <typename T>
+const inline __detail::enable_if_t<
+    __detail::is_arithmetic<T>::Value && __detail::is_same<float, T>::value, T>
+fwidth(T input) {
+  return __detail::fwidth_impl(input);
+}
+
+template <int N>
+const inline __detail::HLSL_FIXED_VECTOR<float, N>
+fwidth(__detail::HLSL_FIXED_VECTOR<float, N> input) {
+  return __detail::fwidth_vec_impl(input);
+}
+
 } // namespace hlsl
 #endif //_HLSL_HLSL_INTRINSICS_H_
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
@@ -3080,7 +3080,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case Builtin::BI__builtin_hlsl_elementwise_degrees:
   case Builtin::BI__builtin_hlsl_elementwise_radians:
   case Builtin::BI__builtin_hlsl_elementwise_rsqrt:
-  case Builtin::BI__builtin_hlsl_elementwise_frac: {
+  case Builtin::BI__builtin_hlsl_elementwise_frac:
+  case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_x:
+  case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_y: {
     if (SemaRef.checkArgCount(TheCall, 1))
       return true;
     if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,

diff --git a/clang/lib/Sema/SemaSPIRV.cpp b/clang/lib/Sema/SemaSPIRV.cpp
@@ -360,6 +360,24 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(const TargetInfo &TI,
   case SPIRV::BI__builtin_spirv_generic_cast_to_ptr_explicit: {
     return checkGenericCastToPtr(SemaRef, TheCall);
   }
+  case SPIRV::BI__builtin_spirv_fwidth: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+
+    // Check if first argument has floating representation
+    ExprResult A = TheCall->getArg(0);
+    QualType ArgTyA = A.get()->getType();
+    if (!ArgTyA->hasFloatingRepresentation()) {
+      SemaRef.Diag(A.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+          << /* ordinal */ 1 << /* scalar or vector */ 5 << /* no int */ 0
+          << /* fp */ 1 << ArgTyA;
+      return true;
+    }
+
+    QualType RetTy = ArgTyA;
+    TheCall->setType(RetTy);
+    break;
+  }
   }
   return false;
 }

diff --git a/clang/test/CodeGenSPIRV/Builtins/fwidth.c b/clang/test/CodeGenSPIRV/Builtins/fwidth.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -O1 -triple spirv-pc-vulkan-compute %s -emit-llvm -o - | FileCheck %s
+
+typedef _Float16 half;
+typedef half half2 __attribute__((ext_vector_type(2)));
+typedef half half3 __attribute__((ext_vector_type(3)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+
+// CHECK: [[fwidth0:%.*]] = tail call half @llvm.spv.fwidth.f16(half {{%.*}})
+// CHECK: ret half [[fwidth0]] 
+half test_fwidth_half(half X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth0:%.*]] = tail call <2 x half> @llvm.spv.fwidth.v2f16(<2 x half>  {{%.*}})
+// CHECK: ret <2 x half> [[fwidth0]] 
+half2 test_fwidth_half2(half2 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth0:%.*]] = tail call <3 x half> @llvm.spv.fwidth.v3f16(<3 x half> {{%.*}})
+// CHECK: ret <3 x half> [[fwidth0]] 
+half3 test_fwidth_half3(half3 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth0:%.*]] = tail call <4 x half> @llvm.spv.fwidth.v4f16(<4 x half> {{%.*}})
+// CHECK: ret <4 x half> [[fwidth0]] 
+half4 test_fwidth_half4(half4 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth0:%.*]] = tail call float @llvm.spv.fwidth.f32(float {{%.*}})
+// CHECK: ret float [[fwidth0]] 
+float test_fwidth_float(float X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth1:%.*]] = tail call <2 x float> @llvm.spv.fwidth.v2f32(<2 x float> {{%.*}})
+// CHECK: ret <2 x float> [[fwidth1]]
+float2 test_fwidth_float2(float2 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth2:%.*]] = tail call <3 x float> @llvm.spv.fwidth.v3f32(<3 x float> {{%.*}})
+// CHECK: ret <3 x float> [[fwidth2]]
+float3 test_fwidth_float3(float3 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth3:%.*]] = tail call <4 x float> @llvm.spv.fwidth.v4f32(<4 x float> {{%.*}})
+// CHECK: ret <4 x float> [[fwidth3]]
+float4 test_fwidth_float4(float4 X) { return __builtin_spirv_fwidth(X); }
diff --git a/clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c b/clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -triple spirv-pc-vulkan-compute -verify
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+
+void test_too_few_arg()
+{
+  return __builtin_spirv_fwidth();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float test_too_many_arg(float p0) {
+  return __builtin_spirv_fwidth(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float test_int_scalar_inputs(int p0) {
+  return __builtin_spirv_fwidth(p0);
+  //  expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+}
+
+float test_mismatched_return(float2 p0) {
+  return __builtin_spirv_fwidth(p0);
+  // expected-error@-1 {{returning 'float2' (vector of 2 'float' values) from a function with incompatible result type 'float'}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -162,6 +162,8 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
+def int_dx_deriv_coarse_x : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+def int_dx_deriv_coarse_y : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;

diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -132,6 +132,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
   def int_spv_group_memory_barrier_with_group_sync
       : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
   def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>;
+  def int_spv_fwidth : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
   def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
@@ -922,6 +922,24 @@ def Discard : DXILOp<82, discard> {
   let stages = [Stages<DXIL1_0, [pixel]>];
 }
 
+def DerivCoarseX : DXILOp<83, unary> {
+  let Doc = "computes the rate of change per stamp in x direction";
+  let intrinsics = [IntrinSelect<int_dx_deriv_coarse_x>];
+  let arguments = [OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [library, pixel, compute, amplification, mesh, node]>];
+}
+
+def DerivCoarseY : DXILOp<84, unary> {
+  let Doc = "computes the rate of change per stamp in y direction";
+  let intrinsics = [IntrinSelect<int_dx_deriv_coarse_y>];
+  let arguments = [OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+  let stages = [Stages<DXIL1_0, [library, pixel, compute, amplification, mesh, node]>];
+}
+
 def ThreadId : DXILOp<93, threadId> {
   let Doc = "Reads the thread ID";
   let intrinsics = [IntrinSelect<int_dx_thread_id>];

diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -48,6 +48,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_firstbitshigh:
   case Intrinsic::dx_firstbituhigh:
   case Intrinsic::dx_frac:
+  case Intrinsic::dx_deriv_coarse_x:
+  case Intrinsic::dx_deriv_coarse_y:
   case Intrinsic::dx_isinf:
   case Intrinsic::dx_isnan:
   case Intrinsic::dx_rsqrt:

diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -179,6 +179,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectSplatVector(Register ResVReg, const SPIRVType *ResType,
                          MachineInstr &I) const;
 
+  bool selectFwidth(Register ResVReg, const SPIRVType *ResType,
+                    MachineInstr &I) const;
+
   bool selectCmp(Register ResVReg, const SPIRVType *ResType,
                  unsigned comparisonOpcode, MachineInstr &I) const;
   bool selectDiscard(Register ResVReg, const SPIRVType *ResType,
@@ -2615,6 +2618,15 @@ bool SPIRVInstructionSelector::selectDiscard(Register ResVReg,
       .constrainAllUses(TII, TRI, RBI);
 }
 
+bool SPIRVInstructionSelector::selectFwidth(Register ResVReg,
+                                            const SPIRVType *ResType,
+                                            MachineInstr &I) const {
+  return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFwidth))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addUse(I.getOperand(2).getReg());
+}
+
 bool SPIRVInstructionSelector::selectCmp(Register ResVReg,
                                          const SPIRVType *ResType,
                                          unsigned CmpOpc,
@@ -3451,6 +3463,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_discard: {
     return selectDiscard(ResVReg, ResType, I);
   }
+  case Intrinsic::spv_fwidth: {
+    return selectFwidth(ResVReg, ResType, I);
+  }
   case Intrinsic::modf: {
     return selectModf(ResVReg, ResType, I);
   }

diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_x.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_x.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure dxil operation function calls for fwidth are generated for float, half vec, float, an32float v
+; Make sure dxil operation function calls for fwidth are generated for float, half vec, flv4oat, an32float vec
+
+
+define noundef half @deriv_coarse_x_half(half noundef %a) {
+; CHECK: call half @dx.op.unary.f16(i32 83, half %{{.*}})
+entry:
+  %dx.deriv.coarse.x = call half @llvm.dx.deriv.coarse.x.f16(half %a)
+  ret half %dx.deriv.coarse.x
+}
+
+define noundef float @deriv_coarse_x_float(float noundef %a) {
+; CHECK: call float @dx.op.unary.f32(i32 83, float %{{.*}})
+entry:
+  %dx.deriv.coarse.x = call float @llvm.dx.deriv.coarse.x.f32(float %a)
+  ret float %dx.deriv.coarse.x
+}
+
+define noundef <4 x float> @deriv_coarse_x_float4(<4 x float> noundef %a) {
+; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee0]])
+; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee1]])
+; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee2]])
+; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee3]])
+; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+; CHECK: ret <4 x float> %{{.*}}
+entry:
+  %dx.deriv.coarse.x = call <4 x float> @llvm.dx.deriv.coarse.x.v4f32(<4 x float> %a)
+  ret <4 x float> %dx.deriv.coarse.x
+}
+
+declare half @llvm.dx.deriv.coarse.x.f16(half)
+declare float @llvm.dx.deriv.coarse.x.f32(float)
+declare <4 x float> @llvm.dx.deriv.coarse.x.v4f32(<4 x float>)
+
diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation deriv_coarse_x does not support double overload type
+; CHECK: in function deriv_coarse_x
+; CHECK-SAME: Cannot create DerivCoarseX operation: Invalid overload type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @deriv_coarse_x_double(double noundef %a) #0 {
+entry:
+  %a.addr = alloca double, align 8
+  store double %a, ptr %a.addr, align 8
+  %0 = load double, ptr %a.addr, align 8
+  %dx.deriv_coarse_x = call double @llvm.dx.deriv.coarse.x.f64(double %0)
+  ret double %dx.deriv_coarse_x
+}