diff --git a/.eslintrc.js b/.eslintrc.js index a45a51b0988..9d7d925156c 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -101,6 +101,7 @@ const abbreviations = [ "BGR", "SFE", "BVH", + "FSR", ]; // Join them into a single regex string diff --git a/packages/dev/core/src/PostProcesses/RenderPipeline/Pipelines/fsr1RenderingPipeline.ts b/packages/dev/core/src/PostProcesses/RenderPipeline/Pipelines/fsr1RenderingPipeline.ts new file mode 100644 index 00000000000..e3e7244d4d9 --- /dev/null +++ b/packages/dev/core/src/PostProcesses/RenderPipeline/Pipelines/fsr1RenderingPipeline.ts @@ -0,0 +1,205 @@ +import type { Scene } from "core/scene"; +import type { Nullable } from "core/types"; + +import { PostProcessRenderEffect } from "../postProcessRenderEffect"; +import { PostProcessRenderPipeline } from "../postProcessRenderPipeline"; +import { PostProcess } from "../../postProcess"; +import { ThinFSR1UpscalePostProcess } from "../../thinFSR1UpscalePostProcess"; +import { ThinFSR1SharpenPostProcess } from "../../thinFSR1SharpenPostProcess"; + +/** + * FidelityFX Super Resolution (FSR) 1 render pipeline. + * This can be used to render the scene at a lower resolution and upscale it. The pipeline is only built on WebGPU engines (see `isSupported`).
+ */ +export class FSR1RenderingPipeline extends PostProcessRenderPipeline { + /** + * AMD's recommended `scaleFactor` for an "Ultra Quality" preset (equal to 1.3) + */ + public static readonly SCALE_ULTRA_QUALITY = 1.3; + /** + * AMD's recommended `scaleFactor` for a "Quality" preset (equal to 1.5) + */ + public static readonly SCALE_QUALITY = 1.5; + /** + * AMD's recommended `scaleFactor` for a "Balanced" preset (equal to 1.7) + */ + public static readonly SCALE_BALANCED = 1.7; + /** + * AMD's recommended `scaleFactor` for a "Performance" preset (equal to 2) + */ + public static readonly SCALE_PERFORMANCE = 2; + + private readonly _scene: Scene; + + /** + * Returns true if FSR is supported by the running engine (currently this means a WebGPU engine) + */ + public override get isSupported(): boolean { + return this.engine.isWebGPU; + } + + private _samples = 4; + /** + * MSAA sample count (default: 4). + * Disabling MSAA is not recommended since aliased edges will be exaggerated by the FSR pass. + * Always have at least one AA solution enabled, whether that be MSAA with this setting or a post-process effect like FXAA or TAA. + */ + public get samples(): number { + return this._samples; + } + + public set samples(samples: number) { + if (this._samples === samples) { + return; + } + this._samples = samples; + if (this._upscalePostProcess) { + this._upscalePostProcess.samples = this._samples; + } + } + + private _scaleFactor = FSR1RenderingPipeline.SCALE_QUALITY; + /** + * How much smaller to render the scene at (default: 1.5). + * For example, a value of 2 will render the scene at half resolution. Changing this value rebuilds the pipeline. + */ + public get scaleFactor(): number { + return this._scaleFactor; + } + + public set scaleFactor(factor: number) { + if (this._scaleFactor === factor) { + return; + } + this._scaleFactor = factor; + this._buildPipeline(); + } + + private _sharpnessStops = 0.2; + /** + * The number of stops (halving) of the reduction of sharpness (default: 0.2). + * A value of 0 indicates a maximum sharpness.
+ */ + public get sharpnessStops(): number { + return this._sharpnessStops; + } + + public set sharpnessStops(stops: number) { + if (this._sharpnessStops === stops) { + return; + } + this._sharpnessStops = stops; + this._thinSharpenPostProcess.updateConstants(this._sharpnessStops); + } + + /** + * The FSR upscale PostProcess ID in the pipeline + */ + // eslint-disable-next-line @typescript-eslint/naming-convention + public FSR1UpscaleEffect = "FSR1UpscaleEffect"; + private readonly _thinUpscalePostProcess: ThinFSR1UpscalePostProcess; + private _upscalePostProcess: Nullable<PostProcess> = null; + + /** + * The FSR sharpen PostProcess ID in the pipeline + */ + // eslint-disable-next-line @typescript-eslint/naming-convention + public FSR1SharpenEffect = "FSR1SharpenEffect"; + private readonly _thinSharpenPostProcess: ThinFSR1SharpenPostProcess; + private _sharpenPostProcess: PostProcess; + + /** + * Creates a new FSR 1 rendering pipeline + * @param name The rendering pipeline name + * @param scene The scene linked to this pipeline + * @param cameras The array of cameras that the rendering pipeline will be attached to (default: scene.cameras) + */ + constructor(name: string, scene: Scene, cameras = scene.cameras) { + super(scene.getEngine(), name); + this._scene = scene; + this._cameras = cameras.slice(); + + this._thinUpscalePostProcess = new ThinFSR1UpscalePostProcess(name + "Upscale", this.engine); + this._thinSharpenPostProcess = new ThinFSR1SharpenPostProcess(name + "Sharpen", this.engine); + this._createSharpenPostProcess(); + + if (this.isSupported) { + scene.postProcessRenderPipelineManager.addPipeline(this); + this._buildPipeline(); + } + } + + private _buildPipeline(): void { + if (!this.isSupported) { + return; + } + const cameras = this._cameras.slice(); + this._scene.postProcessRenderPipelineManager.detachCamerasFromRenderPipeline(this._name, cameras); + this._reset(); + + this._disposeUpscalePostProcess(); + this._createUpscalePostProcess(); + + this.addEffect(new
PostProcessRenderEffect(this.engine, this.FSR1UpscaleEffect, () => this._upscalePostProcess)); + this.addEffect(new PostProcessRenderEffect(this.engine, this.FSR1SharpenEffect, () => this._sharpenPostProcess)); + + this._scene.postProcessRenderPipelineManager.attachCamerasToRenderPipeline(this.name, cameras); + } + + /** + * Disposes of the pipeline: both post-processes and the thin effect wrappers they use + */ + public override dispose(): void { + this._disposeSharpenPostProcess(); + this._thinSharpenPostProcess.dispose(); + this._disposeUpscalePostProcess(); + this._thinUpscalePostProcess.dispose(); + super.dispose(); + } + + private _createUpscalePostProcess(): void { + const postProcess = new PostProcess(this._thinUpscalePostProcess.name, ThinFSR1UpscalePostProcess.FragmentUrl, { + uniformBuffers: ThinFSR1UpscalePostProcess.UniformBuffers, + size: 1 / this._scaleFactor, + engine: this.engine, + effectWrapper: this._thinUpscalePostProcess, + }); + postProcess.samples = this._samples; + + postProcess.onApplyObservable.add(() => { + // The post-process size is passed as both the viewport size and the input texture size; the output size is the engine render size + this._thinUpscalePostProcess.updateConstants( + postProcess.width, + postProcess.height, + postProcess.width, + postProcess.height, + this.engine.getRenderWidth(), + this.engine.getRenderHeight() + ); + }); + + this._upscalePostProcess = postProcess; + } + + private _disposeUpscalePostProcess(): void { + for (const camera of this._cameras) { + this._upscalePostProcess?.dispose(camera); + } + this._upscalePostProcess = null; + } + + private _createSharpenPostProcess(): void { + this._thinSharpenPostProcess.updateConstants(this._sharpnessStops); + this._sharpenPostProcess = new PostProcess(this._thinSharpenPostProcess.name, ThinFSR1SharpenPostProcess.FragmentUrl, { + uniformBuffers: ThinFSR1SharpenPostProcess.UniformBuffers, + engine: this.engine, + effectWrapper: this._thinSharpenPostProcess, + }); + } + + private _disposeSharpenPostProcess(): void { + for (const camera of this._cameras) { + this._sharpenPostProcess.dispose(camera); + } + } +} +diff --git
a/packages/dev/core/src/PostProcesses/RenderPipeline/Pipelines/index.ts b/packages/dev/core/src/PostProcesses/RenderPipeline/Pipelines/index.ts index ab1ed8e86cd..f46a2b72ae2 100644 --- a/packages/dev/core/src/PostProcesses/RenderPipeline/Pipelines/index.ts +++ b/packages/dev/core/src/PostProcesses/RenderPipeline/Pipelines/index.ts @@ -5,6 +5,7 @@ export * from "./ssaoRenderingPipeline"; export * from "./standardRenderingPipeline"; export * from "./ssrRenderingPipeline"; export * from "./taaRenderingPipeline"; +export * from "./fsr1RenderingPipeline"; // SSAO2 export * from "../../../Shaders/ssao2.fragment"; @@ -22,3 +23,6 @@ export * from "../../../ShadersWGSL/screenSpaceReflection2BlurCombiner.fragment" import "../../../Shaders/taa.fragment"; import "../../../ShadersWGSL/taa.fragment"; + +import "../../../ShadersWGSL/fsr1Upscale.fragment"; +import "../../../ShadersWGSL/fsr1Sharpen.fragment"; diff --git a/packages/dev/core/src/PostProcesses/thinFSR1SharpenPostProcess.ts b/packages/dev/core/src/PostProcesses/thinFSR1SharpenPostProcess.ts new file mode 100644 index 00000000000..8e7b110e233 --- /dev/null +++ b/packages/dev/core/src/PostProcesses/thinFSR1SharpenPostProcess.ts @@ -0,0 +1,85 @@ +import type { AbstractEngine } from "core/Engines/abstractEngine"; +import { Engine } from "core/Engines/engine"; +import { EffectWrapper, type EffectWrapperCreationOptions } from "core/Materials/effectRenderer"; +import { ShaderLanguage } from "core/Materials/shaderLanguage"; +import { UniformBuffer } from "core/Materials/uniformBuffer"; +import type { Nullable } from "core/types"; + +/** + * Robust Contrast Adaptive Sharpening (RCAS) post-process used by FSR 1 + */ +export class ThinFSR1SharpenPostProcess extends EffectWrapper { + /** + * The fragment shader URL + */ + public static readonly FragmentUrl = "fsr1Sharpen"; + + /** + * The list of uniform buffers used by the effect + */ + public static readonly UniformBuffers = ["constants"]; + + private readonly _uniformBuffer:
UniformBuffer; + + /** + * Creates a new FSR 1 sharpen post process + * @param name Name of the effect + * @param engine Engine to use to render the effect. If not provided, the last created engine will be used + * @param options Options to configure the effect + */ + constructor(name: string, engine?: Nullable<AbstractEngine>, options?: EffectWrapperCreationOptions) { + engine ??= Engine.LastCreatedEngine!; + super({ + ...options, + name, + engine, + useShaderStore: true, + useAsPostProcess: true, + fragmentShader: ThinFSR1SharpenPostProcess.FragmentUrl, + shaderLanguage: ShaderLanguage.WGSL, + uniformBuffers: ThinFSR1SharpenPostProcess.UniformBuffers, + }); + + this._uniformBuffer = new UniformBuffer(engine, [], false, name); + this._uniformBuffer.addUniform("con", 4); + this._uniformBuffer.create(); + this._uniformBuffer.bindToEffect(this.effect, "constants"); + } + + protected override _gatherImports(useWebGPU: boolean | undefined, list: Promise<any>[]): void { + // Only a WGSL shader exists for this effect, so the import does not depend on useWebGPU + list.push(import("../ShadersWGSL/fsr1Sharpen.fragment")); + } + + /** + * Binds the data to the effect. + * @param noDefaultBindings if true, the default bindings (scale and alpha mode) will not be set.
+ */ + public override bind(noDefaultBindings?: boolean): void { + super.bind(noDefaultBindings); + this._uniformBuffer.bindUniformBuffer(); + } + + /** + * Call to setup required constant values + * @param sharpness The number of stops (halving) of the reduction of sharpness (0 = maximum sharpness) + */ + public updateConstants(sharpness: number): void { + // Code based on FsrRcasCon from FSR 1: + // https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/a21ffb8f6c13233ba336352bdff293894c706575/ffx-fsr/ffx_fsr1.h#L662 + const linearSharpness = Math.pow(2, -sharpness); + + // Technically these are uints in the shader but they're bitwise converted to floats anyway + // Since we haven't added the half-float shader yet, we don't need the second constant, which would require JS half-float calculation + this._uniformBuffer.updateFloat4("con", linearSharpness, 0, 0, 0); + this._uniformBuffer.update(); + } + + /** + * Disposes of the effect wrapper and releases the uniform buffer created by the constructor. + */ + public override dispose(): void { + this._uniformBuffer.dispose(); + super.dispose(); + } +} +diff --git a/packages/dev/core/src/PostProcesses/thinFSR1UpscalePostProcess.ts b/packages/dev/core/src/PostProcesses/thinFSR1UpscalePostProcess.ts new file mode 100644 index 00000000000..f815411e765 --- /dev/null +++ b/packages/dev/core/src/PostProcesses/thinFSR1UpscalePostProcess.ts @@ -0,0 +1,104 @@ +import type { AbstractEngine } from "core/Engines/abstractEngine"; +import { Engine } from "core/Engines/engine"; +import { EffectWrapper, type EffectWrapperCreationOptions } from "core/Materials/effectRenderer"; +import { ShaderLanguage } from "core/Materials/shaderLanguage"; +import { UniformBuffer } from "core/Materials/uniformBuffer"; +import type { Nullable } from "core/types"; + +/** + * Edge Adaptive Spatial Upsampling (EASU) post-process used by FSR 1 + */ +export class ThinFSR1UpscalePostProcess extends EffectWrapper { + /** + * The fragment shader URL + */ + public static readonly FragmentUrl = "fsr1Upscale"; + + /** + * The list of uniform buffers used by the effect + */ + public static readonly UniformBuffers = ["constants"]; + + private readonly _uniformBuffer:
UniformBuffer; + + /** + * Creates a new FSR 1 upscale post process + * @param name Name of the effect + * @param engine Engine to use to render the effect. If not provided, the last created engine will be used + * @param options Options to configure the effect + */ + constructor(name: string, engine?: Nullable<AbstractEngine>, options?: EffectWrapperCreationOptions) { + engine ??= Engine.LastCreatedEngine!; + super({ + ...options, + name, + engine, + useShaderStore: true, + useAsPostProcess: true, + fragmentShader: ThinFSR1UpscalePostProcess.FragmentUrl, + shaderLanguage: ShaderLanguage.WGSL, + uniformBuffers: ThinFSR1UpscalePostProcess.UniformBuffers, + }); + + this._uniformBuffer = new UniformBuffer(engine, [], false, name); + this._uniformBuffer.addUniform("con0", 4); + this._uniformBuffer.addUniform("con1", 4); + this._uniformBuffer.addUniform("con2", 4); + this._uniformBuffer.addUniform("con3", 4); + this._uniformBuffer.create(); + this._uniformBuffer.bindToEffect(this.effect, "constants"); + } + + protected override _gatherImports(useWebGPU: boolean | undefined, list: Promise<any>[]): void { + // Only a WGSL shader exists for this effect, so the import does not depend on useWebGPU + list.push(import("../ShadersWGSL/fsr1Upscale.fragment")); + } + + /** + * Binds the data to the effect. + * @param noDefaultBindings if true, the default bindings (scale and alpha mode) will not be set.
+ */ + public override bind(noDefaultBindings?: boolean): void { + super.bind(noDefaultBindings); + this._uniformBuffer.bindUniformBuffer(); + } + + /** + * Call to setup required constant values + * @param viewportWidth The rendered input width being upscaled + * @param viewportHeight The rendered input height being upscaled + * @param inputWidth The width of the texture containing the input viewport + * @param inputHeight The height of the texture containing the input viewport + * @param outputWidth The display width which the input image gets upscaled to + * @param outputHeight The display height which the input image gets upscaled to + */ + public updateConstants(viewportWidth: number, viewportHeight: number, inputWidth: number, inputHeight: number, outputWidth: number, outputHeight: number): void { + // Code based on FsrEasuCon from FSR 1: + // https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/a21ffb8f6c13233ba336352bdff293894c706575/ffx-fsr/ffx_fsr1.h#L156 + const rcpInputWidth = 1 / inputWidth; + const rcpInputHeight = 1 / inputHeight; + const rcpOutputWidth = 1 / outputWidth; + const rcpOutputHeight = 1 / outputHeight; + + // Technically these are uints in the shader but they're bitwise converted to floats anyway + this._uniformBuffer.updateFloat4( + "con0", + viewportWidth * rcpOutputWidth, + viewportHeight * rcpOutputHeight, + 0.5 * viewportWidth * rcpOutputWidth - 0.5, + 0.5 * viewportHeight * rcpOutputHeight - 0.5 + ); + this._uniformBuffer.updateFloat4("con1", rcpInputWidth, rcpInputHeight, 1 * rcpInputWidth, -1 * rcpInputHeight); + this._uniformBuffer.updateFloat4("con2", -1 * rcpInputWidth, 2 * rcpInputHeight, 1 * rcpInputWidth, 2 * rcpInputHeight); + this._uniformBuffer.updateFloat4("con3", 0 * rcpInputWidth, 4 * rcpInputHeight, 0, 0); + this._uniformBuffer.update(); + } + + /** + * Disposes of the effect wrapper and releases the uniform buffer created by the constructor. + */ + public override dispose(): void { + this._uniformBuffer.dispose(); + super.dispose(); + } +} +diff --git a/packages/dev/core/src/ShadersWGSL/ShadersInclude/ffxFunctions.fx b/packages/dev/core/src/ShadersWGSL/ShadersInclude/ffxFunctions.fx new file mode 100644 index
00000000000..f80f07acc30 --- /dev/null +++ b/packages/dev/core/src/ShadersWGSL/ShadersInclude/ffxFunctions.fx @@ -0,0 +1,32 @@ +// FidelityFX Shader Portability functions used by FSR 1, converted to WGSL +// https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/master/ffx-fsr/ffx_a.h + +fn AMax3F1(x: f32, y: f32, z: f32) -> f32 { + return max(x,max(y,z)); +} + +fn AMax3F3(x: vec3f, y: vec3f, z: vec3f) -> vec3f { + return max(x,max(y,z)); +} + +fn AMin3F1(x: f32, y: f32, z: f32) -> f32 { + return min(x,min(y,z)); +} + +fn AMin3F3(x: vec3f, y: vec3f, z: vec3f) -> vec3f { + return min(x,min(y,z)); +} + +// Float approximations +fn APrxLoRcpF1(a: f32) -> f32 { + return bitcast(u32(0x7ef07ebb)-bitcast(a)); +} + +fn APrxMedRcpF1(a: f32) -> f32 { + let b=bitcast(u32(0x7ef19fff)-bitcast(a)); + return b*(-b*a+f32(2.0)); +} + +fn APrxLoRsqF1(a: f32) -> f32 { + return bitcast(u32(0x5f347d74)-(bitcast(a)>>1)); +} diff --git a/packages/dev/core/src/ShadersWGSL/fsr1Sharpen.fragment.fx b/packages/dev/core/src/ShadersWGSL/fsr1Sharpen.fragment.fx new file mode 100644 index 00000000000..492d81cf485 --- /dev/null +++ b/packages/dev/core/src/ShadersWGSL/fsr1Sharpen.fragment.fx @@ -0,0 +1,102 @@ +// FidelityFX FSR 1 converted to WGSL +// https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/master/ffx-fsr/ffx_fsr1.h +#include + +// This is set at the limit of providing unnatural results for sharpening. +#define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) + +struct FsrRcasCon { + con: vec4u, +} + +var textureSampler: texture_2d; +var constants: FsrRcasCon; + +fn FsrRcasLoadF(p: vec2i) -> vec4f { + return textureLoad(textureSampler, p, 0); +} + +fn FsrRcasF( + pixR: ptr, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. + pixG: ptr, + pixB: ptr, + ip: vec2u, // Integer pixel position in output. + con: vec4u // Constant generated by RcasSetup(). +) { + // Algorithm uses minimal 3x3 pixel neighborhood.
+ // b + // d e f + // h + let sp=vec2i(ip); + let b=FsrRcasLoadF(sp+vec2i( 0,-1)).rgb; + let d=FsrRcasLoadF(sp+vec2i(-1, 0)).rgb; + let e=FsrRcasLoadF(sp).rgb; + let f=FsrRcasLoadF(sp+vec2i( 1, 0)).rgb; + let h=FsrRcasLoadF(sp+vec2i( 0, 1)).rgb; + // Rename (32-bit) or regroup (16-bit). + let bR=b.r; + let bG=b.g; + let bB=b.b; + let dR=d.r; + let dG=d.g; + let dB=d.b; + let eR=e.r; + let eG=e.g; + let eB=e.b; + let fR=f.r; + let fG=f.g; + let fB=f.b; + let hR=h.r; + let hG=h.g; + let hB=h.b; + // Luma times 2. + let bL=bB*f32(0.5)+(bR*f32(0.5)+bG); + let dL=dB*f32(0.5)+(dR*f32(0.5)+dG); + let eL=eB*f32(0.5)+(eR*f32(0.5)+eG); + let fL=fB*f32(0.5)+(fR*f32(0.5)+fG); + let hL=hB*f32(0.5)+(hR*f32(0.5)+hG); + // Noise detection. + var nz=f32(0.25)*bL+f32(0.25)*dL+f32(0.25)*fL+f32(0.25)*hL-eL; + nz=saturate(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL))); + nz=f32(-0.5)*nz+f32(1.0); + // Min and max of ring. + let mn4R=min(AMin3F1(bR,dR,fR),hR); + let mn4G=min(AMin3F1(bG,dG,fG),hG); + let mn4B=min(AMin3F1(bB,dB,fB),hB); + let mx4R=max(AMax3F1(bR,dR,fR),hR); + let mx4G=max(AMax3F1(bG,dG,fG),hG); + let mx4B=max(AMax3F1(bB,dB,fB),hB); + // Immediate constants for peak range. + let peakC=vec2f(1.0,-1.0*4.0); + // Limiters, these need to be high precision RCPs. + let hitMinR=min(mn4R,eR)*(1.0/(f32(4.0)*mx4R)); + let hitMinG=min(mn4G,eG)*(1.0/(f32(4.0)*mx4G)); + let hitMinB=min(mn4B,eB)*(1.0/(f32(4.0)*mx4B)); + let hitMaxR=(peakC.x-max(mx4R,eR))*(1.0/(f32(4.0)*mn4R+peakC.y)); + let hitMaxG=(peakC.x-max(mx4G,eG))*(1.0/(f32(4.0)*mn4G+peakC.y)); + let hitMaxB=(peakC.x-max(mx4B,eB))*(1.0/(f32(4.0)*mn4B+peakC.y)); + let lobeR=max(-hitMinR,hitMaxR); + let lobeG=max(-hitMinG,hitMaxG); + let lobeB=max(-hitMinB,hitMaxB); + var lobe=max(f32(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),f32(0.0)))*bitcast(con.x); + // Apply noise removal.
+ #ifdef FSR_RCAS_DENOISE + lobe*=nz; + #endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. + let rcpL=APrxMedRcpF1(f32(4.0)*lobe+f32(1.0)); + *pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; + *pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; + *pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL; + return; +} + +@fragment +fn main(input: FragmentInputs) -> FragmentOutputs { + var pixR: f32; + var pixG: f32; + var pixB: f32; + let ip = vec2u(fragmentInputs.position.xy); + FsrRcasF(&pixR, &pixG, &pixB, ip, constants.con); + fragmentOutputs.color = vec4f(pixR, pixG, pixB, 1); +} diff --git a/packages/dev/core/src/ShadersWGSL/fsr1Upscale.fragment.fx b/packages/dev/core/src/ShadersWGSL/fsr1Upscale.fragment.fx new file mode 100644 index 00000000000..ffe70017b67 --- /dev/null +++ b/packages/dev/core/src/ShadersWGSL/fsr1Upscale.fragment.fx @@ -0,0 +1,250 @@ +// FidelityFX FSR 1 converted to WGSL +// https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/master/ffx-fsr/ffx_fsr1.h +#include + +struct FsrEasuCon { + con0: vec4u, + con1: vec4u, + con2: vec4u, + con3: vec4u, +} + +var textureSampler: texture_2d; +var textureSamplerSampler: sampler; +var constants: FsrEasuCon; + +// Input callback prototypes +fn FsrEasuRF(p: vec2f) -> vec4f { + return textureGather(0, textureSampler, textureSamplerSampler, p); +} + +fn FsrEasuGF(p: vec2f) -> vec4f { + return textureGather(1, textureSampler, textureSamplerSampler, p); +} + +fn FsrEasuBF(p: vec2f) -> vec4f { + return textureGather(2, textureSampler, textureSamplerSampler, p); +} + +// Filtering for a given tap for the scalar. +fn FsrEasuTapF( + aC: ptr, // Accumulated color, with negative lobe. + aW: ptr, // Accumulated weight. + off: vec2f, // Pixel offset from resolve position to tap. + dir: vec2f, // Gradient direction. + len: vec2f, // Length. + lob: f32, // Negative lobe strength. + clp: f32, // Clipping point. + c: vec3f // Tap color. +) { + // Rotate offset by direction.
+ var v: vec2f; + v.x=(off.x*( dir.x))+(off.y*dir.y); + v.y=(off.x*(-dir.y))+(off.y*dir.x); + // Anisotropy. + v*=len; + // Compute distance^2. + var d2=v.x*v.x+v.y*v.y; + // Limit to the window as at corner, 2 taps can easily be outside. + d2=min(d2,clp); + // Approximation of Lanczos2 without sin() or rcp(), or sqrt() to get x. + // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 + // |_______________________________________| |_______________| + // base window + // The general form of the 'base' is, + // (a*(b*x^2-1)^2-(a-1)) + // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. + var wB=f32(2.0/5.0)*d2+f32(-1.0); + var wA=lob*d2+(-1.0); + wB*=wB; + wA*=wA; + wB=f32(25.0/16.0)*wB+f32(-(25.0/16.0-1.0)); + let w=wB*wA; + // Do weighted average. + *aC+=c*w;*aW+=w; +} + +// Accumulate direction and length. +fn FsrEasuSetF( + dir: ptr, + len: ptr, + pp: vec2f, + biS: bool, biT: bool, biU: bool, biV: bool, + lA: f32, lB: f32, lC: f32, lD: f32, lE: f32 +) { + // Compute bilinear weight, branches factor out as predicates are compile-time immediates. + // s t + // u v + var w = f32(0.0); + if biS { + w=(f32(1.0)-pp.x)*(f32(1.0)-pp.y); + } + if biT { + w= pp.x *(f32(1.0)-pp.y); + } + if biU { + w=(f32(1.0)-pp.x)* pp.y ; + } + if biV { + w= pp.x * pp.y ; + } + // Direction is the '+' diff. + // a + // b c d + // e + // Then takes magnitude from abs average of both sides of 'c'. + // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. + let dc=lD-lC; + let cb=lC-lB; + var lenX=max(abs(dc),abs(cb)); + lenX=APrxLoRcpF1(lenX); + let dirX=lD-lB; + dir.x+=dirX*w; + lenX=saturate(abs(dirX)*lenX); + lenX*=lenX; + *len+=lenX*w; + // Repeat for the y axis.
+ let ec=lE-lC; + let ca=lC-lA; + var lenY=max(abs(ec),abs(ca)); + lenY=APrxLoRcpF1(lenY); + let dirY=lE-lA; + dir.y+=dirY*w; + lenY=saturate(abs(dirY)*lenY); + lenY*=lenY; + *len+=lenY*w; +} + +fn FsrEasuF( + pix: ptr, + ip: vec2u, // Integer pixel position in output. + con0: vec4u, // Constants generated by FsrEasuCon(). + con1: vec4u, + con2: vec4u, + con3: vec4u +) { +//------------------------------------------------------------------------------------------------------------------------------ + // Get position of 'f'. + var pp=vec2f(ip)*bitcast(con0.xy)+bitcast(con0.zw); + let fp=floor(pp); + pp-=fp; +//------------------------------------------------------------------------------------------------------------------------------ + // 12-tap kernel. + // b c + // e f g h + // i j k l + // n o + // Gather 4 ordering. + // a b + // r g + // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, + // a b <- unused (z) + // r g + // a b a b + // r g r g + // a b + // r g <- unused (z) + // Allowing dead-code removal to remove the 'z's. + let p0=fp*bitcast(con1.xy)+bitcast(con1.zw); + // These are from p0 to avoid pulling two constants on pre-Navi hardware. + let p1=p0+bitcast(con2.xy); + let p2=p0+bitcast(con2.zw); + let p3=p0+bitcast(con3.xy); + let bczzR=FsrEasuRF(p0); + let bczzG=FsrEasuGF(p0); + let bczzB=FsrEasuBF(p0); + let ijfeR=FsrEasuRF(p1); + let ijfeG=FsrEasuGF(p1); + let ijfeB=FsrEasuBF(p1); + let klhgR=FsrEasuRF(p2); + let klhgG=FsrEasuGF(p2); + let klhgB=FsrEasuBF(p2); + let zzonR=FsrEasuRF(p3); + let zzonG=FsrEasuGF(p3); + let zzonB=FsrEasuBF(p3); +//------------------------------------------------------------------------------------------------------------------------------ + // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+ let bczzL=bczzB*vec4f(0.5)+(bczzR*vec4f(0.5)+bczzG); + let ijfeL=ijfeB*vec4f(0.5)+(ijfeR*vec4f(0.5)+ijfeG); + let klhgL=klhgB*vec4f(0.5)+(klhgR*vec4f(0.5)+klhgG); + let zzonL=zzonB*vec4f(0.5)+(zzonR*vec4f(0.5)+zzonG); + // Rename. + let bL=bczzL.x; + let cL=bczzL.y; + let iL=ijfeL.x; + let jL=ijfeL.y; + let fL=ijfeL.z; + let eL=ijfeL.w; + let kL=klhgL.x; + let lL=klhgL.y; + let hL=klhgL.z; + let gL=klhgL.w; + let oL=zzonL.z; + let nL=zzonL.w; + // Accumulate for bilinear interpolation. + var dir=vec2f(0.0); + var len=f32(0.0); + FsrEasuSetF(&dir,&len,pp,true, false,false,false,bL,eL,fL,gL,jL); + FsrEasuSetF(&dir,&len,pp,false,true ,false,false,cL,fL,gL,hL,kL); + FsrEasuSetF(&dir,&len,pp,false,false,true ,false,fL,iL,jL,kL,nL); + FsrEasuSetF(&dir,&len,pp,false,false,false,true ,gL,jL,kL,lL,oL); +//------------------------------------------------------------------------------------------------------------------------------ + // Normalize with approximation, and cleanup close to zero. + let dir2=dir*dir; + var dirR=dir2.x+dir2.y; + var zro=dirR FragmentOutputs { + var pix: vec3f; + let ip = vec2u(fragmentInputs.position.xy); + FsrEasuF(&pix, ip, constants.con0, constants.con1, constants.con2, constants.con3); + fragmentOutputs.color = vec4f(pix, 1); +}