diff --git a/.gitignore b/.gitignore index ebfe867..7458408 100644 --- a/.gitignore +++ b/.gitignore @@ -58,4 +58,5 @@ TJURP_BurstDebugInformation_DoNotShip/ UserSettings/ **/.DS_Store -out/ \ No newline at end of file +out/ +.vscode/ \ No newline at end of file diff --git a/Assets/Scenes/Oasis/Art/Environment/Materials/HeightFog_Mat.mat b/Assets/Scenes/Oasis/Art/Environment/Materials/HeightFog_Mat.mat index 692489c..835134f 100644 --- a/Assets/Scenes/Oasis/Art/Environment/Materials/HeightFog_Mat.mat +++ b/Assets/Scenes/Oasis/Art/Environment/Materials/HeightFog_Mat.mat @@ -1,5 +1,5 @@ %YAML 1.1 -%TAG !u! tag:yousandi.cn,2023: +%TAG !u! tag:unity3d.com,2011: --- !u!21 &2100000 Material: serializedVersion: 8 diff --git a/Assets/Scenes/Oasis/Art/Environment/Materials/OasisSunsetSkybox_Mat.mat b/Assets/Scenes/Oasis/Art/Environment/Materials/OasisSunsetSkybox_Mat.mat index 2cb731f..0bbeb9c 100644 --- a/Assets/Scenes/Oasis/Art/Environment/Materials/OasisSunsetSkybox_Mat.mat +++ b/Assets/Scenes/Oasis/Art/Environment/Materials/OasisSunsetSkybox_Mat.mat @@ -68,7 +68,7 @@ Material: m_Scale: {x: 1, y: 1} m_Offset: {x: 0, y: 0} - _Tex: - m_Texture: {fileID: 8900000, guid: 870ca4df5e7d0044eb0d94414730e50c, type: 3} + m_Texture: {fileID: 8900000, guid: 5500c7bc667b55f49b29485c043a2b42, type: 3} m_Scale: {x: 1, y: 1} m_Offset: {x: 0, y: 0} - unity_Lightmaps: diff --git a/Assets/Scenes/Oasis/Settings/Oasis_Outdoor_Volume_Profile.asset b/Assets/Scenes/Oasis/Settings/Oasis_Outdoor_Volume_Profile.asset index 941dadf..da808b8 100644 --- a/Assets/Scenes/Oasis/Settings/Oasis_Outdoor_Volume_Profile.asset +++ b/Assets/Scenes/Oasis/Settings/Oasis_Outdoor_Volume_Profile.asset @@ -1,5 +1,5 @@ %YAML 1.1 -%TAG !u! tag:yousandi.cn,2023: +%TAG !u! 
tag:unity3d.com,2011: --- !u!114 &-4720686990112659349 MonoBehaviour: m_ObjectHideFlags: 3 diff --git a/Assets/Settings/Mobile/Mobile_High.asset b/Assets/Settings/Mobile/Mobile_High.asset index b12cecf..4c8e350 100644 --- a/Assets/Settings/Mobile/Mobile_High.asset +++ b/Assets/Settings/Mobile/Mobile_High.asset @@ -28,7 +28,7 @@ MonoBehaviour: m_SupportsHDR: 1 m_HDRColorBufferPrecision: 0 m_MSAA: 1 - m_RenderScale: 0.58823526 + m_RenderScale: 0.5 m_UpscalingFilter: 0 m_FsrOverrideSharpness: 1 m_FsrSharpness: 1 @@ -114,5 +114,5 @@ MonoBehaviour: m_PrefilterNativeRenderPass: 1 m_ShaderVariantLogLevel: 0 m_ShadowCascades: 0 - superResolution: 5 + superResolution: 1 vrsRate: 0 diff --git a/Assets/Settings/Mobile/Mobile_High_Renderer.asset b/Assets/Settings/Mobile/Mobile_High_Renderer.asset index aa65a12..b220cdd 100644 --- a/Assets/Settings/Mobile/Mobile_High_Renderer.asset +++ b/Assets/Settings/Mobile/Mobile_High_Renderer.asset @@ -14,6 +14,30 @@ MonoBehaviour: m_EditorClassIdentifier: m_Active: 0 quality: 0 +--- !u!114 &-7390778553674367771 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInstance: {fileID: 0} + m_PrefabAsset: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 386e439eeef2849448d91896c74ff1d5, type: 3} + m_Name: FSR + m_EditorClassIdentifier: + m_Active: 1 + quality: 5 + v1setting: + EasuCompute: {fileID: 7200000, guid: 787b0c165dad9074e9489817de945916, type: 3} + RacsCompute: {fileID: 7200000, guid: a50f730ab549f794cbe91f005703e208, type: 3} + IsHdr: 1 + Sharpness: 0.922 + MipMapBias: -1.74 + v2setting: + ComputeShader: {fileID: 7200000, guid: 7b9233ff11584414688b5f10a526ca15, type: 3} + v3setting: + ComputeShader: {fileID: 7200000, guid: b79f3a2fdeeac5745b545fc41148f6c3, type: 3} --- !u!114 &-2621301742936824463 MonoBehaviour: m_ObjectHideFlags: 0 @@ -26,7 +50,7 @@ MonoBehaviour: m_Script: {fileID: 11500000, guid: 
a00dddc5b3ea7fe45953ccbd49b58b94, type: 3} m_Name: GSR m_EditorClassIdentifier: - m_Active: 1 + m_Active: 0 quality: 4 v1settings: EnableEdgeDirection: 1 @@ -40,7 +64,7 @@ MonoBehaviour: GSRV2ComputeShader: {fileID: 7200000, guid: 9a4e6ff1f9a33fa4ea1e9744e313e2fd, type: 3} FiveSample: 0 Exposure_co_rcp: 1 - MipMapBias: -1.34 + MipMapBias: 0 --- !u!114 &-2390300670611609275 MonoBehaviour: m_ObjectHideFlags: 0 @@ -154,7 +178,8 @@ MonoBehaviour: - {fileID: -2621301742936824463} - {fileID: 4962205925092569722} - {fileID: -8576419846133267094} - m_RendererFeatureMap: bc3f630842f2e70dd6a559c442a94bfd4529d15534f2d3de228858dca8d122222735d34a93f399f2716523fbf3439fdb7a327b7bff4bdd446ac59dfa966ffa88 + - {fileID: -7390778553674367771} + m_RendererFeatureMap: bc3f630842f2e70dd6a559c442a94bfd4529d15534f2d3de228858dca8d122222735d34a93f399f2716523fbf3439fdb7a327b7bff4bdd446ac59dfa966ffa88e5fca93e10ae6e99 m_UseNativeRenderPass: 0 postProcessData: {fileID: 11400000, guid: 41439944d30ece34e96484bdb6645b55, type: 2} shaders: @@ -339,7 +364,7 @@ MonoBehaviour: MipMapBias: 0 antiGhosting: 0.1 sharpMaterial: {fileID: 2100000, guid: b356b97c3a610794582dd87ab85f4e98, type: 2} - sharpness: 1.652 + sharpness: 0.824 sharpMipLevel: 0 --- !u!114 &6334271670068977784 MonoBehaviour: diff --git a/Assets/Settings/VolumeProfiles/LowQualityVolumeProfile.asset b/Assets/Settings/VolumeProfiles/LowQualityVolumeProfile.asset index babaaff..5776da3 100644 --- a/Assets/Settings/VolumeProfiles/LowQualityVolumeProfile.asset +++ b/Assets/Settings/VolumeProfiles/LowQualityVolumeProfile.asset @@ -1,5 +1,5 @@ %YAML 1.1 -%TAG !u! tag:yousandi.cn,2023: +%TAG !u! 
tag:unity3d.com,2011: --- !u!114 &-8189118008837629952 MonoBehaviour: m_ObjectHideFlags: 3 @@ -170,7 +170,7 @@ MonoBehaviour: m_Script: {fileID: 11500000, guid: 97c23e3b12dc18c42a140437e53d3951, type: 3} m_Name: Tonemapping m_EditorClassIdentifier: - active: 1 + active: 0 mode: m_OverrideState: 1 m_Value: 1 diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/Passes/PostProcessPass.cs b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/Passes/PostProcessPass.cs index 4e30616..3104432 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/Passes/PostProcessPass.cs +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/Passes/PostProcessPass.cs @@ -497,7 +497,7 @@ namespace UnityEngine.Rendering.Universal } var asset = UniversalRenderPipeline.asset; // SuperResolution - if (asset.SuperResolution != ESuperResolution.None) + if (asset.SuperResolution != ESuperResolution.None && asset.SuperResolution != ESuperResolution.FSR1) { ref var colorDescriptor = ref cameraData.cameraTargetDescriptor; colorDescriptor.width = cameraData.camera.pixelWidth; @@ -507,6 +507,7 @@ namespace UnityEngine.Rendering.Universal || asset.SuperResolution == ESuperResolution.DLSS1 || asset.SuperResolution == ESuperResolution.DLSS2 || asset.SuperResolution == ESuperResolution.DLSS3 + || asset.SuperResolution == ESuperResolution.FSR1 ) { colorDescriptor.enableRandomWrite = true; @@ -691,7 +692,6 @@ namespace UnityEngine.Rendering.Universal } } } - } internal void Present(CommandBuffer cmd, ref CameraData cameraData, RTHandle rt) diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Scripts/FSR.cs b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Scripts/FSR.cs index cf5b004..7617291 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Scripts/FSR.cs +++ 
b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Scripts/FSR.cs @@ -1,4 +1,6 @@ using System; +using System.Runtime.InteropServices; +using Unity.Mathematics; using UnityEngine; using UnityEngine.Rendering; using UnityEngine.Rendering.Universal; @@ -21,6 +23,38 @@ namespace X.Rendering.Feature { [SerializeField] private FsrQuality quality; + [Serializable] + public class V1Setting + { + public ComputeShader EasuCompute; + public ComputeShader RacsCompute; + public bool IsHdr; + [Range(0.0f, 1.0f)] + public float Sharpness; + [Range(-5, 5)] + public float MipMapBias; + } + [SerializeField] + V1Setting v1setting; + + [Serializable] + class V2Setting + { + public ComputeShader ComputeShader; + + } + [SerializeField] + V2Setting v2setting; + + + [Serializable] + class V3Setting + { + public ComputeShader ComputeShader; + + } + [SerializeField] + V3Setting v3setting; FSRV1 fsr1; enum FsrVer @@ -38,11 +72,23 @@ namespace X.Rendering.Feature public override void Create() { - fsr1 = new(); + fsr1 = new(v1setting); } public void DoSR(CommandBuffer cmd, RTHandle source, RTHandle destination, RTHandle motionVector, ref RenderingData renderingData) { + switch (fsrVer) + { + case FsrVer.FSRV1: + fsr1.DoSR(cmd, source, destination, motionVector, ref renderingData); + break; + case FsrVer.FSRV2: + break; + case FsrVer.FSRV3: + break; + default: + break; + } } public float GetRenderScale() @@ -137,9 +183,93 @@ namespace X.Rendering.Feature sealed class FSRV1 : IDisposable { + FSR.V1Setting v1setting; + ComputeBuffer fsr1BUffer; + + [StructLayout(LayoutKind.Sequential)] + struct CBFSR1 + { + public float4 const0; + public float4 const1; + public float4 const2; + public float4 const3; + public float4 sample; + } + CBFSR1[] cbFsr1Step1; + CBFSR1[] cbFsr1sStep2; + public FSRV1(FSR.V1Setting v1setting) + { + this.v1setting = v1setting; + // Bound through SetComputeConstantBufferParam, so create it as a Constant buffer. + fsr1BUffer = new ComputeBuffer(1, Marshal.SizeOf<CBFSR1>(), ComputeBufferType.Constant); 
+ cbFsr1Step1 = new CBFSR1[1]; + cbFsr1sStep2 = new CBFSR1[1]; + } + + RTHandle tempRT; // Anti-aliased output; best placed after tonemapping and before high-frequency post effects such as film grain and chromatic aberration + // Records the two FSR1 compute passes: EASU reads source and writes tempRT, then RCAS reads tempRT and writes destination. + public void DoSR(CommandBuffer cmd, in RTHandle source, in RTHandle destination, in RTHandle motionVector, ref RenderingData renderingData) + { + float inputW = source.referenceSize.x; + float inputH = source.referenceSize.y; + float outputW = destination.referenceSize.x; + float outputH = destination.referenceSize.y; + SuperResolutionParamSets.Instance.Set("MipmapBias", v1setting.MipMapBias); + + + cbFsr1Step1[0].const0 = new float4(inputW / outputW, inputH / outputH, 0.5f * inputW / outputW - 0.5f, 0.5f * inputH / outputH - 0.5f); + cbFsr1Step1[0].const1 = new float4(1 / inputW, 1 / inputH, 1 / inputW, -1 / inputH); + cbFsr1Step1[0].const2 = new float4(-1 / inputW, 2 / inputH, 1 / inputW, 2 / inputH); + cbFsr1Step1[0].const3 = new float4(0, 4 / inputH, 0, 0); + // NOTE(review): this stores float 1.0/0.0; if cbFSR1.sample is declared as an integer vector in the FFX callbacks header the shader sees the float bit pattern instead of 1 - confirm. + cbFsr1Step1[0].sample = v1setting.IsHdr ? 1 : 0; + cmd.SetBufferData(fsr1BUffer, cbFsr1Step1); + + var cameraData = renderingData.cameraData; + var colorDescriptor = cameraData.cameraTargetDescriptor; + colorDescriptor.width = cameraData.camera.pixelWidth; + colorDescriptor.height = cameraData.camera.pixelHeight; + colorDescriptor.enableRandomWrite = true; + colorDescriptor.depthBufferBits = 0; + colorDescriptor.graphicsFormat = UnityEngine.Experimental.Rendering.GraphicsFormat.B10G11R11_UFloatPack32; + + RenderingUtils.ReAllocateIfNeeded(ref tempRT, colorDescriptor); + cmd.SetComputeConstantBufferParam(v1setting.EasuCompute, "cbFSR1", fsr1BUffer, 0, fsr1BUffer.stride); + cmd.SetComputeTextureParam(v1setting.EasuCompute, 0, "r_input_color", source); + cmd.SetComputeTextureParam(v1setting.EasuCompute, 0, "rw_upscaled_output", tempRT); + + const int threadGroupWorkRegionDim = 16; + int dispatchX = Mathf.CeilToInt(outputW / threadGroupWorkRegionDim); + int dispatchY = Mathf.CeilToInt(outputH / threadGroupWorkRegionDim); + + 
cmd.DispatchCompute(v1setting.EasuCompute, 0, dispatchX, dispatchY, 1); + + float sharpenessRemapped = (1 - v1setting.Sharpness) * 2.5f; + float sharpness = Mathf.Pow(2.0f, -sharpenessRemapped); + uint sharpnessAsHalf = Mathf.FloatToHalf(sharpness); + int packedSharpness = (int)(sharpnessAsHalf | (sharpnessAsHalf << 16)); + float packedSharpnessAsFloat = BitConverter.Int32BitsToSingle(packedSharpness); + + cbFsr1sStep2[0].const0 = new float4(sharpness, packedSharpnessAsFloat, 0, 0); + cbFsr1sStep2[0].sample = v1setting.IsHdr ? 1 : 0; + + cmd.SetBufferData(fsr1BUffer, cbFsr1sStep2); + cmd.SetComputeConstantBufferParam(v1setting.RacsCompute, "cbFSR1", fsr1BUffer, 0, fsr1BUffer.stride); + cmd.SetComputeTextureParam(v1setting.RacsCompute, 0, "r_internal_upscaled_color", tempRT); + + cmd.SetComputeTextureParam(v1setting.RacsCompute, 0, "rw_upscaled_output", destination); + + cmd.DispatchCompute(v1setting.RacsCompute, 0, dispatchX, dispatchY, 1); + } + public void Dispose() { + // Release unconditionally: the constant buffer exists even if DoSR never allocated tempRT. + fsr1BUffer?.Release(); + fsr1BUffer = null; + tempRT?.Release(); + tempRT = null; } } } diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Scripts/GSR.cs b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Scripts/GSR.cs index d0fa73e..4405b2b 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Scripts/GSR.cs +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Scripts/GSR.cs @@ -25,8 +25,40 @@ namespace X.Rendering.Feature public sealed class GSR : ScriptableRendererFeature, ISuperResolutionFeature { public GsrQuality quality; + [Serializable] + public sealed class V1Setting + { + [SerializeField] + public bool EnableEdgeDirection = true; + [SerializeField, Range(1, 4)] + public int Mode = 1; + [SerializeField, Range(0.5f, 2.5f)] + public float Sharp = 2; + [SerializeField, Range(-5f, 2.5f)] + public float MipMapBias = 0; + [SerializeField] + public Material GSRV1Material; + } + 
[SerializeField] private V1Setting v1settings; + + [Serializable] + public sealed class V2Setting + { + [SerializeField] + public bool UseCompute2Pass = false; + [SerializeField] + public Material GSRV2Material; + [SerializeField] + public ComputeShader GSRV2ComputeShader; + [SerializeField] + public bool FiveSample = false; + [SerializeField] + public float Exposure_co_rcp = 1; + [SerializeField, Range(-5f, 2.5f)] + public float MipMapBias = 0; + } [SerializeField] private V2Setting v2settings; @@ -136,31 +168,18 @@ namespace X.Rendering.Feature } #region GSR1 - [Serializable] - sealed class V1Setting - { - [SerializeField] - public bool EnableEdgeDirection = true; - [SerializeField, Range(1, 4)] - public int Mode = 1; - [SerializeField, Range(0.5f, 2.5f)] - public float Sharp = 2; - [SerializeField, Range(-5f, 2.5f)] - public float MipMapBias = 0; - [SerializeField] - public Material GSRV1Material; - } + sealed class GSRV1 : IDisposable { - private V1Setting v1settings; + private GSR.V1Setting v1settings; private ProfilingSampler profiler; private static readonly int InputTextureId = Shader.PropertyToID("_InputTexture"); private static readonly int ViewportInfoId = Shader.PropertyToID("_ViewportInfo"); private static readonly int GSR1ParamsId = Shader.PropertyToID("_GSR1Params"); - public GSRV1(V1Setting v1settings) + public GSRV1(GSR.V1Setting v1settings) { this.v1settings = v1settings; profiler = new ProfilingSampler(nameof(GSRV1)); @@ -206,26 +225,9 @@ namespace X.Rendering.Feature #endregion #region GSR2 - [Serializable] - sealed class V2Setting - { - [SerializeField] - public bool UseCompute2Pass = false; - [SerializeField] - public Material GSRV2Material; - [SerializeField] - public ComputeShader GSRV2ComputeShader; - [SerializeField] - public bool FiveSample = false; - [SerializeField] - public float Exposure_co_rcp = 1; - [SerializeField, Range(-5f, 2.5f)] - public float MipMapBias = 0; - } - sealed class GSRV2 : IDisposable { - private V2Setting 
v2settings; + private GSR.V2Setting v2settings; private static Matrix4x4 prevViewProjMatrix; private static int cameraStillFrameCnt; private ProfilingSampler profiler; @@ -248,7 +250,7 @@ namespace X.Rendering.Feature public int sameCameraFrmNum; - public GSRV2(V2Setting v2settings) + public GSRV2(GSR.V2Setting v2settings) { for (int i = 0; i < 32; ++i) { diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_EASU.compute b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_EASU.compute new file mode 100644 index 0000000..7590e90 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_EASU.compute @@ -0,0 +1,32 @@ +#pragma kernel EASUCS +// #pragma kernel RCASCS + +#define FSR1_BIND_SRV_INPUT_COLOR 0 +#define FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR 0 +#define FSR1_BIND_UAV_UPSCALED_OUTPUT 1 +#define FFX_GPU 1 + +#define FSR1_BIND_CB_FSR1 0 + +#include "fsr1/ffx_fsr1_callbacks_hlsl.h" +#include "fsr1/ffx_fsr1_easu.h" + +#ifndef FFX_FSR1_THREAD_GROUP_WIDTH +#define FFX_FSR1_THREAD_GROUP_WIDTH 64 +#endif // #ifndef FFX_FSR1_THREAD_GROUP_WIDTH +#ifndef FFX_FSR1_THREAD_GROUP_HEIGHT +#define FFX_FSR1_THREAD_GROUP_HEIGHT 1 +#endif // FFX_FSR1_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR1_THREAD_GROUP_DEPTH +#define FFX_FSR1_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR1_THREAD_GROUP_DEPTH +#ifndef FFX_FSR1_NUM_THREADS +#define FFX_FSR1_NUM_THREADS [numthreads(FFX_FSR1_THREAD_GROUP_WIDTH, FFX_FSR1_THREAD_GROUP_HEIGHT, FFX_FSR1_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR1_NUM_THREADS + +[WaveSize(64)] +FFX_FSR1_NUM_THREADS +void EASUCS(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID) +{ + EASU(LocalThreadId, WorkGroupId, Dtid); +} diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_EASU.compute.meta 
b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_EASU.compute.meta new file mode 100644 index 0000000..9f5227a --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_EASU.compute.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 787b0c165dad9074e9489817de945916 +ComputeShaderImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_RACS.compute b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_RACS.compute new file mode 100644 index 0000000..8efacb4 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_RACS.compute @@ -0,0 +1,29 @@ +#pragma kernel RCASCS + + #define FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR 0 + #define FSR1_BIND_UAV_UPSCALED_OUTPUT 0 + #define FSR1_BIND_CB_FSR1 0 + #define FFX_GPU 1 + + #include "fsr1/ffx_fsr1_callbacks_hlsl.h" + #include "fsr1/ffx_fsr1_rcas.h" + +#ifndef FFX_FSR1_THREAD_GROUP_WIDTH +#define FFX_FSR1_THREAD_GROUP_WIDTH 64 +#endif // #ifndef FFX_FSR1_THREAD_GROUP_WIDTH +#ifndef FFX_FSR1_THREAD_GROUP_HEIGHT +#define FFX_FSR1_THREAD_GROUP_HEIGHT 1 +#endif // FFX_FSR1_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR1_THREAD_GROUP_DEPTH +#define FFX_FSR1_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR1_THREAD_GROUP_DEPTH +#ifndef FFX_FSR1_NUM_THREADS +#define FFX_FSR1_NUM_THREADS [numthreads(FFX_FSR1_THREAD_GROUP_WIDTH, FFX_FSR1_THREAD_GROUP_HEIGHT, FFX_FSR1_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR1_NUM_THREADS + +[WaveSize(64)] +FFX_FSR1_NUM_THREADS +void RCASCS(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID) +{ + RCAS(LocalThreadId, WorkGroupId, Dtid); +} \ No newline at end of file diff --git 
a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_RACS.compute.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_RACS.compute.meta new file mode 100644 index 0000000..8f338ba --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV1_RACS.compute.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: a50f730ab549f794cbe91f005703e208 +ComputeShaderImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV2.compute b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV2.compute new file mode 100644 index 0000000..ad8fcb5 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV2.compute @@ -0,0 +1,14 @@ +// Each #kernel tells which function to compile; you can have many kernels +#pragma kernel CSMain + +// Create a RenderTexture with enableRandomWrite flag and set it +// with cs.SetTexture +RWTexture2D Result; + +[numthreads(8,8,1)] +void CSMain (uint3 id : SV_DispatchThreadID) +{ + // TODO: insert actual code here! 
+ + Result[id.xy] = float4(id.x & id.y, (id.x & 15)/15.0, (id.y & 15)/15.0, 0.0); +} diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV2.compute.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV2.compute.meta new file mode 100644 index 0000000..af5dcbd --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV2.compute.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 7b9233ff11584414688b5f10a526ca15 +ComputeShaderImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV3.compute b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV3.compute new file mode 100644 index 0000000..ad8fcb5 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV3.compute @@ -0,0 +1,14 @@ +// Each #kernel tells which function to compile; you can have many kernels +#pragma kernel CSMain + +// Create a RenderTexture with enableRandomWrite flag and set it +// with cs.SetTexture +RWTexture2D Result; + +[numthreads(8,8,1)] +void CSMain (uint3 id : SV_DispatchThreadID) +{ + // TODO: insert actual code here! 
+ + Result[id.xy] = float4(id.x & id.y, (id.x & 15)/15.0, (id.y & 15)/15.0, 0.0); +} diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV3.compute.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV3.compute.meta new file mode 100644 index 0000000..c3e099e --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/FsrV3.compute.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: b79f3a2fdeeac5745b545fc41148f6c3 +ComputeShaderImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_common_types.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_common_types.h new file mode 100644 index 0000000..2c4f0ba --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_common_types.h @@ -0,0 +1,558 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_COMMON_TYPES_H +#define FFX_COMMON_TYPES_H + +#if defined(FFX_CPU) +#define FFX_PARAMETER_IN +#define FFX_PARAMETER_OUT +#define FFX_PARAMETER_INOUT +#define FFX_PARAMETER_UNIFORM +#elif defined(FFX_HLSL) +#define FFX_PARAMETER_IN in +#define FFX_PARAMETER_OUT out +#define FFX_PARAMETER_INOUT inout +#define FFX_PARAMETER_UNIFORM uniform +#elif defined(FFX_GLSL) +#define FFX_PARAMETER_IN in +#define FFX_PARAMETER_OUT out +#define FFX_PARAMETER_INOUT inout +#define FFX_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found! +#endif // #if defined(FFX_CPU) + +#if defined(FFX_CPU) +/// A typedef for a boolean value. +/// +/// @ingroup CPUTypes +typedef bool FfxBoolean; + +/// A typedef for a unsigned 8bit integer. +/// +/// @ingroup CPUTypes +typedef uint8_t FfxUInt8; + +/// A typedef for a unsigned 16bit integer. +/// +/// @ingroup CPUTypes +typedef uint16_t FfxUInt16; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32; + +/// A typedef for a unsigned 64bit integer. +/// +/// @ingroup CPUTypes +typedef uint64_t FfxUInt64; + +/// A typedef for a signed 8bit integer. +/// +/// @ingroup CPUTypes +typedef int8_t FfxInt8; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup CPUTypes +typedef int16_t FfxInt16; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32; + +/// A typedef for a signed 64bit integer. 
+/// +/// @ingroup CPUTypes +typedef int64_t FfxInt64; + +/// A typedef for a floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2[2]; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3[3]; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4[4]; + +/// A typedef for a 2x2 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2x2[4]; + +/// A typedef for a 3x3 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3x3[9]; + +/// A typedef for a 3x4 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3x4[12]; + +/// A typedef for a 4x4 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4x4[16]; + +/// A typedef for a 2-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x4[4]; + +/// A typedef for a 2-dimensional 32bit usigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x4[4]; +#endif // #if defined(FFX_CPU) + +#if defined(FFX_HLSL) + +#define FfxFloat32Mat4 matrix +#define FfxFloat32Mat3 matrix + +/// A typedef for a boolean value. 
+/// +/// @ingroup HLSLTypes +typedef bool FfxBoolean; + +#if FFX_HLSL_SM>=62 + +/// @defgroup HLSL62Types HLSL 6.2 And Above Types +/// HLSL 6.2 and above type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +/// A typedef for a floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t2 FfxFloat32x2; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t3 FfxFloat32x3; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t4 FfxFloat32x4; + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +typedef float4x4 FfxFloat32x4x4; +typedef float3x4 FfxFloat32x3x4; +typedef float3x3 FfxFloat32x3x3; +typedef float2x2 FfxFloat32x2x2; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t FfxUInt32; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t2 FfxUInt32x2; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t3 FfxUInt32x3; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t4 FfxUInt32x4; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t FfxInt32; + +/// A typedef for a 2-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t2 FfxInt32x2; + +/// A typedef for a 3-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t3 FfxInt32x3; + +/// A typedef for a 4-dimensional signed 32bit integer. 
+/// +/// @ingroup HLSL62Types +typedef int32_t4 FfxInt32x4; + +#else // #if FFX_HLSL_SM>=62 + +/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types +/// HLSL 6.1 and below type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +#define FfxFloat32 float +#define FfxFloat32x2 float2 +#define FfxFloat32x3 float3 +#define FfxFloat32x4 float4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +#define FfxFloat32x4x4 float4x4 +#define FfxFloat32x3x4 float3x4 +#define FfxFloat32x3x3 float3x3 +#define FfxFloat32x2x2 float2x2 + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup GPU +typedef uint FfxUInt32; +typedef uint2 FfxUInt32x2; +typedef uint3 FfxUInt32x3; +typedef uint4 FfxUInt32x4; + +typedef int FfxInt32; +typedef int2 FfxInt32x2; +typedef int3 FfxInt32x3; +typedef int4 FfxInt32x4; + +#endif // #if FFX_HLSL_SM>=62 + +#if FFX_HALF + +#if FFX_HLSL_SM >= 62 + +typedef float16_t FfxFloat16; +typedef float16_t2 FfxFloat16x2; +typedef float16_t3 FfxFloat16x3; +typedef float16_t4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup HLSLTypes +typedef uint16_t FfxUInt16; +typedef uint16_t2 FfxUInt16x2; +typedef uint16_t3 FfxUInt16x3; +typedef uint16_t4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup HLSLTypes +typedef int16_t FfxInt16; +typedef int16_t2 FfxInt16x2; +typedef int16_t3 FfxInt16x3; +typedef int16_t4 FfxInt16x4; +#else // #if FFX_HLSL_SM>=62 +typedef min16float FfxFloat16; +typedef min16float2 FfxFloat16x2; +typedef min16float3 FfxFloat16x3; +typedef min16float4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup HLSLTypes +typedef min16uint FfxUInt16; +typedef min16uint2 FfxUInt16x2; +typedef min16uint3 FfxUInt16x3; +typedef min16uint4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. 
+/// +/// @ingroup HLSLTypes +typedef min16int FfxInt16; +typedef min16int2 FfxInt16x2; +typedef min16int3 FfxInt16x3; +typedef min16int4 FfxInt16x4; +#endif // #if FFX_HLSL_SM>=62 + +#endif // FFX_HALF + +#endif // #if defined(FFX_HLSL) + +#if defined(FFX_GLSL) + +#define FfxFloat32Mat4 mat4 +#define FfxFloat32Mat3 mat3 + +/// A typedef for a boolean value. +/// +/// @ingroup GLSLTypes +#define FfxBoolean bool +#define FfxFloat32 float +#define FfxFloat32x2 vec2 +#define FfxFloat32x3 vec3 +#define FfxFloat32x4 vec4 +#define FfxUInt32 uint +#define FfxUInt32x2 uvec2 +#define FfxUInt32x3 uvec3 +#define FfxUInt32x4 uvec4 +#define FfxInt32 int +#define FfxInt32x2 ivec2 +#define FfxInt32x3 ivec3 +#define FfxInt32x4 ivec4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +#define FfxFloat32x4x4 mat4 +#define FfxFloat32x3x4 mat4x3 +#define FfxFloat32x3x3 mat3 +#define FfxFloat32x2x2 mat2 + +#if FFX_HALF +#define FfxFloat16 float16_t +#define FfxFloat16x2 f16vec2 +#define FfxFloat16x3 f16vec3 +#define FfxFloat16x4 f16vec4 +#define FfxUInt16 uint16_t +#define FfxUInt16x2 u16vec2 +#define FfxUInt16x3 u16vec3 +#define FfxUInt16x4 u16vec4 +#define FfxInt16 int16_t +#define FfxInt16x2 i16vec2 +#define FfxInt16x3 i16vec3 +#define FfxInt16x4 i16vec4 +#endif // FFX_HALF +#endif // #if defined(FFX_GLSL) + +// Global toggles: +// #define FFX_HALF (1) +// #define FFX_HLSL_SM (62) + +#if FFX_HALF + +#if FFX_HLSL_SM >= 62 + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix 
TypeName; + +#else //FFX_HLSL_SM>=62 + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) FFX_MIN16_SCALAR( TypeName, BaseComponentType ); +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ); +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ); + +#endif //FFX_HLSL_SM>=62 + +#else //FFX_HALF + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#endif //FFX_HALF + +#if defined(FFX_GPU) +// Common typedefs: +#if defined(FFX_HLSL) +FFX_MIN16_SCALAR( FFX_MIN16_F , float ); +FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 ); + +FFX_MIN16_SCALAR( FFX_MIN16_I, int ); +FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 ); + +FFX_MIN16_SCALAR( FFX_MIN16_U, uint ); +FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 ); + +FFX_16BIT_SCALAR( FFX_F16_t , float ); +FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 ); +FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 
); +FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 ); + +FFX_16BIT_SCALAR( FFX_I16_t, int ); +FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 ); +FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 ); +FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 ); + +FFX_16BIT_SCALAR( FFX_U16_t, uint ); +FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 ); +FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 ); +FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 ); + +#define TYPEDEF_MIN16_TYPES(Prefix) \ +typedef FFX_MIN16_F Prefix##_F; \ +typedef FFX_MIN16_F2 Prefix##_F2; \ +typedef FFX_MIN16_F3 Prefix##_F3; \ +typedef FFX_MIN16_F4 Prefix##_F4; \ +typedef FFX_MIN16_I Prefix##_I; \ +typedef FFX_MIN16_I2 Prefix##_I2; \ +typedef FFX_MIN16_I3 Prefix##_I3; \ +typedef FFX_MIN16_I4 Prefix##_I4; \ +typedef FFX_MIN16_U Prefix##_U; \ +typedef FFX_MIN16_U2 Prefix##_U2; \ +typedef FFX_MIN16_U3 Prefix##_U3; \ +typedef FFX_MIN16_U4 Prefix##_U4; + +#define TYPEDEF_16BIT_TYPES(Prefix) \ +typedef FFX_16BIT_F Prefix##_F; \ +typedef FFX_16BIT_F2 Prefix##_F2; \ +typedef FFX_16BIT_F3 Prefix##_F3; \ +typedef FFX_16BIT_F4 Prefix##_F4; \ +typedef FFX_16BIT_I Prefix##_I; \ +typedef FFX_16BIT_I2 Prefix##_I2; \ +typedef FFX_16BIT_I3 Prefix##_I3; \ +typedef FFX_16BIT_I4 Prefix##_I4; \ +typedef FFX_16BIT_U Prefix##_U; \ +typedef FFX_16BIT_U2 Prefix##_U2; \ +typedef FFX_16BIT_U3 Prefix##_U3; \ +typedef FFX_16BIT_U4 Prefix##_U4; + +#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \ +typedef FfxFloat32 Prefix##_F; \ +typedef FfxFloat32x2 Prefix##_F2; \ +typedef FfxFloat32x3 Prefix##_F3; \ +typedef FfxFloat32x4 Prefix##_F4; \ +typedef FfxInt32 Prefix##_I; \ +typedef FfxInt32x2 Prefix##_I2; \ +typedef FfxInt32x3 Prefix##_I3; \ +typedef FfxInt32x4 Prefix##_I4; \ +typedef FfxUInt32 Prefix##_U; \ +typedef FfxUInt32x2 Prefix##_U2; \ +typedef FfxUInt32x3 Prefix##_U3; \ +typedef FfxUInt32x4 Prefix##_U4; +#endif // #if defined(FFX_HLSL) + +#if defined(FFX_GLSL) + +#if FFX_HALF + +#define FFX_MIN16_F float16_t +#define FFX_MIN16_F2 f16vec2 +#define FFX_MIN16_F3 f16vec3 +#define FFX_MIN16_F4 f16vec4 + 
+#define FFX_MIN16_I int16_t +#define FFX_MIN16_I2 i16vec2 +#define FFX_MIN16_I3 i16vec3 +#define FFX_MIN16_I4 i16vec4 + +#define FFX_MIN16_U uint16_t +#define FFX_MIN16_U2 u16vec2 +#define FFX_MIN16_U3 u16vec3 +#define FFX_MIN16_U4 u16vec4 + +#define FFX_16BIT_F float16_t +#define FFX_16BIT_F2 f16vec2 +#define FFX_16BIT_F3 f16vec3 +#define FFX_16BIT_F4 f16vec4 + +#define FFX_16BIT_I int16_t +#define FFX_16BIT_I2 i16vec2 +#define FFX_16BIT_I3 i16vec3 +#define FFX_16BIT_I4 i16vec4 + +#define FFX_16BIT_U uint16_t +#define FFX_16BIT_U2 u16vec2 +#define FFX_16BIT_U3 u16vec3 +#define FFX_16BIT_U4 u16vec4 + +#else // FFX_HALF + +#define FFX_MIN16_F float +#define FFX_MIN16_F2 vec2 +#define FFX_MIN16_F3 vec3 +#define FFX_MIN16_F4 vec4 + +#define FFX_MIN16_I int +#define FFX_MIN16_I2 ivec2 +#define FFX_MIN16_I3 ivec3 +#define FFX_MIN16_I4 ivec4 + +#define FFX_MIN16_U uint +#define FFX_MIN16_U2 uvec2 +#define FFX_MIN16_U3 uvec3 +#define FFX_MIN16_U4 uvec4 + +#define FFX_16BIT_F float +#define FFX_16BIT_F2 vec2 +#define FFX_16BIT_F3 vec3 +#define FFX_16BIT_F4 vec4 + +#define FFX_16BIT_I int +#define FFX_16BIT_I2 ivec2 +#define FFX_16BIT_I3 ivec3 +#define FFX_16BIT_I4 ivec4 + +#define FFX_16BIT_U uint +#define FFX_16BIT_U2 uvec2 +#define FFX_16BIT_U3 uvec3 +#define FFX_16BIT_U4 uvec4 + +#endif // FFX_HALF + +#endif // #if defined(FFX_GLSL) + +#endif // #if defined(FFX_GPU) +#endif // #ifndef FFX_COMMON_TYPES_H diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_common_types.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_common_types.h.meta new file mode 100644 index 0000000..95af5a6 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_common_types.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 760c5fa13b805d240a834996d19dafb3 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: 
{} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core.h new file mode 100644 index 0000000..94e98b7 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core.h @@ -0,0 +1,83 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +/// @defgroup FfxGPU GPU +/// The FidelityFX SDK GPU References +/// +/// @ingroup ffxSDK + +/// @defgroup FfxHLSL HLSL References +/// FidelityFX SDK HLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGLSL GLSL References +/// FidelityFX SDK GLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGPUEffects FidelityFX GPU References +/// FidelityFX Effect GPU Reference Documentation +/// +/// @ingroup FfxGPU + +/// @defgroup GPUCore GPU Core +/// GPU defines and functions +/// +/// @ingroup FfxGPU + +#if !defined(FFX_CORE_H) +#define FFX_CORE_H + +#define FFX_HLSL +#define FFX_GPU + +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler + +#include "ffx_common_types.h" + +#if defined(FFX_CPU) + #include "ffx_core_cpu.h" +#endif // #if defined(FFX_CPU) + +#if defined(FFX_GLSL) && defined(FFX_GPU) + #include "ffx_core_glsl.h" +#endif // #if defined(FFX_GLSL) && defined(FFX_GPU) + +#if defined(FFX_HLSL) && defined(FFX_GPU) + #include "ffx_core_hlsl.h" +#endif // #if defined(FFX_HLSL) && defined(FFX_GPU) + +#if defined(FFX_GPU) + #include "ffx_core_gpu_common.h" + #include "ffx_core_gpu_common_half.h" + #include "ffx_core_portability.h" +#endif // #if defined(FFX_GPU) + +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler + +#endif // #if !defined(FFX_CORE_H) diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core.h.meta new file mode 100644 index 0000000..055ec6e --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: ea0017a7ac0ca58409fbdb344448b2b2 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + 
defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_cpu.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_cpu.h new file mode 100644 index 0000000..4b6c41a --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_cpu.h @@ -0,0 +1,338 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// A define for a true value in a boolean expression. 
+/// +/// @ingroup CPUTypes +#define FFX_TRUE (1) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup CPUTypes +#define FFX_FALSE (0) + +#if !defined(FFX_STATIC) +/// A define to abstract declaration of static variables and functions. +/// +/// @ingroup CPUTypes +#define FFX_STATIC static +#endif // #if !defined(FFX_STATIC) + +/// @defgroup CPUCore CPU Core +/// Core CPU-side defines and functions +/// +/// @ingroup ffxHost + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wunused-variable" +#endif + +/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer. +/// +/// @param [in] x A 32bit floating point value. +/// +/// @returns +/// An unsigned 32bit integer value containing the bit pattern of x. +/// +/// @ingroup CPUCore +FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + union + { + FfxFloat32 f; + FfxUInt32 u; + } bits; + + bits.f = x; + return bits.u; +} + +FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return a[0] * b[0] + a[1] * b[1]; +} + +FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; +} + +FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; +} + +/// Compute the linear interpolation between two values. +/// +/// Evaluated on the CPU as y * t + (-x * t + x), which implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return y * t + (-x * t + x); +} + +/// Compute the reciprocal of a value.
+/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal value of x. +/// +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 x) +{ + return 1.0f / x; +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return FfxFloat32(sqrt(x)); +} + +FFX_STATIC FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 x) +{ + return x - FfxFloat32(floor(x)); +} + +/// Compute the reciprocal square root of a value. +/// +/// @param [in] x The value to compute the reciprocal square root for. +/// +/// @returns +/// The reciprocal square root value of x. +/// +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxRsqrt(FfxFloat32 x) +{ + return ffxReciprocal(ffxSqrt(x)); +} + +FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return x < y ? x : y; +} + +FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return x < y ? x : y; +} + +FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return x > y ? x : y; +} + +FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return x > y ? x : y; +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x.
+/// +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 x) +{ + return ffxMin(1.0f, ffxMax(0.0f, x)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +FFX_STATIC void ffxOpAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d[0] = a[0] + b; + d[1] = a[1] + b; + d[2] = a[2] + b; + return; +} + +FFX_STATIC void ffxOpACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + return; +} + +FFX_STATIC void ffxOpAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +{ + d[0] = a[0] * b[0]; + d[1] = a[1] * b[1]; + d[2] = a[2] * b[2]; + return; +} + +FFX_STATIC void ffxOpAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d[0] = a[0] * b; + d[1] = a[1] * b; + d[2] = a[2] * b; + return; +} + +FFX_STATIC void ffxOpARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d[0] = ffxReciprocal(a[0]); + d[1] = ffxReciprocal(a[1]); + d[2] = ffxReciprocal(a[2]); + return; +} + +/// Convert FfxFloat32 to half (in lower 16-bits of output). +/// +/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +/// +/// The function supports denormals. +/// +/// Some conversion rules are to make computations possibly "safer" on the GPU, +/// -INF & -NaN -> -65504 +/// +INF & +NaN -> +65504 +/// +/// @param [in] f The 32bit floating point value to convert. +/// +/// @returns +/// The closest 16bit floating point value to f. 
+/// +/// @ingroup CPUCore +FFX_STATIC FfxUInt32 ffxF32ToF16(FfxFloat32 f) +{ + static FfxUInt16 base[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, + 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, + 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, + 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, + 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff + }; + + static FfxUInt8 shift[512] = { + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18 + }; + + union + { + FfxFloat32 f; + FfxUInt32 u; + } bits; + + bits.f = f; + FfxUInt32 u = bits.u; + FfxUInt32 i = u >> 23; + return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]); +} + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of value into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. 
+/// +/// @param [in] x A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup CPUCore +FFX_STATIC FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 x) +{ + return ffxF32ToF16(x[0]) + (ffxF32ToF16(x[1]) << 16); +} diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_cpu.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_cpu.h.meta new file mode 100644 index 0000000..67a62c5 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_cpu.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 21ca63a53d7db7b4fa15d57db61f6129 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_glsl.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_glsl.h new file mode 100644 index 0000000..c8dccac --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_glsl.h @@ -0,0 +1,1916 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// @defgroup GLSLCore GLSL Core +/// GLSL core defines and functions +/// +/// @ingroup FfxGLSL + +/// A define for abstracting select functionality for pre/post HLSL 21 +/// +/// @ingroup GLSLCore +#define FFX_SELECT(cond, arg1, arg2) cond ? arg1 : arg2 + +/// A define for abstracting shared memory between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_GROUPSHARED shared + +/// A define for abstracting compute memory barriers between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_GROUP_MEMORY_BARRIER groupMemoryBarrier(); barrier() + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_ADD(x, y) atomicAdd(x, y) + +/// A define for abstracting compute atomic additions between shading languages. 
+/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_ADD_RETURN(x, y, r) r = atomicAdd(x, y) + +/// A define for abstracting compute atomic OR between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_OR(x, y) atomicOr(x, y) + +/// A define for abstracting compute atomic min between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_MIN(x, y) atomicMin(x, y) + +/// A define for abstracting compute atomic max between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_MAX(x, y) atomicMax(x, y) + +/// A define added to accept static markup on functions to aid CPU/GPU portability of code. +/// +/// @ingroup GLSLCore +#define FFX_STATIC + +/// A define for abstracting loop unrolling between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_UNROLL + +/// A define for abstracting a 'greater than' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_GREATER_THAN(x, y) greaterThan(x, y) + +/// A define for abstracting a 'greater than or equal' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_GREATER_THAN_EQUAL(x, y) greaterThanEqual(x, y) + +/// A define for abstracting a 'less than' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_LESS_THAN(x, y) lessThan(x, y) + +/// A define for abstracting a 'less than or equal' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_LESS_THAN_EQUAL(x, y) lessThanEqual(x, y) + +/// A define for abstracting an 'equal' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_EQUAL(x, y) equal(x, y) + +/// A define for abstracting a 'not equal' comparison operator between two types. +/// +/// @ingroup GLSLCore +#define FFX_NOT_EQUAL(x, y) notEqual(x, y) + +/// A define for abstracting matrix multiply operations between shading languages. 
+/// +/// @ingroup GLSLCore +#define FFX_MATRIX_MULTIPLY(a, b) (a * b) + +/// A define for abstracting vector transformations between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_TRANSFORM_VECTOR(a, b) (a * b) + +/// A define for abstracting modulo operations between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_MODULO(a, b) (mod(a, b)) + +/// Broadcast a scalar value to a 1-dimensional floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32x2(FfxFloat32(x)) + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32x3(FfxFloat32(x)) + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32x4(FfxFloat32(x)) + +/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_UINT32(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_UINT32X2(x) FfxUInt32x2(FfxUInt32(x)) + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_UINT32X3(x) FfxUInt32x3(FfxUInt32(x)) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_UINT32X4(x) FfxUInt32x4(FfxUInt32(x)) + +/// Broadcast a scalar value to a 1-dimensional signed integer vector. +/// +/// @ingroup GLSLCore +#define FFX_BROADCAST_INT32(x) FfxInt32(x) + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. 
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_INT32X2(s) FfxInt32x2(FfxInt32(s))
+
+/// Broadcast a scalar to a 3-dimensional signed integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_INT32X3(s) FfxInt32x3(FfxInt32(s))
+
+/// Broadcast a scalar to a 4-dimensional signed integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_INT32X4(s) FfxInt32x4(FfxInt32(s))
+
+/// Convert a scalar to the FFX_MIN16_F floating point type.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_FLOAT16(s) FFX_MIN16_F(s)
+
+/// Broadcast a scalar to a 2-dimensional FFX_MIN16_F2 floating point vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_FLOAT16X2(s) FFX_MIN16_F2(FFX_MIN16_F(s))
+
+/// Broadcast a scalar to a 3-dimensional FFX_MIN16_F3 floating point vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_FLOAT16X3(s) FFX_MIN16_F3(FFX_MIN16_F(s))
+
+/// Broadcast a scalar to a 4-dimensional FFX_MIN16_F4 floating point vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_FLOAT16X4(s) FFX_MIN16_F4(FFX_MIN16_F(s))
+
+/// Convert a scalar to the FFX_MIN16_U unsigned integer type.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_UINT16(s) FFX_MIN16_U(s)
+
+/// Broadcast a scalar to a 2-dimensional FFX_MIN16_U2 unsigned integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_UINT16X2(s) FFX_MIN16_U2(FFX_MIN16_U(s))
+
+/// Broadcast a scalar to a 3-dimensional FFX_MIN16_U3 unsigned integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_UINT16X3(s) FFX_MIN16_U3(FFX_MIN16_U(s))
+
+/// Broadcast a scalar to a 4-dimensional FFX_MIN16_U4 unsigned integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_UINT16X4(s) FFX_MIN16_U4(FFX_MIN16_U(s))
+
+/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_INT16(s) FFX_MIN16_I(s)
+
+/// Broadcast a scalar to a 2-dimensional FFX_MIN16_I2 signed integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_INT16X2(s) FFX_MIN16_I2(FFX_MIN16_I(s))
+
+/// Broadcast a scalar to a 3-dimensional FFX_MIN16_I3 signed integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_INT16X3(s) FFX_MIN16_I3(FFX_MIN16_I(s))
+
+/// Broadcast a scalar to a 4-dimensional FFX_MIN16_I4 signed integer vector.
+///
+/// @ingroup GLSLCore
+#define FFX_BROADCAST_MIN_INT16X4(s) FFX_MIN16_I4(FFX_MIN16_I(s))
+
+// NOTE(review): this extension is required unconditionally - it sits outside both the
+// FFX_SKIP_EXT and the FFX_HALF guards below. Confirm that this is intentional.
+    #extension GL_EXT_shader_explicit_arithmetic_types : require
+#if !defined(FFX_SKIP_EXT)
+#if FFX_HALF
+    #extension GL_EXT_shader_16bit_storage : require
+#endif // FFX_HALF
+
+#if defined(FFX_LONG)
+    #extension GL_ARB_gpu_shader_int64 : require
+    #extension GL_NV_shader_atomic_int64 : require
+#endif // #if defined(FFX_LONG)
+
+#if defined(FFX_WAVE)
+    #extension GL_KHR_shader_subgroup_arithmetic : require
+    #extension GL_KHR_shader_subgroup_ballot : require
+    #extension GL_KHR_shader_subgroup_quad : require
+    #extension GL_KHR_shader_subgroup_shuffle : require
+#endif // #if defined(FFX_WAVE)
+#endif // #if !defined(FFX_SKIP_EXT)
+
+// Prototypes for ffxSqrt, which is called by the ffxRsqrt overloads before its definition.
+FfxFloat32   ffxSqrt(FfxFloat32 v);
+FfxFloat32x2 ffxSqrt(FfxFloat32x2 v);
+FfxFloat32x3 ffxSqrt(FfxFloat32x3 v);
+FfxFloat32x4 ffxSqrt(FfxFloat32x4 v);
+
+/// Reinterpret the bits of an unsigned integer as a floating-point value.
+///
+/// The bit pattern is passed through the GLSL uintBitsToFloat built-in; no numeric
+/// conversion takes place.
+///
+/// @param [in] x                   The bit pattern to reinterpret.
+///
+/// @returns
+/// The floating-point value that has the same bit pattern as x.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxAsFloat(FfxUInt32 x)
+{
+    FfxFloat32 reinterpreted = uintBitsToFloat(x);
+    return reinterpreted;
+}
+
+/// Interprets the bit pattern of x as a floating-point number.
+///
+/// @param [in] x The input value.
+///
+/// @returns
+/// The input interpreted as a floating-point number.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
+{
+    FfxFloat32x2 reinterpreted = uintBitsToFloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bits of each component of x as a floating-point value.
+///
+/// @param [in] x                   The bit patterns to reinterpret.
+///
+/// @returns
+/// A 3-dimensional floating-point vector with the same bit patterns as x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
+{
+    FfxFloat32x3 reinterpreted = uintBitsToFloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bits of each component of x as a floating-point value.
+///
+/// @param [in] x                   The bit patterns to reinterpret.
+///
+/// @returns
+/// A 4-dimensional floating-point vector with the same bit patterns as x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
+{
+    FfxFloat32x4 reinterpreted = uintBitsToFloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bits of a floating-point value as an unsigned integer.
+///
+/// The bit pattern is passed through the GLSL floatBitsToUint built-in; no numeric
+/// conversion takes place.
+///
+/// @param [in] x                   The floating-point value to reinterpret.
+///
+/// @returns
+/// The unsigned integer that has the same bit pattern as x.
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxAsUInt32(FfxFloat32 x)
+{
+    FfxUInt32 rawBits = floatBitsToUint(x);
+    return rawBits;
+}
+
+/// Reinterpret the bits of each component of x as an unsigned integer.
+///
+/// @param [in] x                   The floating-point values to reinterpret.
+///
+/// @returns
+/// A 2-dimensional unsigned integer vector with the same bit patterns as x.
+///
+/// @ingroup GLSLCore
+FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
+{
+    FfxUInt32x2 rawBits = floatBitsToUint(x);
+    return rawBits;
+}
+
+/// Reinterpret the bits of each component of x as an unsigned integer.
+///
+/// @param [in] x                   The floating-point values to reinterpret.
+///
+/// @returns
+/// A 3-dimensional unsigned integer vector with the same bit patterns as x.
+///
+/// @ingroup GLSLCore
+FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
+{
+    FfxUInt32x3 rawBits = floatBitsToUint(x);
+    return rawBits;
+}
+
+/// Reinterpret the bits of each component of x as an unsigned integer.
+///
+/// @param [in] x                   The floating-point values to reinterpret.
+///
+/// @returns
+/// A 4-dimensional unsigned integer vector with the same bit patterns as x.
+///
+/// @ingroup GLSLCore
+FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
+{
+    FfxUInt32x4 rawBits = floatBitsToUint(x);
+    return rawBits;
+}
+
+/// Pack 2x32-bit floating point values in a single 32bit value.
+///
+/// Each component of value is converted to 16-bit floating point by the GLSL packHalf2x16
+/// built-in; X lands in the lower 16 bits of the result and Y in the upper 16 bits.
+///
+/// @param [in] value               The 2-dimensional floating point value to convert and pack.
+///
+/// @returns
+/// A 32bit value holding the two 16bit floating point values.
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 value)
+{
+    FfxUInt32 packedHalves = packHalf2x16(value);
+    return packedHalves;
+}
+
+/// Convert a 32bit floating point value to 16bit floating point bits.
+///
+/// The value is packed together with a second component of 0.0, so only the first
+/// packed half of the result carries data.
+///
+/// @param [in] value               The value to convert.
+///
+/// @returns
+/// The result of packing (value, 0.0) with the GLSL packHalf2x16 built-in.
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxF32ToF16(FfxFloat32 value)
+{
+    FfxFloat32x2 paddedPair = FfxFloat32x2(value, 0.0);
+    return packHalf2x16(paddedPair);
+}
+
+/// Replicate a scalar into both components of a 2-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional floating point vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
+{
+    FfxFloat32x2 splat = FfxFloat32x2(value, value);
+    return splat;
+}
+
+/// Replicate a scalar into all components of a 3-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional floating point vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
+{
+    FfxFloat32x3 splat = FfxFloat32x3(value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into all components of a 4-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional floating point vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
+{
+    FfxFloat32x4 splat = FfxFloat32x4(value, value, value, value);
+    return splat;
+}
+
+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional signed integer vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxInt32x2 ffxBroadcast2(FfxInt32 value)
+{
+    FfxInt32x2 splat = FfxInt32x2(value, value);
+    return splat;
+}
+
+/// Replicate a scalar into all components of a 3-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional signed integer vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxInt32x3 ffxBroadcast3(FfxInt32 value)
+{
+    FfxInt32x3 splat = FfxInt32x3(value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into all components of a 4-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional signed integer vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxInt32x4 ffxBroadcast4(FfxInt32 value)
+{
+    FfxInt32x4 splat = FfxInt32x4(value, value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into both components of a 2-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional unsigned integer vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
+{
+    FfxUInt32x2 splat = FfxUInt32x2(value, value);
+    return splat;
+}
+
+/// Replicate a scalar into all components of a 3-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional unsigned integer vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
+{
+    FfxUInt32x3 splat = FfxUInt32x3(value, value, value);
+    return splat;
+}
+
+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
+///
+/// @param [in] value The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional unsigned integer vector with value in each component.
+///
+/// @ingroup GLSLCore
+FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
+{
+    return FfxUInt32x4(value, value, value, value);
+}
+
+/// Extract a range of bits from a 32bit unsigned integer.
+///
+/// Forwards to the GLSL bitfieldExtract built-in; off and bits are converted to
+/// signed integers because that is the parameter type the call is given here.
+///
+/// @param [in] src                 The value to extract bits from.
+/// @param [in] off                 The offset argument passed to bitfieldExtract.
+/// @param [in] bits                The bit-count argument passed to bitfieldExtract.
+///
+/// @returns
+/// The result of bitfieldExtract(src, off, bits).
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxBitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
+{
+    return bitfieldExtract(src, FfxInt32(off), FfxInt32(bits));
+}
+
+/// Merge two values bit by bit under a mask.
+///
+/// @param [in] src                 The value that supplies the bits where mask is 0.
+/// @param [in] ins                 The value that supplies the bits where mask is 1.
+/// @param [in] mask                Selects, per bit, whether ins (1) or src (0) is used.
+///
+/// @returns
+/// (ins AND mask) OR (src AND NOT mask).
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxBitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
+{
+    return (ins & mask) | (src & (~mask));
+}
+
+// Proxy for V_BFI_B32 where the 'mask' is set as 'bits'.
+// NOTE(review): the remainder of this comment (it went on with "'mask=(1" and a shift
+// expression) and everything between it and the doc comment below were lost when
+// angle-bracket markup was stripped from this text. A definition may be missing at this
+// point - restore the span from the original header.
+
+/// Compute the linear interpolation between two values.
+///
+/// Implemented by calling the GLSL mix intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x                   The first value to lerp between.
+/// @param [in] y                   The second value to lerp between.
+/// @param [in] t                   The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// Implemented by calling the GLSL mix intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x                   The first value to lerp between.
+/// @param [in] y                   The second value to lerp between.
+/// @param [in] t                   The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// Implemented by calling the GLSL mix intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
+{
+    FfxFloat32x2 blended = mix(x, y, t);
+    return blended;
+}
+
+/// Linearly interpolate between two 3-dimensional vectors with a single scalar weight.
+///
+/// Forwards to the GLSL mix built-in, i.e. (1 - t) * x + t * y.
+///
+/// @param [in] x                   The first value to lerp between.
+/// @param [in] y                   The second value to lerp between.
+/// @param [in] t                   The weight deciding how much of x and how much of y is used.
+///
+/// @returns
+/// The value interpolated between x and y according to t.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
+{
+    FfxFloat32x3 blended = mix(x, y, t);
+    return blended;
+}
+
+/// Linearly interpolate between two 3-dimensional vectors with a per-component weight.
+///
+/// Forwards to the GLSL mix built-in, i.e. (1 - t) * x + t * y.
+///
+/// @param [in] x                   The first value to lerp between.
+/// @param [in] y                   The second value to lerp between.
+/// @param [in] t                   The weights deciding how much of x and how much of y is used.
+///
+/// @returns
+/// The value interpolated between x and y according to t.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
+{
+    FfxFloat32x3 blended = mix(x, y, t);
+    return blended;
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// Implemented by calling the GLSL mix intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
+{
+    FfxFloat32x4 blended = mix(x, y, t);
+    return blended;
+}
+
+/// Linearly interpolate between two 4-dimensional vectors with a per-component weight.
+///
+/// Forwards to the GLSL mix built-in, i.e. (1 - t) * x + t * y.
+///
+/// @param [in] x                   The first value to lerp between.
+/// @param [in] y                   The second value to lerp between.
+/// @param [in] t                   The weights deciding how much of x and how much of y is used.
+///
+/// @returns
+/// The value interpolated between x and y according to t.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
+{
+    FfxFloat32x4 blended = mix(x, y, t);
+    return blended;
+}
+
+/// Return the largest of three scalar values.
+///
+/// NOTE: The intent is for this to compile down to a single V_MAX3_F32 operation on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    FfxFloat32 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Return the component-wise largest of three 2-dimensional vectors.
+///
+/// NOTE: The intent is for this to compile down to V_MAX3_F32 operations on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    FfxFloat32x2 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_F32 operation on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x                   The first value to include in the max calculation.
+/// @param [in] y                   The second value to include in the max calculation.
+/// @param [in] z                   The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    FfxFloat32x3 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Return the component-wise largest of three 4-dimensional vectors.
+///
+/// NOTE: The intent is for this to compile down to V_MAX3_F32 operations on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    FfxFloat32x4 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Return the largest of three unsigned integers.
+///
+/// NOTE: The intent is for this to compile down to a single three-operand max
+/// operation on GCN or RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    FfxUInt32 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single three-operand max operation on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    FfxUInt32x2 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Return the component-wise largest of three 3-dimensional unsigned integer vectors.
+///
+/// NOTE: The intent is for this to compile down to three-operand max operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    FfxUInt32x3 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Return the component-wise largest of three 4-dimensional unsigned integer vectors.
+///
+/// NOTE: The intent is for this to compile down to three-operand max operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    FfxUInt32x4 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Return the middle value of three scalars.
+///
+/// The result is max(min(x, y), min(max(x, y), z)).
+///
+/// NOTE: The intent is for this to compile down to a single V_MED3_F32 operation on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    FfxFloat32 lowerOfXY = min(x, y);
+    FfxFloat32 upperOfXY = max(x, y);
+    return max(lowerOfXY, min(upperOfXY, z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single V_MED3_F32 operation on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first value to include in the median calculation.
+/// @param [in] y                   The second value to include in the median calculation.
+/// @param [in] z                   The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    FfxFloat32x2 lowerOfXY = min(x, y);
+    FfxFloat32x2 upperOfXY = max(x, y);
+    return max(lowerOfXY, min(upperOfXY, z));
+}
+
+/// Return the component-wise middle value of three 3-dimensional vectors.
+///
+/// The result is max(min(x, y), min(max(x, y), z)).
+///
+/// NOTE: The intent is for this to compile down to V_MED3_F32 operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    FfxFloat32x3 lowerOfXY = min(x, y);
+    FfxFloat32x3 upperOfXY = max(x, y);
+    return max(lowerOfXY, min(upperOfXY, z));
+}
+
+/// Return the component-wise middle value of three 4-dimensional vectors.
+///
+/// The result is max(min(x, y), min(max(x, y), z)).
+///
+/// NOTE: The intent is for this to compile down to V_MED3_F32 operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    FfxFloat32x4 lowerOfXY = min(x, y);
+    FfxFloat32x4 upperOfXY = max(x, y);
+    return max(lowerOfXY, min(upperOfXY, z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single V_MED3_I32 operation on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
+{
+    FfxInt32 lowerOfXY = min(x, y);
+    FfxInt32 upperOfXY = max(x, y);
+    return max(lowerOfXY, min(upperOfXY, z));
+}
+
+/// Return the component-wise middle value of three 2-dimensional signed integer vectors.
+///
+/// The result is max(min(x, y), min(max(x, y), z)).
+///
+/// NOTE: The intent is for this to compile down to V_MED3_I32 operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
+{
+    FfxInt32x2 lowerOfXY = min(x, y);
+    FfxInt32x2 upperOfXY = max(x, y);
+    return max(lowerOfXY, min(upperOfXY, z));
+}
+
+/// Return the component-wise middle value of three 3-dimensional signed integer vectors.
+///
+/// The result is max(min(x, y), min(max(x, y), z)).
+///
+/// NOTE: The intent is for this to compile down to V_MED3_I32 operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
+{
+    FfxInt32x3 lowerOfXY = min(x, y);
+    FfxInt32x3 upperOfXY = max(x, y);
+    return max(lowerOfXY, min(upperOfXY, z));
+}
+
+/// Return the component-wise middle value of three 4-dimensional signed integer vectors.
+///
+/// The result is max(min(x, y), min(max(x, y), z)).
+///
+/// NOTE: The intent is for this to compile down to V_MED3_I32 operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
+{
+    FfxInt32x4 lowerOfXY = min(x, y);
+    FfxInt32x4 upperOfXY = max(x, y);
+    return max(lowerOfXY, min(upperOfXY, z));
+}
+
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on
+/// GCN and RDNA hardware.
+///
+/// @param [in] x                   The first value to include in the min calculation.
+/// @param [in] y                   The second value to include in the min calculation.
+/// @param [in] z                   The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    FfxFloat32 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Return the component-wise smallest of three 2-dimensional vectors.
+///
+/// NOTE: The intent is for this to compile down to V_MIN3_F32 operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    FfxFloat32x2 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Return the component-wise smallest of three 3-dimensional vectors.
+///
+/// NOTE: The intent is for this to compile down to V_MIN3_F32 operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    FfxFloat32x3 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    FfxFloat32x4 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Return the smallest of three unsigned integers.
+///
+/// NOTE: The intent is for this to compile down to a single three-operand min
+/// operation on GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    FfxUInt32 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Return the component-wise smallest of three 2-dimensional unsigned integer vectors.
+///
+/// NOTE: The intent is for this to compile down to three-operand min operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    FfxUInt32x2 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Return the component-wise smallest of three 3-dimensional unsigned integer vectors.
+///
+/// NOTE: The intent is for this to compile down to three-operand min operations on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first candidate.
+/// @param [in] y                   The second candidate.
+/// @param [in] z                   The third candidate.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    FfxUInt32x3 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single three-operand min operation on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x                   The first value to include in the min calculation.
+/// @param [in] y                   The second value to include in the min calculation.
+/// @param [in] z                   The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup GLSLCore
+FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    FfxUInt32x4 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Return 1 / x for a scalar.
+///
+/// NOTE: Only the GLSL side needs this helper; HLSL code can call the rcp intrinsic.
+///
+/// @param [in] x                   The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxReciprocal(FfxFloat32 x)
+{
+    FfxFloat32 one = FfxFloat32(1.0);
+    return one / x;
+}
+
+/// Return 1 / x for each component of a 2-dimensional vector.
+///
+/// NOTE: Only the GLSL side needs this helper; HLSL code can call the rcp intrinsic.
+///
+/// @param [in] x                   The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxReciprocal(FfxFloat32x2 x)
+{
+    FfxFloat32x2 ones = ffxBroadcast2(1.0);
+    return ones / x;
+}
+
+/// Return 1 / x for each component of a 3-dimensional vector.
+///
+/// NOTE: Only the GLSL side needs this helper; HLSL code can call the rcp intrinsic.
+///
+/// @param [in] x                   The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxReciprocal(FfxFloat32x3 x)
+{
+    FfxFloat32x3 ones = ffxBroadcast3(1.0);
+    return ones / x;
+}
+
+/// Return 1 / x for each component of a 4-dimensional vector.
+///
+/// NOTE: Only the GLSL side needs this helper; HLSL code can call the rcp intrinsic.
+///
+/// @param [in] x                   The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxReciprocal(FfxFloat32x4 x)
+{
+    FfxFloat32x4 ones = ffxBroadcast4(1.0);
+    return ones / x;
+}
+
+/// Compute the reciprocal square root of a value.
+///
+/// NOTE: This function is only provided for GLSL. 
In HLSL the intrinsic function rsqrt can be used.
+///
+/// @param [in] x                   The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxRsqrt(FfxFloat32 x)
+{
+    return FfxFloat32(1.0) / ffxSqrt(x);
+}
+
+/// Compute the reciprocal square root of a value.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rsqrt can be used.
+///
+/// @param [in] x                   The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxRsqrt(FfxFloat32x2 x)
+{
+    return ffxBroadcast2(1.0) / ffxSqrt(x);
+}
+
+/// Compute the reciprocal square root of a value.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rsqrt can be used.
+///
+/// @param [in] x                   The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxRsqrt(FfxFloat32x3 x)
+{
+    return ffxBroadcast3(1.0) / ffxSqrt(x);
+}
+
+/// Compute the reciprocal square root of a value.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function rsqrt can be used.
+///
+/// This overload was previously only reachable under the name rsqrt, unlike the scalar,
+/// 2- and 3-dimensional overloads; it now carries the same ffxRsqrt name as its siblings.
+///
+/// @param [in] x                   The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxRsqrt(FfxFloat32x4 x)
+{
+    return ffxBroadcast4(1.0) / ffxSqrt(x);
+}
+
+/// Former name of the 4-dimensional ffxRsqrt overload, kept so that existing callers
+/// continue to compile. Prefer ffxRsqrt in new code.
+///
+/// @param [in] x                   The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 rsqrt(FfxFloat32x4 x)
+{
+    return ffxRsqrt(x);
+}
+
+/// Clamp a value to a [0..1] range.
+///
+/// @param [in] x                   The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxSaturate(FfxFloat32 x)
+{
+    return clamp(x, FfxFloat32(0.0), FfxFloat32(1.0));
+}
+
+/// Clamp a value to a [0..1] range.
+///
+/// @param [in] x The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
+{
+    FfxFloat32x2 lowerBound = ffxBroadcast2(0.0);
+    FfxFloat32x2 upperBound = ffxBroadcast2(1.0);
+    return clamp(x, lowerBound, upperBound);
+}
+
+/// Limit every component of a 3-dimensional vector to the [0..1] range.
+///
+/// @param [in] x                   The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
+{
+    FfxFloat32x3 lowerBound = ffxBroadcast3(0.0);
+    FfxFloat32x3 upperBound = ffxBroadcast3(1.0);
+    return clamp(x, lowerBound, upperBound);
+}
+
+/// Limit every component of a 4-dimensional vector to the [0..1] range.
+///
+/// @param [in] x                   The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
+{
+    FfxFloat32x4 lowerBound = ffxBroadcast4(0.0);
+    FfxFloat32x4 upperBound = ffxBroadcast4(1.0);
+    return clamp(x, lowerBound, upperBound);
+}
+
+/// Return the fractional part of a scalar.
+///
+/// Forwards to the GLSL fract built-in, which yields x - floor(x).
+///
+/// NOTE(review): the comment this replaces named V_MAX3_F32 as the expected instruction,
+/// which looks like a copy of the ffxMax3 note - confirm the intended instruction.
+///
+/// @param [in] x                   The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32 ffxFract(FfxFloat32 x)
+{
+    FfxFloat32 fractionalPart = fract(x);
+    return fractionalPart;
+}
+
+/// Return the fractional part of each component of a 2-dimensional vector.
+///
+/// Forwards to the GLSL fract built-in, which yields x - floor(x).
+///
+/// NOTE(review): the comment this replaces named V_MAX3_F32 as the expected instruction,
+/// which looks like a copy of the ffxMax3 note - confirm the intended instruction.
+///
+/// @param [in] x                   The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of x.
+///
+/// @ingroup GLSLCore
+FfxFloat32x2 ffxFract(FfxFloat32x2 x)
+{
+    FfxFloat32x2 fractionalPart = fract(x);
+    return fractionalPart;
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates x - floor(x). 
Where floor is the built-in GLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxFract(FfxFloat32x3 x) +{ + return fract(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x), where floor is the built-in GLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxFract(FfxFloat32x4 x) +{ + return fract(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer to x, or the nearest even integer when x is equidistant from two integers. +/// +/// @ingroup GLSLCore +FfxFloat32 ffxRound(FfxFloat32 x) +{ + return roundEven(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer to x, or the nearest even integer when x is equidistant from two integers. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxRound(FfxFloat32x2 x) +{ + return roundEven(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. 
+/// +/// @returns +/// The nearest integer to x, or the nearest even integer when x is equidistant from two integers. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxRound(FfxFloat32x3 x) +{ + return roundEven(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer to x, or the nearest even integer when x is equidistant from two integers. +/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxRound(FfxFloat32x4 x) +{ + return roundEven(x); +} + +/// Shift a right by b after converting both operands to FfxInt32; the result is converted back to FfxUInt32. +FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +/// Pack two 32-bit floats into one 32-bit word using packHalf2x16. +FfxUInt32 ffxPackF32(FfxFloat32x2 v){ + return packHalf2x16(v); +} + +/// Unpack one 32-bit word into two 32-bit floats using unpackHalf2x16. +FfxFloat32x2 ffxUnpackF32(FfxUInt32 u){ + return unpackHalf2x16(u); +} + +/// Pack four floats into two 32-bit words with ffxPackF32: v.xy goes to .x and v.zw goes to .y. +FfxUInt32x2 ffxPackF32x2(FfxFloat32x4 v){ + return FfxUInt32x2(ffxPackF32(v.xy), ffxPackF32(v.zw)); +} + +/// Unpack two 32-bit words into four floats with ffxUnpackF32: a.x yields .xy and a.y yields .zw. +FfxFloat32x4 ffxUnpackF32x2(FfxUInt32x2 a){ + return FfxFloat32x4(ffxUnpackF32(a.x), ffxUnpackF32(a.y)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32 ffxInvertSafe(FfxFloat32 v){ + FfxFloat32 s = sign(v); + FfxFloat32 s2 = s*s; + // s2 is 1 for non-zero v and 0 for v == 0. Evaluating (s2 - 1.0) first gives exactly 0 or -1, so the + // denominator is exactly v (or -1) instead of v rounded against 1.0, which lost small values of v. + return s2/(v + (s2 - 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ + FfxFloat32x2 s = sign(v); + FfxFloat32x2 s2 = s*s; + // Per component: (s2 - 1) is exactly 0 or -1, so the denominator is exactly v (or -1 when v == 0). + return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0))); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. 
+FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ + FfxFloat32x3 s = sign(v); + FfxFloat32x3 s2 = s*s; + // Per component: s2 is 1 for non-zero v and 0 for v == 0. Evaluating (s2 - 1) first gives exactly 0 or -1, + // so the denominator is exactly v (or -1) instead of v rounded against 1.0, which lost small values of v. + return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0))); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ + FfxFloat32x4 s = sign(v); + FfxFloat32x4 s2 = s*s; + // Per component: (s2 - 1) is exactly 0 or -1, so the denominator is exactly v (or -1 when v == 0). + return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0))); +} +#if FFX_HALF +#define FFX_UINT32_TO_FLOAT16X2(x) unpackFloat2x16(FfxUInt32(x)) + +/// Pack a two-component half vector into one 32-bit word using packHalf2x16. +FfxUInt32 ffxPackF16(FfxFloat16x2 v){ + return packHalf2x16(v); +} + +/// Unpack one 32-bit word with unpackHalf2x16 and convert the result to a two-component half vector. +FfxFloat16x2 ffxUnpackF16(FfxUInt32 u){ + return FfxFloat16x2(unpackHalf2x16(u)); +} + +/// Unpack two 32-bit words into a four-component half vector: x.x yields .xy and x.y yields .zw. +FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) +{ + return FfxFloat16x4(unpackFloat2x16(x.x), unpackFloat2x16(x.y)); +} +#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x)) +#define FFX_UINT32_TO_UINT16X2(x) unpackUint2x16(FfxUInt32(x)) +#define FFX_UINT32X2_TO_UINT16X4(x) unpackUint4x16(pack64(FfxUInt32x2(x))) +//------------------------------------------------------------------------------------------------------------------------------ +#define FFX_FLOAT16X2_TO_UINT32(x) packFloat2x16(FfxFloat16x2(x)) +/// Pack a four-component half vector into two 32-bit words: x.xy goes to .x and x.zw goes to .y. +FfxUInt32x2 ffxFloat16x4ToUint32x2(FfxFloat16x4 x) +{ + return FfxUInt32x2(packFloat2x16(x.xy), packFloat2x16(x.zw)); +} +#define FFX_FLOAT16X4_TO_UINT32X2(x) ffxFloat16x4ToUint32x2(FfxFloat16x4(x)) +#define FFX_UINT16X2_TO_UINT32(x) packUint2x16(FfxUInt16x2(x)) +#define FFX_UINT16X4_TO_UINT32X2(x) unpack32(packUint4x16(FfxUInt16x4(x))) +//============================================================================================================================== +#define FFX_TO_UINT16(x) halfBitsToUint16(FfxFloat16(x)) +#define FFX_TO_UINT16X2(x) halfBitsToUint16(FfxFloat16x2(x)) +#define FFX_TO_UINT16X3(x) halfBitsToUint16(FfxFloat16x3(x)) +#define FFX_TO_UINT16X4(x) halfBitsToUint16(FfxFloat16x4(x)) 
+//------------------------------------------------------------------------------------------------------------------------------ +// Wrappers that pass 1-4 component 16-bit unsigned values to uint16BitsToHalf. +#define FFX_TO_FLOAT16(x) uint16BitsToHalf(FfxUInt16(x)) +#define FFX_TO_FLOAT16X2(x) uint16BitsToHalf(FfxUInt16x2(x)) +#define FFX_TO_FLOAT16X3(x) uint16BitsToHalf(FfxUInt16x3(x)) +#define FFX_TO_FLOAT16X4(x) uint16BitsToHalf(FfxUInt16x4(x)) +//============================================================================================================================== +/// Replicate a half-precision scalar into every component of the returned value (1-4 components). +FfxFloat16 ffxBroadcastFloat16(FfxFloat16 a) +{ + return FfxFloat16(a); +} +FfxFloat16x2 ffxBroadcastFloat16x2(FfxFloat16 a) +{ + return FfxFloat16x2(a, a); +} +FfxFloat16x3 ffxBroadcastFloat16x3(FfxFloat16 a) +{ + return FfxFloat16x3(a, a, a); +} +FfxFloat16x4 ffxBroadcastFloat16x4(FfxFloat16 a) +{ + return FfxFloat16x4(a, a, a, a); +} +// Macro forms of the half broadcasts: the argument is converted to FfxFloat16 and passed to the constructor. +#define FFX_BROADCAST_FLOAT16(a) FfxFloat16(a) +#define FFX_BROADCAST_FLOAT16X2(a) FfxFloat16x2(FfxFloat16(a)) +#define FFX_BROADCAST_FLOAT16X3(a) FfxFloat16x3(FfxFloat16(a)) +#define FFX_BROADCAST_FLOAT16X4(a) FfxFloat16x4(FfxFloat16(a)) +//------------------------------------------------------------------------------------------------------------------------------ +/// Replicate a signed 16-bit scalar into every component of the returned value (1-4 components). +FfxInt16 ffxBroadcastInt16(FfxInt16 a) +{ + return FfxInt16(a); +} +FfxInt16x2 ffxBroadcastInt16x2(FfxInt16 a) +{ + return FfxInt16x2(a, a); +} +FfxInt16x3 ffxBroadcastInt16x3(FfxInt16 a) +{ + return FfxInt16x3(a, a, a); +} +FfxInt16x4 ffxBroadcastInt16x4(FfxInt16 a) +{ + return FfxInt16x4(a, a, a, a); +} +// Macro forms of the signed 16-bit broadcasts. +#define FFX_BROADCAST_INT16(a) FfxInt16(a) +#define FFX_BROADCAST_INT16X2(a) FfxInt16x2(FfxInt16(a)) +#define FFX_BROADCAST_INT16X3(a) FfxInt16x3(FfxInt16(a)) +#define FFX_BROADCAST_INT16X4(a) FfxInt16x4(FfxInt16(a)) +//------------------------------------------------------------------------------------------------------------------------------ +/// Replicate an unsigned 16-bit scalar into every component of the returned value (1-4 components). +FfxUInt16 ffxBroadcastUInt16(FfxUInt16 a) +{ + return FfxUInt16(a); +} +FfxUInt16x2 ffxBroadcastUInt16x2(FfxUInt16 a) +{ + 
return FfxUInt16x2(a, a); +} +FfxUInt16x3 ffxBroadcastUInt16x3(FfxUInt16 a) +{ + return FfxUInt16x3(a, a, a); +} +FfxUInt16x4 ffxBroadcastUInt16x4(FfxUInt16 a) +{ + return FfxUInt16x4(a, a, a, a); +} +// Macro forms of the unsigned 16-bit broadcasts. +#define FFX_BROADCAST_UINT16(a) FfxUInt16(a) +#define FFX_BROADCAST_UINT16X2(a) FfxUInt16x2(FfxUInt16(a)) +#define FFX_BROADCAST_UINT16X3(a) FfxUInt16x3(FfxUInt16(a)) +#define FFX_BROADCAST_UINT16X4(a) FfxUInt16x4(FfxUInt16(a)) +//============================================================================================================================== +/// Absolute value of a after conversion to the signed 16-bit type; the result is converted back to unsigned. +FfxUInt16 ffxAbsHalf(FfxUInt16 a) +{ + return FfxUInt16(abs(FfxInt16(a))); +} +FfxUInt16x2 ffxAbsHalf(FfxUInt16x2 a) +{ + return FfxUInt16x2(abs(FfxInt16x2(a))); +} +FfxUInt16x3 ffxAbsHalf(FfxUInt16x3 a) +{ + return FfxUInt16x3(abs(FfxInt16x3(a))); +} +FfxUInt16x4 ffxAbsHalf(FfxUInt16x4 a) +{ + return FfxUInt16x4(abs(FfxInt16x4(a))); +} +//------------------------------------------------------------------------------------------------------------------------------ +/// Clamp x to the range [n, m] using the built-in clamp. +FfxFloat16 ffxClampHalf(FfxFloat16 x, FfxFloat16 n, FfxFloat16 m) +{ + return clamp(x, n, m); +} +FfxFloat16x2 ffxClampHalf(FfxFloat16x2 x, FfxFloat16x2 n, FfxFloat16x2 m) +{ + return clamp(x, n, m); +} +FfxFloat16x3 ffxClampHalf(FfxFloat16x3 x, FfxFloat16x3 n, FfxFloat16x3 m) +{ + return clamp(x, n, m); +} +FfxFloat16x4 ffxClampHalf(FfxFloat16x4 x, FfxFloat16x4 n, FfxFloat16x4 m) +{ + return clamp(x, n, m); +} +//------------------------------------------------------------------------------------------------------------------------------ +/// Fractional part of x, using the built-in fract. +FfxFloat16 ffxFract(FfxFloat16 x) +{ + return fract(x); +} +FfxFloat16x2 ffxFract(FfxFloat16x2 x) +{ + return fract(x); +} +FfxFloat16x3 ffxFract(FfxFloat16x3 x) +{ + return fract(x); +} +FfxFloat16x4 ffxFract(FfxFloat16x4 x) +{ + return fract(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +/// Linear interpolation between x and y with weight a, using the built-in mix. +FfxFloat16 ffxLerp(FfxFloat16 x, 
FfxFloat16 y, FfxFloat16 a) +{ + return mix(x, y, a); +} +// Vector overloads of ffxLerp take either one scalar weight for all components or a per-component weight. +FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16 a) +{ + return mix(x, y, a); +} +FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 a) +{ + return mix(x, y, a); +} +FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 a) +{ + return mix(x, y, a); +} +FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16 a) +{ + return mix(x, y, a); +} +FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16 a) +{ + return mix(x, y, a); +} +FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a) +{ + return mix(x, y, a); +} +//------------------------------------------------------------------------------------------------------------------------------ +// No packed version of ffxMax3. +/// Largest of three values, computed as max(x, max(y, z)); component-wise for the vector overloads. +FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x2 ffxMax3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x3 ffxMax3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x4 ffxMax3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return max(x, max(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// No packed version of ffxMin3. 
+/// Smallest of three values, computed as min(x, min(y, z)); component-wise for the vector overloads. +FfxFloat16 ffxMin3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x2 ffxMin3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x3 ffxMin3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return min(x, min(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +/// Median of three values, computed as max(min(x, y), min(max(x, y), z)). +FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +/// Reciprocal of x, computed as 1.0 / x. +FfxFloat16 ffxReciprocalHalf(FfxFloat16 x) +{ + return FFX_BROADCAST_FLOAT16(1.0) / x; +} +FfxFloat16x2 ffxReciprocalHalf(FfxFloat16x2 x) +{ + return FFX_BROADCAST_FLOAT16X2(1.0) / x; +} +FfxFloat16x3 ffxReciprocalHalf(FfxFloat16x3 x) +{ + return FFX_BROADCAST_FLOAT16X3(1.0) / x; +} +FfxFloat16x4 ffxReciprocalHalf(FfxFloat16x4 x) +{ + return FFX_BROADCAST_FLOAT16X4(1.0) / x; +} +//------------------------------------------------------------------------------------------------------------------------------ +/// Reciprocal square root of x, computed as 1.0 / sqrt(x). +FfxFloat16 ffxReciprocalSquareRootHalf(FfxFloat16 x) +{ + return FFX_BROADCAST_FLOAT16(1.0) / sqrt(x); +} +FfxFloat16x2 ffxReciprocalSquareRootHalf(FfxFloat16x2 x) +{ + return FFX_BROADCAST_FLOAT16X2(1.0) / sqrt(x); +} +FfxFloat16x3 ffxReciprocalSquareRootHalf(FfxFloat16x3 x) +{ + return 
FFX_BROADCAST_FLOAT16X3(1.0) / sqrt(x); +} +FfxFloat16x4 ffxReciprocalSquareRootHalf(FfxFloat16x4 x) +{ + return FFX_BROADCAST_FLOAT16X4(1.0) / sqrt(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +/// Clamp x to the [0..1] range. +FfxFloat16 ffxSaturate(FfxFloat16 x) +{ + return clamp(x, FFX_BROADCAST_FLOAT16(0.0), FFX_BROADCAST_FLOAT16(1.0)); +} +FfxFloat16x2 ffxSaturate(FfxFloat16x2 x) +{ + return clamp(x, FFX_BROADCAST_FLOAT16X2(0.0), FFX_BROADCAST_FLOAT16X2(1.0)); +} +FfxFloat16x3 ffxSaturate(FfxFloat16x3 x) +{ + return clamp(x, FFX_BROADCAST_FLOAT16X3(0.0), FFX_BROADCAST_FLOAT16X3(1.0)); +} +FfxFloat16x4 ffxSaturate(FfxFloat16x4 x) +{ + return clamp(x, FFX_BROADCAST_FLOAT16X4(0.0), FFX_BROADCAST_FLOAT16X4(1.0)); +} +//------------------------------------------------------------------------------------------------------------------------------ +/// Shift a right by b after converting both operands to the signed 16-bit type; the result is converted back to unsigned. +FfxUInt16 ffxBitShiftRightHalf(FfxUInt16 a, FfxUInt16 b) +{ + return FfxUInt16(FfxInt16(a) >> FfxInt16(b)); +} +FfxUInt16x2 ffxBitShiftRightHalf(FfxUInt16x2 a, FfxUInt16x2 b) +{ + return FfxUInt16x2(FfxInt16x2(a) >> FfxInt16x2(b)); +} +FfxUInt16x3 ffxBitShiftRightHalf(FfxUInt16x3 a, FfxUInt16x3 b) +{ + return FfxUInt16x3(FfxInt16x3(a) >> FfxInt16x3(b)); +} +FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b) +{ + return FfxUInt16x4(FfxInt16x4(a) >> FfxInt16x4(b)); +} +#endif // FFX_HALF + +// Wave helpers below are only compiled when FFX_WAVE is defined; they forward to GLSL subgroup built-ins. +#if defined(FFX_WAVE) +// Where 'x' must be a compile time literal. 
+/// Forward v and the mask x to subgroupShuffleXor (float and uint overloads, 1-4 components). +FfxFloat32 ffxWaveXorF1(FfxFloat32 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxFloat32x2 ffxWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxFloat32x3 ffxWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxFloat32x4 ffxWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxUInt32 ffxWaveXorU1(FfxUInt32 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxUInt32x2 ffxWaveXorU2(FfxUInt32x2 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxUInt32x3 ffxWaveXorU3(FfxUInt32x3 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +FfxUInt32x4 ffxWaveXorU4(FfxUInt32x4 v, FfxUInt32 x) +{ + return subgroupShuffleXor(v, x); +} +/// Returns the result of subgroupElect(). +FfxBoolean ffxWaveIsFirstLane() +{ + return subgroupElect(); +} +/// Returns gl_SubgroupInvocationID. +FfxUInt32 ffxWaveLaneIndex() +{ + return gl_SubgroupInvocationID; +} +/// Reads v from the invocation with index x using subgroupShuffle. +FfxBoolean ffxWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x ) +{ + return subgroupShuffle(v, x); +} +/// Exclusive bit count over the ballot of v (subgroupBallotExclusiveBitCount). +FfxUInt32 ffxWavePrefixCountBits(FfxBoolean v) +{ + return subgroupBallotExclusiveBitCount(subgroupBallot(v)); +} +/// Bit count over the ballot of v (subgroupBallotBitCount). +FfxUInt32 ffxWaveActiveCountBits(FfxBoolean v) +{ + return subgroupBallotBitCount(subgroupBallot(v)); +} +/// Forward v to subgroupBroadcastFirst (uint, uint2 and bool overloads). +FfxUInt32 ffxWaveReadLaneFirstU1(FfxUInt32 v) +{ + return subgroupBroadcastFirst(v); +} +FfxUInt32x2 ffxWaveReadLaneFirstU2(FfxUInt32x2 v) +{ + return subgroupBroadcastFirst(v); +} +FfxBoolean ffxWaveReadLaneFirstB1(FfxBoolean v) +{ + return subgroupBroadcastFirst(v); +} +/// Subgroup-wide reductions: bitwise OR, minimum, maximum and sum via subgroupOr/Min/Max/Add. +FfxUInt32 ffxWaveOr(FfxUInt32 a) +{ + return subgroupOr(a); +} +FfxUInt32 ffxWaveMin(FfxUInt32 a) +{ + return subgroupMin(a); +} +FfxFloat32 ffxWaveMin(FfxFloat32 a) +{ + return subgroupMin(a); +} +FfxUInt32 ffxWaveMax(FfxUInt32 a) +{ + return subgroupMax(a); +} +FfxFloat32 ffxWaveMax(FfxFloat32 a) +{ + return subgroupMax(a); +} +FfxUInt32 ffxWaveSum(FfxUInt32 a) +{ + return subgroupAdd(a); +} +FfxFloat32 ffxWaveSum(FfxFloat32 a) +{ + return subgroupAdd(a); +} +/// Returns gl_SubgroupSize. +FfxUInt32 ffxWaveLaneCount() +{ + return 
gl_SubgroupSize; +} +// ffxWaveAllTrue is only compiled when FFX_WAVE_ALL_TRUE is defined; it forwards v to subgroupAll. +#if defined(FFX_WAVE_ALL_TRUE) +FfxBoolean ffxWaveAllTrue(FfxBoolean v) +{ + return subgroupAll(v); +} +#endif +/// Forward v to subgroupQuadSwapHorizontal. +FfxFloat32 ffxQuadReadX(FfxFloat32 v) +{ + return subgroupQuadSwapHorizontal(v); +} +FfxFloat32x2 ffxQuadReadX(FfxFloat32x2 v) +{ + return subgroupQuadSwapHorizontal(v); +} +/// Forward v to subgroupQuadSwapVertical. +FfxFloat32 ffxQuadReadY(FfxFloat32 v) +{ + return subgroupQuadSwapVertical(v); +} +FfxFloat32x2 ffxQuadReadY(FfxFloat32x2 v) +{ + return subgroupQuadSwapVertical(v); +} + +//------------------------------------------------------------------------------------------------------------------------------ +// 16-bit variants: the vector is packed into 32-bit words, shuffled with subgroupShuffleXor, then unpacked again. +#if FFX_HALF +FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_FLOAT16X2(subgroupShuffleXor(FFX_FLOAT16X2_TO_UINT32(v), x)); +} +FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_FLOAT16X4(subgroupShuffleXor(FFX_FLOAT16X4_TO_UINT32X2(v), x)); +} +FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_UINT16X2(subgroupShuffleXor(FFX_UINT16X2_TO_UINT32(v), x)); +} +FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_UINT16X4(subgroupShuffleXor(FFX_UINT16X4_TO_UINT32X2(v), x)); +} +#endif // FFX_HALF +#endif // #if defined(FFX_WAVE) diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_glsl.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_glsl.h.meta new file mode 100644 index 0000000..119ff2d --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_glsl.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: a1cef7785827878448d5c78f36c70d4b +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + 
platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common.h new file mode 100644 index 0000000..9f88c94 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common.h @@ -0,0 +1,2736 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// A define for a true value in a boolean expression. 
+/// +/// @ingroup GPUCore +#define FFX_TRUE (true) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup GPUCore +#define FFX_FALSE (false) + +/// A define value for positive infinity. +/// +/// @ingroup GPUCore +#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u) + +/// A define value for negative infinity. +/// +/// @ingroup GPUCore +#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u) + +/// A define value for PI. +/// +/// NOTE(review): only six significant digits are given; confirm this precision is intended. +/// +/// @ingroup GPUCore +#define FFX_PI (3.14159) + +// Limits used for 16-bit float handling: FFX_FP16_MIN is approximately the smallest positive normal half value, +// FFX_FP16_MAX is the largest finite half value, and FFX_TONEMAP_EPSILON is the reciprocal of FFX_FP16_MAX. +FFX_STATIC const FfxFloat32 FFX_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FFX_FP16_MAX = 65504.0f; +FFX_STATIC const FfxFloat32 FFX_TONEMAP_EPSILON = 1.0f / FFX_FP16_MAX; + +/// Test whether every bit of f is set in v. +/// +/// Both arguments are parenthesised so that compound expressions such as FFX_HAS_FLAG(x, A | B) expand with +/// the intended precedence. Each argument is evaluated more than once, so avoid arguments with side effects. +/// +/// @ingroup GPUCore +#define FFX_HAS_FLAG(v, f) (((v) & (f)) == (f)) + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. 
+/// +/// @ingroup GPUCore +FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. 
+/// +/// @ingroup GPUCore +FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. 
+/// +/// @ingroup GPUCore +FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. 
+/// +/// @ingroup GPUCore +FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. 
+/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxSqrt(FfxFloat32x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxSqrt(FfxFloat32x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxSqrt(FfxFloat32x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// The sign bit of s is ORed into the bits of d, so d is expected to be positive on input. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. 
+/// +/// @ingroup GPUCore +FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit of s OR'd into it. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit of s OR'd into it. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32 ffxIsSigned(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic.
+/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+/// +/// @ingroup GPUCore +FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Convert a 32bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+/// +/// @param [in] value The floating point value to make sortable, passed as its 32bit bit pattern. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) +{ + return value ^ ((ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); +} + +/// Convert a sortable integer to a 32bit floating point value. +/// +/// The function reverses the mapping applied by ffxFloatToSortableInteger: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] value The sortable integer value to convert back. +/// +/// @returns +/// The 32bit bit pattern of the floating point value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) +{ + return value ^ ((~ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateSqrt(FfxFloat32 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value.
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value) +{ + return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value) +{ + FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value)); + return b * (-b * value + FfxFloat32(2.0)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal square root for.
+/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value) +{ + return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(value) >> FfxUInt32(1))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value) +{ + return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value.
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value) +{ + FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast2(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value) +{ + return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value.
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value) +{ + return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal for.
+/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value) +{ + FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast3(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value) +{ + return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value.
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value) +{ + return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value) +{ + FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast4(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate reciprocal square root for.
+/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value) +{ + return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast4(1u))); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return dot(a, b); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return dot(a, b); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return dot(a, b); +} + + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8).
The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// This variant refines the ffxApproximateGamma2ToPQ estimate with one Newton-Raphson step towards b^4 = a. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); + FfxFloat32 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8).
The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// This variant refines the ffxApproximateLinearToPQ estimate with one Newton-Raphson step towards b^8 = a. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); + FfxFloat32 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8).
The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root).
The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// This variant refines the ffxApproximateGamma2ToPQ estimate with one Newton-Raphson step towards b^4 = a. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); + FfxFloat32x2 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8).
The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// This variant refines the ffxApproximateLinearToPQ estimate with one Newton-Raphson step towards b^8 = a. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); + FfxFloat32x2 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space.
+/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8).
The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a)
+{
+    // b: bit-trick initial estimate of a^(1/4).
+    FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u));
+    FfxFloat32x3 b4 = b * b * b * b;
+    // Newton step: b - (b^4 - a) / (4 * b^3), written with b^4 in the denominator.
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4);
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// This variant does not use the bit-shift estimate; it applies ffxSqrt twice.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a)
+{
+    return ffxSqrt(ffxSqrt(a));
+}
+
+/// Compute an approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a)
+{
+    // Fast 8th root: shift the raw float bits right by 3, then add a bias constant.
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u));
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a)
+{
+    // b: bit-trick initial estimate of a^(1/8).
+    FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u));
+    FfxFloat32x3 b8 = b * b * b * b * b * b * b * b;
+    // Newton step: b - (b^8 - a) / (8 * b^7), written with b^8 in the denominator.
+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8);
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// This variant does not use the bit-shift estimate; it applies ffxSqrt three times.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a)
+{
+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and Gamma2.
+///
+/// @returns
+/// The value a converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a)
+{
+    // 4th power of a.
+    return a * a * a * a;
+}
+
+/// Compute an approximate conversion from PQ to linear space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and linear.
+///
+/// @returns
+/// The value a converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a)
+{
+    // 8th power of a.
+    return a * a * a * a * a * a * a * a;
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a)
+{
+    // Fast 4th root: shift the raw float bits right by 2, then add a bias constant.
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u));
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a)
+{
+    // b: bit-trick initial estimate of a^(1/4).
+    FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u));
+    // b4 must be b^4 (four factors) for the 4th-root Newton step below. It was previously
+    // built from eight factors (b^8), which disagreed with the FfxFloat32x3 overload and
+    // made the "refinement" move away from the 4th root.
+    FfxFloat32x4 b4 = b * b * b * b;
+    // Newton step: b - (b^4 - a) / (4 * b^3), written with b^4 in the denominator.
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4);
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// This variant does not use the bit-shift estimate; it applies ffxSqrt twice.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a)
+{
+    return ffxSqrt(ffxSqrt(a));
+}
+
+/// Compute an approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a)
+{
+    // Fast 8th root: shift the raw float bits right by 3, then add a bias constant.
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u));
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a)
+{
+    // b: bit-trick initial estimate of a^(1/8).
+    FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u));
+    FfxFloat32x4 b8 = b * b * b * b * b * b * b * b;
+    // Newton step: b - (b^8 - a) / (8 * b^7), written with b^8 in the denominator.
+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8);
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// This variant does not use the bit-shift estimate; it applies ffxSqrt three times.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a)
+{
+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
+}
+
+// An approximation of sine.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] value The value to calculate approximate sine for.
+//
+// @returns
+// The approximate sine of value.
+FfxFloat32 ffxParabolicSin(FfxFloat32 value)
+{
+    // Parabola value * |value| - value: zero at -1, 0 and 1.
+    return value * abs(value) - value;
+}
+
+// An approximation of sine (two-component overload, evaluated per component).
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The values to calculate approximate sine for.
+//
+// @returns
+// The approximate sine of each component of x.
+FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x)
+{
+    return x * abs(x) - x;
+}
+
+// An approximation of cosine.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The value to calculate approximate cosine for.
+//
+// @returns
+// The approximate cosine of x.
+FfxFloat32 ffxParabolicCos(FfxFloat32 x)
+{
+    // Add 0.5 (a quarter of the 2-unit period) and wrap the result back into [-1, 1),
+    // so that the sine approximation evaluated there gives the cosine.
+    x = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75));
+    x = x * FfxFloat32(2.0) - FfxFloat32(1.0);
+    return ffxParabolicSin(x);
+}
+
+// An approximation of cosine (two-component overload, evaluated per component).
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The values to calculate approximate cosine for.
+//
+// @returns
+// The approximate cosine of each component of x.
+FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x)
+{
+    // Same quarter-period shift and wrap as the scalar overload.
+    x = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f));
+    x = x * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f);
+    return ffxParabolicSin(x);
+}
+
+// An approximation of both sine and cosine.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The value to calculate approximate sine and cosine for.
+//
+// @returns
+// A FfxFloat32x2 containing the approximate sine of x in the first component and the
+// approximate cosine of x in the second component.
+FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x)
+{
+    // y is x shifted by a quarter period and wrapped, as in ffxParabolicCos.
+    FfxFloat32 y = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75));
+    y = y * FfxFloat32(2.0) - FfxFloat32(1.0);
+    // One two-component sine evaluation yields both results.
+    return ffxParabolicSin(FfxFloat32x2(x, y));
+}
+
+/// Conditional free logic AND operation using two values.
+///
+/// Implemented as min(x, y), which equals logical AND when both inputs are 0 or 1.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two values.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    // min equals logical AND when every component is 0 or 1.
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two values.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two values.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic NOT operation on a single value.
+///
+/// NOTE(review): this one-argument overload shares the name ffxZeroOneAnd but performs
+/// NOT (x ^ 1), flipping the lowest bit of x.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x)
+{
+    return x ^ FfxUInt32(1);
+}
+
+/// Conditional free logic NOT operation on a single value.
+///
+/// NOTE(review): this one-argument overload shares the name ffxZeroOneAnd but performs
+/// NOT (x ^ 1) per component.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x)
+{
+    return x ^ ffxBroadcast2(1u);
+}
+
+/// Conditional free logic NOT operation on a single value.
+///
+/// NOTE(review): this one-argument overload shares the name ffxZeroOneAnd but performs
+/// NOT (x ^ 1) per component.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x)
+{
+    return x ^ ffxBroadcast3(1u);
+}
+
+/// Conditional free logic NOT operation on a single value.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x)
+{
+    // NOTE(review): named ffxZeroOneAnd, but this one-argument overload is NOT (x ^ 1).
+    return x ^ ffxBroadcast4(1u);
+}
+
+/// Conditional free logic OR operation using two values.
+///
+/// Implemented as max(x, y), which equals logical OR when both inputs are 0 or 1.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic NOT operation on an FfxFloat32 value, converted to FfxUInt32.
+///
+/// Computes 1.0 - x and converts the result to an unsigned integer.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as an unsigned integer.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x)
+{
+    return FfxUInt32(FfxFloat32(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on FfxFloat32 values, converted to FfxUInt32.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x)
+{
+    // 1.0 - x per component, then converted to unsigned integers.
+    return FfxUInt32x2(ffxBroadcast2(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on FfxFloat32 values, converted to FfxUInt32.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x)
+{
+    return FfxUInt32x3(ffxBroadcast3(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on FfxFloat32 values, converted to FfxUInt32.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x)
+{
+    return FfxUInt32x4(ffxBroadcast4(1.0) - x);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// Computed as ffxSaturate(x * y + z).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    // x * y acts as AND, adding z and saturating acts as OR (for inputs that are 0 or 1).
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// Computed as ffxSaturate(x * FFX_POSITIVE_INFINITY_FLOAT).
+/// NOTE(review): for x == 0 the product is 0 * infinity; the result then depends on how
+/// ffxSaturate treats that value - confirm.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x)
+{
+    return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x)
+{
+    // Scale by +infinity and saturate, per component.
+    return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x)
+{
+    return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x)
+{
+    return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Conditional free logic NOT operation on a single FfxFloat32 value.
+///
+/// NOTE(review): this one-argument overload shares the name ffxZeroOneAnd but computes
+/// NOT as 1.0 - x.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneAnd(FfxFloat32 x)
+{
+    return FfxFloat32(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on FfxFloat32 values.
+///
+/// NOTE(review): this one-argument overload shares the name ffxZeroOneAnd but computes
+/// NOT as 1.0 - x per component.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x)
+{
+    return ffxBroadcast2(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on FfxFloat32 values.
+///
+/// NOTE(review): this one-argument overload shares the name ffxZeroOneAnd but computes
+/// NOT as 1.0 - x per component.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x)
+{
+    return ffxBroadcast3(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on FfxFloat32 values.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x)
+{
+    // NOTE(review): named ffxZeroOneAnd, but this one-argument overload is NOT (1.0 - x).
+    return ffxBroadcast4(1.0) - x;
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// Implemented as max(x, y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return max(x, y);
+}
+
+/// Choose between two FfxFloat32 values if the first parameter is greater than zero.
+///
+/// @param [in] x The value to compare against zero.
+/// @param [in] y The value to return if the comparison is greater than zero.
+/// @param [in] z The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    // Branch-free blend: r = z * (1 - x), result = x * y + r.
+    // This is exactly y when x is 1 and exactly z when x is 0.
+    FfxFloat32 r = (-x) * z + z;
+    return x * y + r;
+}
+
+/// Choose between two FfxFloat32 values if the first parameter is greater than zero.
+///
+/// Evaluated arithmetically as x * y + (1 - x) * z per component.
+///
+/// @param [in] x The value to compare against zero.
+/// @param [in] y The value to return if the comparison is greater than zero.
+/// @param [in] z The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    FfxFloat32x2 r = (-x) * z + z;
+    return x * y + r;
+}
+
+/// Choose between two FfxFloat32 values if the first parameter is greater than zero.
+///
+/// Evaluated arithmetically as x * y + (1 - x) * z per component.
+///
+/// @param [in] x The value to compare against zero.
+/// @param [in] y The value to return if the comparison is greater than zero.
+/// @param [in] z The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    FfxFloat32x3 r = (-x) * z + z;
+    return x * y + r;
+}
+
+/// Choose between two FfxFloat32 values if the first parameter is greater than zero.
+///
+/// Evaluated arithmetically as x * y + (1 - x) * z per component.
+///
+/// @param [in] x The value to compare against zero.
+/// @param [in] y The value to return if the comparison is greater than zero.
+/// @param [in] z The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    FfxFloat32x4 r = (-x) * z + z;
+    return x * y + r;
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the sign value.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x)
+{
+    // Scale by -infinity and saturate: negative x gives 1, positive x gives 0.
+    return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the sign value.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x)
+{
+    return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the sign value.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x)
+{
+    return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the sign value.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x)
+{
+    return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// Compute a Rec.709 value from a linear value.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// @param [in] color The color to convert to Rec. 709.
+///
+/// @returns
+/// The color in Rec.709 space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxRec709FromLinear(FfxFloat32 color)
+{
+    // j = (segment threshold in encoded space, linear-segment slope, curve exponent).
+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    // k = (curve scale, curve offset).
+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
+    // The threshold is clamped between the linear and curved results, which picks one
+    // segment without a branch (assumes clamp(x, min, max) argument order - confirm).
+    return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y);
+}
+
+/// Compute a Rec.709 value from a linear value.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// @param [in] color The color to convert to Rec. 709.
+///
+/// @returns
+/// The color in Rec.709 space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color)
+{
+    // j = (segment threshold in encoded space, linear-segment slope, curve exponent).
+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    // k = (curve scale, curve offset).
+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
+    // The threshold is clamped between the linear and curved results, which picks one
+    // segment without a branch (assumes clamp(x, min, max) argument order - confirm).
+    return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy);
+}
+
+/// Compute a Rec.709 value from a linear value.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// @param [in] color The color to convert to Rec. 709.
+///
+/// @returns
+/// The color in Rec.709 space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
+    return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy);
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromRec709(FfxFloat32 color)
+{
+    // j = (segment threshold, reciprocal linear slope, reciprocal curve exponent).
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    // k = (reciprocal curve scale, curve offset divided by curve scale).
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    // Use the linear segment when color - j.x is negative, otherwise the power curve.
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color)
+{
+    // j = (segment threshold, reciprocal linear slope, reciprocal curve exponent).
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    // k = (reciprocal curve scale, curve offset divided by curve scale).
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    // Use the linear segment where color - j.x is negative, otherwise the power curve.
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
+}
+
+/// Compute a gamma value from a linear value.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'power' here is '1/x', where the 'x' is what would be used in ffxLinearFromGamma.
+///
+/// @param [in] value The value to convert to gamma space from linear.
+/// @param [in] power The reciprocal of power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power)
+{
+    return pow(value, FfxFloat32(power));
+}
+
+/// Compute a gamma value from a linear value.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'power' here is '1/x', where the 'x' is what would be used in ffxLinearFromGamma.
+///
+/// @param [in] value The value to convert to gamma space from linear.
+/// @param [in] power The reciprocal of power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power)
+{
+    // The scalar exponent is broadcast so every component uses the same power.
+    return pow(value, ffxBroadcast2(power));
+}
+
+/// Compute a gamma value from a linear value.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'power' here is '1/x', where the 'x' is what would be used in ffxLinearFromGamma.
+///
+/// @param [in] value The value to convert to gamma space from linear.
+/// @param [in] power The reciprocal of power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power)
+{
+    return pow(value, ffxBroadcast3(power));
+}
+
+/// Compute a linear value from a value in a gamma space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power)
+{
+    return pow(color, FfxFloat32(power));
+}
+
+/// Compute a linear value from a value in a gamma space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power)
+{
+    // The scalar exponent is broadcast so every component uses the same power.
+    return pow(color, ffxBroadcast2(power));
+}
+
+/// Compute a linear value from a value in a gamma space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power)
+{
+    // The same exponent is applied to all three components.
+    FfxFloat32x3 exponent = ffxBroadcast3(power);
+    return pow(color, exponent);
+}
+
+/// Compute a PQ value from a linear value.
+///
+/// Evaluates pow((0.835938 + 18.8516 * p) / (1.0 + 18.6875 * p), 78.8438) with p = pow(value, 0.159302).
+///
+/// @param [in] value The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxPQFromLinear(FfxFloat32 value)
+{
+    FfxFloat32 p           = pow(value, FfxFloat32(0.159302));
+    FfxFloat32 numerator   = FfxFloat32(0.835938) + FfxFloat32(18.8516) * p;
+    FfxFloat32 denominator = FfxFloat32(1.0) + FfxFloat32(18.6875) * p;
+    return pow(numerator / denominator, FfxFloat32(78.8438));
+}
+
+/// Compute a PQ value from a linear value.
+///
+/// Two-component overload; each component is converted independently.
+///
+/// @param [in] value The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxPQFromLinear(FfxFloat32x2 value)
+{
+    FfxFloat32x2 p           = pow(value, ffxBroadcast2(0.159302));
+    FfxFloat32x2 numerator   = ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p;
+    FfxFloat32x2 denominator = ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p;
+    return pow(numerator / denominator, ffxBroadcast2(78.8438));
+}
+
+/// Compute a PQ value from a linear value.
+///
+/// Three-component overload; each component is converted independently.
+///
+/// @param [in] value The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxPQFromLinear(FfxFloat32x3 value)
+{
+    FfxFloat32x3 p           = pow(value, ffxBroadcast3(0.159302));
+    FfxFloat32x3 numerator   = ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p;
+    FfxFloat32x3 denominator = ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p;
+    return pow(numerator / denominator, ffxBroadcast3(78.8438));
+}
+
+/// Compute a linear value from a value in a PQ space.
+///
+/// @param [in] value The PQ-space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromPQ(FfxFloat32 value)
+{
+    FfxFloat32 p           = pow(value, FfxFloat32(0.0126833));
+    // The numerator is saturated so that it cannot go negative before the final pow().
+    FfxFloat32 numerator   = ffxSaturate(p - FfxFloat32(0.835938));
+    FfxFloat32 denominator = FfxFloat32(18.8516) - FfxFloat32(18.6875) * p;
+    return pow(numerator / denominator, FfxFloat32(6.27739));
+}
+
+/// Compute a linear value from a value in a PQ space.
+///
+/// Two-component overload; each component is converted independently.
+///
+/// @param [in] value The PQ-space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value)
+{
+    FfxFloat32x2 p           = pow(value, ffxBroadcast2(0.0126833));
+    // The numerator is saturated so that it cannot go negative before the final pow().
+    FfxFloat32x2 numerator   = ffxSaturate(p - ffxBroadcast2(0.835938));
+    FfxFloat32x2 denominator = ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p;
+    return pow(numerator / denominator, ffxBroadcast2(6.27739));
+}
+
+/// Compute a linear value from a value in a PQ space.
+///
+/// Three-component overload; each component is converted independently.
+///
+/// @param [in] value The PQ-space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value)
+{
+    FfxFloat32x3 p           = pow(value, ffxBroadcast3(0.0126833));
+    // The numerator is saturated so that it cannot go negative before the final pow().
+    FfxFloat32x3 numerator   = ffxSaturate(p - ffxBroadcast3(0.835938));
+    FfxFloat32x3 denominator = ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p;
+    return pow(numerator / denominator, ffxBroadcast3(6.27739));
+}
+
+/// Compute an SRGB value from a linear value.
+///
+/// The result is the median of three candidates: the encoded cut-over (0.0031308 * 12.92), the
+/// linear segment (value * 12.92) and the power segment (1.055 * value^(1/2.4) - 0.055). Below the
+/// cut-over the median is the linear segment, above it the power segment.
+///
+/// @param [in] value The value to convert to SRGB from linear.
+///
+/// @returns
+/// A value in SRGB space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxSrgbFromLinear(FfxFloat32 value)
+{
+    // x: encoded value at the segment cut-over, y: linear-segment gain, z: power-segment exponent.
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    // x: scale, y: offset of the power segment.
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+
+    FfxFloat32 linearSegment = value * j.y;
+    FfxFloat32 powerSegment  = pow(value, j.z) * k.x + k.y;
+
+    // This used to be clamp(j.x, linearSegment, powerSegment). The lower bound passed to clamp()
+    // exceeds the upper bound for practically every input, which is outside clamp()'s defined
+    // domain; the expression is only correct where clamp() happens to behave as a median-of-three
+    // (presumably what the original relied on). With a min(max(x, lo), hi) clamp, inputs below the
+    // cut-over return the power segment instead of the linear one. Compute the median explicitly.
+    return max(min(linearSegment, powerSegment), min(max(linearSegment, powerSegment), j.x));
+}
+
+/// Compute an SRGB value from a linear value.
+///
+/// @param [in] value The value to convert to SRGB from linear.
+///
+/// @returns
+/// A value in SRGB space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxSrgbFromLinear(FfxFloat32x2 value)
+{
+    // x: encoded value at the segment cut-over, y: linear-segment gain, z: power-segment exponent.
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    // x: scale, y: offset of the power segment.
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+
+    FfxFloat32x2 linearSegment = value * j.yy;
+    FfxFloat32x2 powerSegment  = pow(value, j.zz) * k.xx + k.yy;
+
+    // This used to be clamp(j.xx, linearSegment, powerSegment). The lower bound passed to clamp()
+    // exceeds the upper bound for practically every input, which is outside clamp()'s defined
+    // domain; the expression is only correct where clamp() happens to behave as a median-of-three
+    // (presumably what the original relied on). With a min(max(x, lo), hi) clamp, inputs below the
+    // cut-over return the power segment instead of the linear one. Compute the median explicitly.
+    return max(min(linearSegment, powerSegment), min(max(linearSegment, powerSegment), j.xx));
+}
+
+/// Compute an SRGB value from a linear value.
+///
+/// Per component, the result is the median of three candidates: the encoded cut-over
+/// (0.0031308 * 12.92), the linear segment (value * 12.92) and the power segment
+/// (1.055 * value^(1/2.4) - 0.055).
+///
+/// @param [in] value The value to convert to SRGB from linear.
+///
+/// @returns
+/// A value in SRGB space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value)
+{
+    // x: encoded value at the segment cut-over, y: linear-segment gain, z: power-segment exponent.
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    // x: scale, y: offset of the power segment.
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+
+    FfxFloat32x3 linearSegment = value * j.yyy;
+    FfxFloat32x3 powerSegment  = pow(value, j.zzz) * k.xxx + k.yyy;
+
+    // Explicit median-of-three instead of clamp(j.xxx, linearSegment, powerSegment), whose bounds
+    // are inverted (min > max) for practically every input and therefore not well defined.
+    return max(min(linearSegment, powerSegment), min(max(linearSegment, powerSegment), j.xxx));
+}
+
+/// Compute a linear value from a value in a SRGB space.
+///
+/// @param [in] value The SRGB-space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
+{
+    // x: cut-over the input is compared against, y: gain of the linear segment,
+    // z: exponent of the power segment.
+    // NOTE(review): 0.04045 / 12.92 (= 0.0031308) is the linear-domain cut-over of sRGB, yet it is
+    // compared against the encoded input here - confirm that this is intended.
+    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    // x: scale, y: offset applied to the input ahead of the power segment.
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z));
+}
+
+/// Compute a linear value from a value in a SRGB space.
+///
+/// Two-component overload; each component is converted independently.
+///
+/// @param [in] value The SRGB-space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
+{
+    // x: cut-over the input is compared against, y: gain of the linear segment,
+    // z: exponent of the power segment.
+    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    // x: scale, y: offset applied to the input ahead of the power segment.
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz));
+}
+
+/// Compute a linear value from a value in a SRGB space.
+///
+/// Three-component overload; each component is converted independently.
+///
+/// @param [in] value The SRGB-space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value)
+{
+    // x: cut-over the input is compared against, y: gain of the linear segment,
+    // z: exponent of the power segment.
+    // NOTE(review): 0.04045 / 12.92 (= 0.0031308) is the linear-domain cut-over of sRGB, yet it is
+    // compared against the encoded input here - confirm that this is intended.
+    FfxFloat32x3 curve = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    // x: scale, y: offset applied to the input ahead of the power segment.
+    FfxFloat32x2 affine = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+
+    FfxFloat32x3 selector      = ffxZeroOneIsSigned(value - curve.xxx);
+    FfxFloat32x3 linearSegment = value * curve.yyy;
+    FfxFloat32x3 powerSegment  = pow(value * affine.xxx + affine.yyy, curve.zzz);
+    return ffxZeroOneSelect(selector, linearSegment, powerSegment);
+}
+
+/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
+///
+/// Remap illustration (which bits of the input feed which output component):
+///
+///     543210
+///     ~~~~~~
+///     ..xxx.
+///     yy...y
+///
+/// @param [in] a The input 1D coordinates to remap.
+///
+/// @returns
+/// The remapped 2D coordinates.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a)
+{
+    // X comes from the three-bit field starting at bit 1.
+    FfxUInt32 column = ffxBitfieldExtract(a, 1u, 3u);
+    // Y starts as the three-bit field at bit 3 and then has one low bit inserted from 'a'
+    // (per the illustration above; helper semantics are defined elsewhere).
+    FfxUInt32 upperField = ffxBitfieldExtract(a, 3u, 3u);
+    FfxUInt32 row        = ffxBitfieldInsertMask(upperField, a, 1u);
+    return FfxUInt32x2(column, row);
+}
+
+/// A helper function performing the 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
+///
+/// The 64-wide lane indices are laid out in the 8x8 block as follows:
+///
+///     00 01 08 09 10 11 18 19
+///     02 03 0a 0b 12 13 1a 1b
+///     04 05 0c 0d 14 15 1c 1d
+///     06 07 0e 0f 16 17 1e 1f
+///     20 21 28 29 30 31 38 39
+///     22 23 2a 2b 32 33 3a 3b
+///     24 25 2c 2d 34 35 3c 3d
+///     26 27 2e 2f 36 37 3e 3f
+///
+/// @param [in] a The input 1D coordinate to remap.
+///
+/// @returns
+/// The remapped 2D coordinates.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a)
+{
+    // X: the three-bit field at bit 2, with one low bit inserted from 'a'.
+    FfxUInt32 column = ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u);
+    // Y: the three-bit field at bit 3, with two low bits inserted from the two-bit field at bit 1.
+    FfxUInt32 row = ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u);
+    return FfxUInt32x2(column, row);
+}
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common.h.meta
new file mode 100644
index 0000000..c3f75bc
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: ba1d1b587566f614886a766a2607565d
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 1
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any:
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common_half.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common_half.h
new file mode 100644
index 0000000..1cb780b
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common_half.h
@@ -0,0 +1,2981 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// The definitions that follow are only compiled when FFX_HALF is non-zero.
+#if FFX_HALF
+// When FFX_HLSL_SM is 62 or higher the literals are cast to uint16_t before conversion.
+#if FFX_HLSL_SM >= 62
+/// A define value for 16bit positive infinity.
+///
+/// 0x7c00 is the 16-bit float bit pattern with a clear sign bit, an all-ones exponent and a zero mantissa.
+///
+/// @ingroup GPUCore
+#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u)
+
+/// A define value for 16bit negative infinity.
+///
+/// 0xfc00 is the positive infinity bit pattern (0x7c00) with the sign bit (0x8000) set.
+///
+/// @ingroup GPUCore
+#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u)
+#else
+/// A define value for 16bit positive infinity.
+///
+/// 0x7c00 is the 16-bit float bit pattern with a clear sign bit, an all-ones exponent and a zero mantissa.
+///
+/// @ingroup GPUCore
+#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u)
+
+/// A define value for 16bit negative infinity.
+///
+/// 0xfc00 is the positive infinity bit pattern (0x7c00) with the sign bit (0x8000) set.
+///
+/// @ingroup GPUCore
+#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u)
+#endif // #if FFX_HLSL_SM>=62
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y)
+{
+    FfxFloat16 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    FfxFloat16x2 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    FfxFloat16x3 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    FfxFloat16x4 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y)
+{
+    FfxInt16 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y)
+{
+    FfxInt16x2 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y)
+{
+    FfxInt16x3 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y)
+{
+    FfxInt16x4 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y)
+{
+    FfxUInt16 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    FfxUInt16x2 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    FfxUInt16x3 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lowest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    FfxUInt16x4 lowest = min(x, y);
+    return lowest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y)
+{
+    FfxFloat16 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    FfxFloat16x2 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    FfxFloat16x3 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    FfxFloat16x4 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y)
+{
+    FfxInt16 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y)
+{
+    FfxInt16x2 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y)
+{
+    FfxInt16x3 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y)
+{
+    FfxInt16x4 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y)
+{
+    FfxUInt16 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    FfxUInt16x2 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    FfxUInt16x3 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The highest of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    FfxUInt16x4 highest = max(x, y);
+    return highest;
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x The value to raise to the power y.
+/// @param [in] y The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y)
+{
+    FfxFloat16 raised = pow(x, y);
+    return raised;
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x The base values.
+/// @param [in] y The exponents, one per component of x.
+///
+/// @returns
+/// x raised to the power y, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    FfxFloat16x2 raised = pow(x, y);
+    return raised;
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x The base values.
+/// @param [in] y The exponents, one per component of x.
+///
+/// @returns
+/// x raised to the power y, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    FfxFloat16x3 raised = pow(x, y);
+    return raised;
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x The base values.
+/// @param [in] y The exponents, one per component of x.
+///
+/// @returns
+/// x raised to the power y, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    FfxFloat16x4 raised = pow(x, y);
+    return raised;
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxSqrt(FfxFloat16 x)
+{
+    FfxFloat16 root = sqrt(x);
+    return root;
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The values to compute the square root of.
+///
+/// @returns
+/// The square root of x, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSqrt(FfxFloat16x2 x)
+{
+    FfxFloat16x2 root = sqrt(x);
+    return root;
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The values to compute the square root of.
+///
+/// @returns
+/// The square root of x, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSqrt(FfxFloat16x3 x)
+{
+    FfxFloat16x3 root = sqrt(x);
+    return root;
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The values to compute the square root of.
+///
+/// @returns
+/// The square root of x, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxSqrt(FfxFloat16x4 x)
+{
+    FfxFloat16x4 root = sqrt(x);
+    return root;
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// The sign bit (0x8000) of 's' is OR-ed into the bits of 'd', so a sign bit that is already
+/// set in 'd' is never cleared.
+///
+/// @param [in] d The value to copy the sign bit into.
+/// @param [in] s The value to copy the sign bit from.
+///
+/// @returns
+/// The value of d with the sign bit from s.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s)
+{
+    return FFX_TO_FLOAT16(
+        FFX_TO_UINT16(d) |
+        (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// The sign bit (0x8000) of each component of 's' is OR-ed into the matching component of 'd'.
+///
+/// @param [in] d The value to copy the sign bit into.
+/// @param [in] s The value to copy the sign bit from.
+///
+/// @returns
+/// The value of d with the sign bit from s.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s)
+{
+    return FFX_TO_FLOAT16X2(
+        FFX_TO_UINT16X2(d) |
+        (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// The sign bit (0x8000) of each component of 's' is OR-ed into the matching component of 'd'.
+///
+/// @param [in] d The value to copy the sign bit into.
+/// @param [in] s The value to copy the sign bit from.
+///
+/// @returns
+/// The value of d with the sign bit from s.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s)
+{
+    return FFX_TO_FLOAT16X3(
+        FFX_TO_UINT16X3(d) |
+        (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d The value to copy the sign bit into.
+/// @param [in] s The value to copy the sign bit from.
+///
+/// @returns
+/// The value of d with the sign bit from s.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s)
+{
+    return FFX_TO_FLOAT16X4(
+        FFX_TO_UINT16X4(d) |
+        (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u)));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxIsSignedHalf(FfxFloat16 m)
+{
+    FfxFloat16 scaled = m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF);
+    return ffxSaturate(scaled);
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m)
+{
+    FfxFloat16x2 scaled = m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF);
+    return ffxSaturate(scaled);
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m)
+{
+    FfxFloat16x3 scaled = m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF);
+    return ffxSaturate(scaled);
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m)
+{
+    FfxFloat16x4 scaled = m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF);
+    return ffxSaturate(scaled);
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// The value is multiplied by +INF and saturated; as with ffxIsSignedHalf this relies on
+/// a NaN product (0 * INF) saturating to 0.
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m)
+{
+    FfxFloat16 scaled = m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF);
+    return ffxSaturate(scaled);
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// The value is multiplied by +INF and saturated; as with ffxIsSignedHalf this relies on
+/// a NaN product (0 * INF) saturating to 0.
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m)
+{
+    FfxFloat16x2 scaled = m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF);
+    return ffxSaturate(scaled);
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m)
+{
+    FfxFloat16x3 scaled = m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF);
+    return ffxSaturate(scaled);
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// The value is multiplied by +INF and saturated; as with ffxIsSignedHalf this relies on
+/// a NaN product (0 * INF) saturating to 0.
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m)
+{
+    FfxFloat16x4 scaled = m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF);
+    return ffxSaturate(scaled);
+}
+
+/// Convert a 16bit floating point value to sortable integer.
+///
+///  - If sign bit=0, flip the sign bit (positives).
+///  - If sign bit=1, flip all bits (negatives).
+///
+/// The function has the side effects that:
+///  - Larger integers are more positive values.
+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// The input is XOR-ed with (ffxBitShiftRightHalf(x, 15) | 0x8000).
+/// NOTE(review): flipping all bits of negatives requires ffxBitShiftRightHalf to replicate the
+/// top bit (arithmetic shift) - confirm against its definition.
+///
+/// @param [in] x The floating point value to make sortable, passed as a 16-bit unsigned integer.
+///
+/// @returns
+/// The sortable integer value.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x)
+{
+    FfxUInt16 flipMask = (ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000);
+    return x ^ flipMask;
+}
+
+/// Convert a sortable integer to a 16bit floating point value.
+///
+/// The function has the side effects that:
+///  - If sign bit=1, flip the sign bit (positives).
+///  - If sign bit=0, flip all bits (negatives).
+///
+/// @param [in] x The sortable integer value to make floating point.
+///
+/// @returns
+/// The floating point value.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x)
+{
+    // Inverse of ffxFloatToSortableIntegerHalf: the shifted value is complemented before the
+    // top bit is forced on.
+    FfxUInt16 flipMask = (~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000);
+    return x ^ flipMask;
+}
+
+/// Convert a pair of 16bit floating point values to a pair of sortable integers.
+///
+///  - If sign bit=0, flip the sign bit (positives).
+///  - If sign bit=1, flip all bits (negatives).
+///
+/// The function has the side effects that:
+///  - Larger integers are more positive values.
+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// Each component is XOR-ed with (ffxBitShiftRightHalf(x, 15) | 0x8000).
+/// NOTE(review): flipping all bits of negatives requires ffxBitShiftRightHalf to replicate the
+/// top bit (arithmetic shift) - confirm against its definition.
+///
+/// @param [in] x The floating point values to make sortable, passed as 16-bit unsigned integers.
+///
+/// @returns
+/// The sortable integer values.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x)
+{
+    FfxUInt16x2 flipMask = (ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000);
+    return x ^ flipMask;
+}
+
+/// Convert a pair of sortable integers to a pair of 16bit floating point values.
+///
+/// The function has the side effects that:
+///  - If sign bit=1, flip the sign bit (positives).
+///  - If sign bit=0, flip all bits (negatives).
+///
+/// @param [in] x The sortable integer values to make floating point.
+///
+/// @returns
+/// The floating point values.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x)
+{
+    // Inverse of ffxFloatToSortableIntegerHalf: the shifted value is complemented before the
+    // top bit is forced on.
+    FfxUInt16x2 flipMask = (~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000);
+    return x ^ flipMask;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     [Zero] Y0 [Zero] X0
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i)
+{
+    FfxUInt32 fromX = (i.x) & 0xffu;             // X0 -> bits 0..7
+    FfxUInt32 fromY = (i.y << 16) & 0xff0000u;   // Y0 -> bits 16..23
+    return fromX | fromY;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     [Zero] Y1 [Zero] X1
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i)
+{
+    FfxUInt32 fromX = (i.x >> 8) & 0xffu;        // X1 -> bits 0..7
+    FfxUInt32 fromY = (i.y << 8) & 0xff0000u;    // Y1 -> bits 16..23
+    return fromX | fromY;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     [Zero] Y2 [Zero] X2
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i)
+{
+    FfxUInt32 fromX = (i.x >> 16) & 0xffu;       // X2 -> bits 0..7
+    FfxUInt32 fromY = (i.y) & 0xff0000u;         // Y2 stays in bits 16..23
+    return fromX | fromY;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     [Zero] Y3 [Zero] X3
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i)
+{
+    FfxUInt32 fromX = (i.x >> 24) & 0xffu;       // X3 -> bits 0..7
+    FfxUInt32 fromY = (i.y >> 8) & 0xff0000u;    // Y3 -> bits 16..23
+    return fromX | fromY;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     Y3 Y2 Y1 X0
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i)
+{
+    FfxUInt32 fromX = (i.x) & 0x000000ffu;       // X0 -> bits 0..7
+    FfxUInt32 fromY = i.y & 0xffffff00u;         // Y3 Y2 Y1 kept in place
+    return fromX | fromY;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     Y3 Y2 Y1 X2
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i)
+{
+    FfxUInt32 fromX = (i.x >> 16) & 0x000000ffu; // X2 -> bits 0..7
+    FfxUInt32 fromY = i.y & 0xffffff00u;         // Y3 Y2 Y1 kept in place
+    return fromX | fromY;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     Y3 Y2 X0 Y0
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i)
+{
+    FfxUInt32 fromX = (i.x << 8) & 0x0000ff00u;  // X0 -> bits 8..15
+    FfxUInt32 fromY = i.y & 0xffff00ffu;         // Y3 Y2 and Y0 kept in place
+    return fromX | fromY;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     Y3 Y2 X2 Y0
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i)
+{
+    FfxUInt32 fromX = (i.x >> 8) & 0x0000ff00u;  // X2 -> bits 8..15
+    FfxUInt32 fromY = i.y & 0xffff00ffu;         // Y3 Y2 and Y0 kept in place
+    return fromX | fromY;
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+///     Y3 X0 Y1 Y0
+///
+/// @param [in] i The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
///
/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i)
{
    // Byte 0 of x is moved up into bits [16:23]; every other byte is kept from y.
    FfxUInt32 fromX = (i.x << 16) & 0x00ff0000u;
    FfxUInt32 fromY = i.y & 0xff00ffffu;
    return fromX | fromY;
}

/// Assembles a 32-bit integer from byte 2 of X (kept in byte 2) and bytes 3, 1 and 0 of Y of a FfxUInt32x2.
///
/// Byte layout of the result, from most to least significant:
///  Y3 X2 Y1 Y0
///
/// @param [in] i               The integer pair supplying the bytes.
///
/// @returns
/// The packed integer value.
///
/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i)
{
    // Byte 2 of x is already at bits [16:23] and is only masked; every other byte is kept from y.
    FfxUInt32 fromX = (i.x) & 0x00ff0000u;
    FfxUInt32 fromY = i.y & 0xff00ffffu;
    return fromX | fromY;
}

/// Assembles a 32-bit integer from byte 0 of X (placed in byte 3) and the lower three bytes of Y of a FfxUInt32x2.
///
/// Byte layout of the result, from most to least significant:
///  X0 Y2 Y1 Y0
///
/// @param [in] i               The integer pair supplying the bytes.
///
/// @returns
/// The packed integer value.
///
/// @ingroup GPUCore
FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i)
{
    // Byte 0 of x is moved up into bits [24:31]; the lower three bytes are kept from y.
    FfxUInt32 fromX = (i.x << 24) & 0xff000000u;
    FfxUInt32 fromY = i.y & 0x00ffffffu;
    return fromX | fromY;
}

/// Assembles a 32-bit integer from byte 2 of X (placed in byte 3) and the lower three bytes of Y of a FfxUInt32x2.
///
/// Byte layout of the result, from most to least significant:
///  X2 Y2 Y1 Y0
///
/// @param [in] i               The integer pair supplying the bytes.
///
/// @returns
/// The packed integer value.
///
/// @ingroup GPUCore
FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i)
{
    // Byte 2 of x is moved up into bits [24:31]; the lower three bytes are kept from y.
    FfxUInt32 fromX = (i.x << 8) & 0xff000000u;
    FfxUInt32 fromY = i.y & 0x00ffffffu;
    return fromX | fromY;
}

/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
///
/// The resulting integer will contain bytes in the following order, from most to least significant:
///  Y2 X2 Y0 X0
///
/// @param [in] i               The integer pair to pack.
///
/// @returns
/// The packed integer value.
///
/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i)
{
    // Bytes 0 and 2 of x keep their positions; bytes 0 and 2 of y are each moved up one byte.
    FfxUInt32 fromX = (i.x) & 0x00ff00ffu;
    FfxUInt32 fromY = (i.y << 8) & 0xff00ff00u;
    return fromX | fromY;
}

/// Assembles a 32-bit integer from bytes 0 and 2 of both the X and Y components of a FfxUInt32x2,
/// with the two X bytes in the low half and the two Y bytes in the high half.
///
/// Byte layout of the result, from most to least significant:
///  Y2 Y0 X2 X0
///
/// @param [in] i               The integer pair supplying the bytes.
///
/// @returns
/// The packed integer value.
///
/// @ingroup GPUCore
FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i)
{
    FfxUInt32 x0 = (i.x) & 0xffu;              // x byte 0 -> result byte 0
    FfxUInt32 x2 = (i.x >> 8) & 0xff00u;       // x byte 2 -> result byte 1
    FfxUInt32 y0 = (i.y << 16) & 0xff0000u;    // y byte 0 -> result byte 2
    FfxUInt32 y2 = (i.y << 8) & 0xff000000u;   // y byte 2 -> result byte 3
    return (x0 | x2 | y0 | y2);
}

/// Scales two Float16x2 values by 1.0/32768.0, passes each through FFX_TO_UINT16X2, and interleaves
/// the resulting bytes with ffxPackBytesY2X2Y0X0 into a single Uint16x2 in the format {{x0,y0},{x1,y1}}.
///
/// @param [in] x               The first float16x2 value to pack.
/// @param [in] y               The second float16x2 value to pack.
///
/// @returns
/// The packed FfxUInt16x2 value.
///
/// @ingroup GPUCore
FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
{
    FfxFloat16x2 scaledX = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
    FfxFloat16x2 scaledY = y * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);

    // View each scaled pair as one 32-bit word so the byte-permute helper can interleave them.
    FfxUInt32 wordX = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(scaledX));
    FfxUInt32 wordY = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(scaledY));

    return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(wordX, wordY)));
}

/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],
/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
///
/// r=ffxPermuteUByte0Float16x2ToUint2(d,i)
///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
///   Where 'k1' is an SGPR with 0x????
///   Where 'k2' is an SGPR with 0x????
///   V_PK_FMA_F16 i,i,k0.x,0
///   V_PERM_B32 r.x,i,i,k1
///   V_PERM_B32 r.y,i,i,k2
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    // 32-bit view of the scaled half pair.
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));

    // r.x: byte 0 <- d.x byte 0, bytes 1..3 <- encoded.
    // r.y: byte 0 <- d.y byte 2, bytes 1..3 <- encoded.
    // NOTE(review): the header comment speaks of d.y[0:7]; the helper used here reads d.y[16:23] - confirm intent.
    FfxUInt32 outX = ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Merges one byte from each component of d into byte 1 of the 32-bit view of the scaled Float16x2 value i.
///
/// With b being the 32-bit view of i * (1.0/32768.0):
///   r.x = ffxPackBytesY3Y2X0Y0(d.x, b)   (byte 1 <- d.x byte 0, bytes 0, 2, 3 <- b)
///   r.y = ffxPackBytesY3Y2X2Y0(d.y, b)   (byte 1 <- d.y byte 2, bytes 0, 2, 3 <- b)
///
/// Intended to take 3 ops:
///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
///   Where 'k1' is an SGPR with 0x????
///   Where 'k2' is an SGPR with 0x????
///   V_PK_FMA_F16 i,i,k0.x,0
///   V_PERM_B32 r.x,i,i,k1
///   V_PERM_B32 r.y,i,i,k2
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    // 32-bit view of the scaled half pair.
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));

    FfxUInt32 outX = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],
/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
///
/// r=ffxPermuteUByte2Float16x2ToUint2(d,i)
///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
///   Where 'k1' is an SGPR with 0x????
///   Where 'k2' is an SGPR with 0x????
///   V_PK_FMA_F16 i,i,k0.x,0
///   V_PERM_B32 r.x,i,i,k1
///   V_PERM_B32 r.y,i,i,k2
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    // 32-bit view of the scaled half pair.
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));

    // r.x: byte 2 <- d.x byte 0, bytes 0, 1, 3 <- encoded.
    // r.y: byte 2 <- d.y byte 2, bytes 0, 1, 3 <- encoded.
    // NOTE(review): the header comment speaks of d.y[0:7]; the helper used here reads d.y[16:23] - confirm intent.
    FfxUInt32 outX = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Merges one byte from each component of d into byte 3 of the 32-bit view of the scaled Float16x2 value i.
///
/// With b being the 32-bit view of i * (1.0/32768.0):
///   r.x = ffxPackBytesX0Y2Y1Y0(d.x, b)   (byte 3 <- d.x byte 0, bytes 0..2 <- b)
///   r.y = ffxPackBytesX2Y2Y1Y0(d.y, b)   (byte 3 <- d.y byte 2, bytes 0..2 <- b)
///
/// Intended to take 3 ops:
///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
///   Where 'k1' is an SGPR with 0x????
///   Where 'k2' is an SGPR with 0x????
///   V_PK_FMA_F16 i,i,k0.x,0
///   V_PERM_B32 r.x,i,i,k1
///   V_PERM_B32 r.y,i,i,k2
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    // 32-bit view of the scaled half pair.
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));

    FfxUInt32 outX = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i)
{
    // Byte 0 of each component ends up in the low byte of its own 16-bit lane.
    FfxUInt32 lanes = ffxPackBytesZeroY0ZeroX0(i);
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0);
}

/// Extracts byte 1 (bits [8:15]) of i.x and i.y into the low byte of the two 16-bit lanes of r, then
/// passes the lanes through FFX_TO_FLOAT16X2 and multiplies by 32768.0. Intended to take 2 ops.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY1ZeroX1(i);
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0);
}

/// Extracts byte 2 (bits [16:23]) of i.x and i.y into the low byte of the two 16-bit lanes of r, then
/// passes the lanes through FFX_TO_FLOAT16X2 and multiplies by 32768.0. Intended to take 2 ops.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY2ZeroX2(i);
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0);
}

/// Extracts byte 3 (bits [24:31]) of i.x and i.y into the low byte of the two 16-bit lanes of r, then
/// passes the lanes through FFX_TO_FLOAT16X2 and multiplies by 32768.0. Intended to take 2 ops.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY3ZeroX3(i);
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0);
}

/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}.
///
/// @param [in] x               The first float16x2 value to pack.
/// @param [in] y               The second float16x2 value to pack.
///
/// @returns
/// The packed FfxUInt16x2 value.
///
/// @ingroup GPUCore
FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
{
    // Signed variant: same 1/32768 scale as the unsigned packer plus a 0.25/32768 bias.
    FfxFloat16x2 biasedX = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
    FfxFloat16x2 biasedY = y * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);

    // View each biased pair as one 32-bit word so the byte-permute helper can interleave them.
    FfxUInt32 wordX = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biasedX));
    FfxUInt32 wordY = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biasedY));

    return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(wordX, wordY)));
}

/// Merges one byte from each component of d into byte 0 of the 32-bit view of the scaled and biased Float16x2 value i.
///
/// With b being the 32-bit view of i * (1.0/32768.0) + (0.25/32768.0):
///   r.x = ffxPackBytesY3Y2Y1X0(d.x, b)   (byte 0 <- d.x byte 0, bytes 1..3 <- b)
///   r.y = ffxPackBytesY3Y2Y1X2(d.y, b)   (byte 0 <- d.y byte 2, bytes 1..3 <- b)
///
/// Handles signed byte values. Intended to take 3 ops.
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));

    FfxUInt32 outX = ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],
/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
///
/// Handles signed byte values.
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));

    // r.x: byte 1 <- d.x byte 0, bytes 0, 2, 3 <- encoded.
    // r.y: byte 1 <- d.y byte 2, bytes 0, 2, 3 <- encoded.
    // NOTE(review): the header comment speaks of d.y[0:7]; the helper used here reads d.y[16:23] - confirm intent.
    FfxUInt32 outX = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Merges one byte from each component of d into byte 2 of the 32-bit view of the scaled and biased Float16x2 value i.
///
/// With b being the 32-bit view of i * (1.0/32768.0) + (0.25/32768.0):
///   r.x = ffxPackBytesY3X0Y1Y0(d.x, b)   (byte 2 <- d.x byte 0, bytes 0, 1, 3 <- b)
///   r.y = ffxPackBytesY3X2Y1Y0(d.y, b)   (byte 2 <- d.y byte 2, bytes 0, 1, 3 <- b)
///
/// Handles signed byte values. Intended to take 3 ops.
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));

    FfxUInt32 outX = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],
/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
///
/// Handles signed byte values.
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));

    // r.x: byte 3 <- d.x byte 0, bytes 0..2 <- encoded.
    // r.y: byte 3 <- d.y byte 2, bytes 0..2 <- encoded.
    // NOTE(review): the header comment speaks of d.y[0:7]; the helper used here reads d.y[16:23] - confirm intent.
    FfxUInt32 outX = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Merges one byte from each component of d into byte 0 of the zero-based 32-bit view of the scaled and biased Float16x2 value i.
///
/// With b being the 32-bit view of i * (1.0/32768.0) + (0.25/32768.0), XORed with 0x00800080u:
///   r.x = ffxPackBytesY3Y2Y1X0(d.x, b)   (byte 0 <- d.x byte 0, bytes 1..3 <- b)
///   r.y = ffxPackBytesY3Y2Y1X2(d.y, b)   (byte 0 <- d.y byte 2, bytes 1..3 <- b)
///
/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
/// This is useful if there is a desire for cleared values to decode as zero.
///
/// Handles signed byte values. Intended to take 3 ops.
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);

    // Flip bit 7 of the low byte in each 16-bit lane (the zero-based adjustment).
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased)) ^ 0x00800080u;

    FfxUInt32 outX = ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],
/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
///
/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
/// This is useful if there is a desire for cleared values to decode as zero.
///
/// Handles signed byte values.
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);

    // Flip bit 7 of the low byte in each 16-bit lane (the zero-based adjustment).
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased)) ^ 0x00800080u;

    // r.x: byte 1 <- d.x byte 0, bytes 0, 2, 3 <- encoded.
    // r.y: byte 1 <- d.y byte 2, bytes 0, 2, 3 <- encoded.
    // NOTE(review): the header comment speaks of d.y[0:7]; the helper used here reads d.y[16:23] - confirm intent.
    FfxUInt32 outX = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Merges one byte from each component of d into byte 2 of the zero-based 32-bit view of the scaled and biased Float16x2 value i.
///
/// With b being the 32-bit view of i * (1.0/32768.0) + (0.25/32768.0), XORed with 0x00800080u:
///   r.x = ffxPackBytesY3X0Y1Y0(d.x, b)   (byte 2 <- d.x byte 0, bytes 0, 1, 3 <- b)
///   r.y = ffxPackBytesY3X2Y1Y0(d.y, b)   (byte 2 <- d.y byte 2, bytes 0, 1, 3 <- b)
///
/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
/// This is useful if there is a desire for cleared values to decode as zero.
///
/// Handles signed byte values. Intended to take 3 ops.
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);

    // Flip bit 7 of the low byte in each 16-bit lane (the zero-based adjustment).
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased)) ^ 0x00800080u;

    FfxUInt32 outX = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],
/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
///
/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
/// This is useful if there is a desire for cleared values to decode as zero.
///
/// Handles signed byte values.
///
/// @param [in] d               The FfxUInt32x2 value to be packed.
/// @param [in] i               The FfxFloat16x2 value to be packed.
///
/// @returns
/// The packed FfxUInt32x2 value.
///
/// @ingroup GPUCore
FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
{
    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);

    // Flip bit 7 of the low byte in each 16-bit lane (the zero-based adjustment).
    FfxUInt32 encoded = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased)) ^ 0x00800080u;

    // r.x: byte 3 <- d.x byte 0, bytes 0..2 <- encoded.
    // r.y: byte 3 <- d.y byte 2, bytes 0..2 <- encoded.
    // NOTE(review): the header comment speaks of d.y[0:7]; the helper used here reads d.y[16:23] - confirm intent.
    FfxUInt32 outX = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, encoded));
    FfxUInt32 outY = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, encoded));
    return FfxUInt32x2(outX, outY);
}

/// Extracts byte 0 (bits [0:7]) of i.x and i.y into the low byte of the two 16-bit lanes of r, passes the
/// lanes through FFX_TO_FLOAT16X2, multiplies by 32768.0 and subtracts 0.25. Intended to take 2 ops.
///
/// Handles signed byte values.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY0ZeroX0(i);
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
}

/// Extracts byte 1 (bits [8:15]) of i.x and i.y into the low byte of the two 16-bit lanes of r, passes the
/// lanes through FFX_TO_FLOAT16X2, multiplies by 32768.0 and subtracts 0.25. Intended to take 2 ops.
///
/// Handles signed byte values.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY1ZeroX1(i);
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
}

/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.
///
/// Handles signed byte values.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
{
    // Byte 2 of each component ends up in the low byte of its own 16-bit lane.
    FfxUInt32 lanes = ffxPackBytesZeroY2ZeroX2(i);
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
}

/// Extracts byte 3 (bits [24:31]) of i.x and i.y into the low byte of the two 16-bit lanes of r, passes the
/// lanes through FFX_TO_FLOAT16X2, multiplies by 32768.0 and subtracts 0.25. Intended to take 2 ops.
///
/// Handles signed byte values.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY3ZeroX3(i);
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
}

/// Extracts byte 0 (bits [0:7]) of i.x and i.y into the low byte of the two 16-bit lanes of r, XORs the
/// lanes with 0x00800080u (the zero-based MSB flip), passes them through FFX_TO_FLOAT16X2, multiplies by
/// 32768.0 and subtracts 0.25.
///
/// Handles signed byte values.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u;
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
}

/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.
///
/// Handles signed byte values.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
{
    // Gather byte 1 of each component into 16-bit lanes, then flip bit 7 of each lane's low byte.
    FfxUInt32 lanes = ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u;
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
}

/// Extracts byte 2 (bits [16:23]) of i.x and i.y into the low byte of the two 16-bit lanes of r, XORs the
/// lanes with 0x00800080u (the zero-based MSB flip), passes them through FFX_TO_FLOAT16X2, multiplies by
/// 32768.0 and subtracts 0.25.
///
/// Handles signed byte values.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u;
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
}

/// Extracts byte 3 (bits [24:31]) of i.x and i.y into the low byte of the two 16-bit lanes of r, XORs the
/// lanes with 0x00800080u (the zero-based MSB flip), passes them through FFX_TO_FLOAT16X2, multiplies by
/// 32768.0 and subtracts 0.25.
///
/// Handles signed byte values.
///
/// @param [in] i               The FfxUInt32x2 value to be unpacked.
///
/// @returns
/// The unpacked FfxFloat16x2.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
{
    FfxUInt32 lanes = ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u;
    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
}

/// Calculate a half-precision low-quality approximation for the square root of a value.
///
/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
/// presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the square root for.
///
/// @returns
/// An approximation of the square root, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a)
{
    // Integer-only estimate: shift the FFX_TO_UINT16 view of `a` right by one, then add 0x1de2.
    return FFX_TO_FLOAT16(
        (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2));
}

/// Low-quality half-precision estimate of the square root, applied per component of a two-component vector.
///
/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the square root for.
///
/// @returns
/// An approximation of the square root, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a)
{
    // Integer-only estimate: shift the FFX_TO_UINT16X2 view of `a` right by one, then add 0x1de2.
    return FFX_TO_FLOAT16X2(
        (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2));
}

/// Low-quality half-precision estimate of the square root, applied per component of a three-component vector.
///
/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the square root for.
///
/// @returns
/// An approximation of the square root, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a)
{
    // Integer-only estimate: shift the FFX_TO_UINT16X3 view of `a` right by one, then add 0x1de2.
    return FFX_TO_FLOAT16X3(
        (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2));
}

/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
///
/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
/// presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal for.
///
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a)
{
    // Integer-only estimate: subtract the FFX_TO_UINT16 view of `a` from 0x7784.
    return FFX_TO_FLOAT16(
        FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a));
}

/// Low-quality half-precision estimate of the reciprocal, applied per component of a two-component vector.
///
/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal for.
///
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a)
{
    // Integer-only estimate: subtract the FFX_TO_UINT16X2 view of `a` from 0x7784.
    return FFX_TO_FLOAT16X2(
        FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a));
}

/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
///
/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
/// presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal for.
///
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a)
{
    // Integer-only estimate: subtract the FFX_TO_UINT16X3 view of `a` from 0x7784.
    return FFX_TO_FLOAT16X3(
        FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a));
}

/// Low-quality half-precision estimate of the reciprocal, applied per component of a four-component vector.
///
/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal for.
///
/// @returns
/// An approximation of the reciprocal, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a)
{
    // Integer-only estimate: subtract the FFX_TO_UINT16X4 view of `a` from 0x7784.
    return FFX_TO_FLOAT16X4(
        FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a));
}

/// Medium-quality half-precision estimate of the reciprocal of a scalar.
///
/// Starts from an integer-only estimate (0x778d minus the FFX_TO_UINT16 view of the input) and then
/// applies one refinement step of the form b * (2 - b * a).
///
/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal for.
///
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
/// @ingroup GPUCore
FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a)
{
    FfxFloat16 estimate = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a));
    FfxFloat16 correction = -estimate * a + FFX_BROADCAST_FLOAT16(2.0);
    return estimate * correction;
}

/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
///
/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
/// presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal for.
///
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a)
{
    // Integer-only first estimate, followed by one refinement step b * (2 - b * a).
    FfxFloat16x2 estimate = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a));
    FfxFloat16x2 correction = -estimate * a + FFX_BROADCAST_FLOAT16X2(2.0);
    return estimate * correction;
}

/// Medium-quality half-precision estimate of the reciprocal, applied per component of a three-component vector.
///
/// Starts from an integer-only estimate (0x778d minus the FFX_TO_UINT16X3 view of the input) and then
/// applies one refinement step of the form b * (2 - b * a).
///
/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal for.
///
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
/// @ingroup GPUCore
FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a)
{
    FfxFloat16x3 estimate = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a));
    FfxFloat16x3 correction = -estimate * a + FFX_BROADCAST_FLOAT16X3(2.0);
    return estimate * correction;
}

/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
///
/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
/// presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal for.
///
/// @returns
/// An approximation of the reciprocal, estimated to medium quality.
///
/// @ingroup GPUCore
FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a)
{
    // Integer-only first estimate, followed by one refinement step b * (2 - b * a).
    FfxFloat16x4 estimate = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a));
    FfxFloat16x4 correction = -estimate * a + FFX_BROADCAST_FLOAT16X4(2.0);
    return estimate * correction;
}

/// Low-quality half-precision estimate of the reciprocal of the square root of a scalar.
///
/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal of the square root for.
///
/// @returns
/// An approximation of the reciprocal of the square root, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a)
{
    // Integer-only estimate: subtract the FFX_TO_UINT16 view of `a`, shifted right by one, from 0x59a3.
    return FFX_TO_FLOAT16(
        FFX_BROADCAST_UINT16(0x59a3) - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)));
}

/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
///
/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
/// presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal of the square root for.
///
/// @returns
/// An approximation of the reciprocal of the square root, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a)
{
    // Integer-only estimate: subtract the FFX_TO_UINT16X2 view of `a`, shifted right by one, from 0x59a3.
    return FFX_TO_FLOAT16X2(
        FFX_BROADCAST_UINT16X2(0x59a3) - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)));
}

/// Low-quality half-precision estimate of the reciprocal of the square root, applied per component of a
/// three-component vector.
///
/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
///
///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
///
/// @param [in] a               The value to calculate an approximate to the reciprocal of the square root for.
///
/// @returns
/// An approximation of the reciprocal of the square root, estimated to low quality.
///
/// @ingroup GPUCore
FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a)
{
    // Integer-only estimate: subtract the FFX_TO_UINT16X3 view of `a`, shifted right by one, from 0x59a3.
    return FFX_TO_FLOAT16X3(
        FFX_BROADCAST_UINT16X3(0x59a3) - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)));
}

/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximation of the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a) +{ + return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x59a3) - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1))); +} + +/// An approximation of sine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine for. +/// +/// @returns +/// The approximate sine of the value. +FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x) +{ + return x * abs(x) - x; +} + +/// An approximation of sine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine for. +/// +/// @returns +/// The approximate sine of the value. +FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x) +{ + return x * abs(x) - x; +} + +/// An approximation of cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// The approximate cosine of the value.
+FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x) +{ + x = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); + x = x * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); + return ffxParabolicSinHalf(x); +} + +/// An approximation of cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// The approximate cosine of the value. +FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x) +{ + x = ffxFract(x * FFX_BROADCAST_FLOAT16X2(0.5) + FFX_BROADCAST_FLOAT16X2(0.75)); + x = x * FFX_BROADCAST_FLOAT16X2(2.0) - FFX_BROADCAST_FLOAT16X2(1.0); + return ffxParabolicSinHalf(x); +} + +/// An approximation of both sine and cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine and cosine for. +/// +/// @returns +/// An FfxFloat16x2 containing approximations of both sine (x component) and cosine (y component) of the value. +FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x) +{ + FfxFloat16 y = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); + y = y * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); + return ffxParabolicSinHalf(FfxFloat16x2(x, y)); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator.
+/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return min(x, y); +} + +/// Conditional free logic NOT operation on a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// @note Implemented as x ^ 1, so only the lowest bit is toggled. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x) +{ + return x ^ FFX_BROADCAST_UINT16(1); +} + +/// Conditional free logic NOT operation on a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// @note Implemented as x ^ 1, so only the lowest bit is toggled. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x) +{ + return x ^ FFX_BROADCAST_UINT16X2(1); +} + +/// Conditional free logic NOT operation on a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// @note Implemented as x ^ 1, so only the lowest bit is toggled. +/// +/// @returns +/// Result of the NOT operation.
+/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x) +{ + return x ^ FFX_BROADCAST_UINT16X3(1); +} + +/// Conditional free logic NOT operation on a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// @note Implemented as x ^ 1, so only the lowest bit is toggled. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x) +{ + return x ^ FFX_BROADCAST_UINT16X4(1); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation.
+/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return max(x, y); +} + +/// Convert a half-precision float value between 0.0 and 1.0 to a half-precision Uint. +/// +/// @param [in] x The value to be converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x) +{ + return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1))); +} + +/// Convert a half-precision float value between 0.0 and 1.0 to a half-precision Uint. +/// +/// @param [in] x The value to be converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x) +{ + return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1))); +} + +/// Convert a half-precision float value between 0.0 and 1.0 to a half-precision Uint. +/// +/// @param [in] x The value to be converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x) +{ + return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1))); +} + +/// Convert a half-precision float value between 0.0 and 1.0 to a half-precision Uint. +/// +/// @param [in] x The value to be converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x) +{ + return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1))); +} + +/// Convert a half-precision Uint value between 0 and 1 to a half-precision float. +/// +/// @param [in] x The value to be converted to a half-precision float. +/// +/// @returns +/// The converted half-precision float value.
+/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x) +{ + return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0))); +} + +/// Convert a half-precision Uint value between 0 and 1 to a half-precision float. +/// +/// @param [in] x The value to be converted to a half-precision float. +/// +/// @returns +/// The converted half-precision float value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x) +{ + return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxUInt16x2(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Convert a half-precision Uint value between 0 and 1 to a half-precision float. +/// +/// @param [in] x The value to be converted to a half-precision float. +/// +/// @returns +/// The converted half-precision float value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x) +{ + return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxUInt16x3(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Convert a half-precision Uint value between 0 and 1 to a half-precision float. +/// +/// @param [in] x The value to be converted to a half-precision float. +/// +/// @returns +/// The converted half-precision float value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x) +{ + return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxUInt16x4(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values.
+/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return min(x, y); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation, computed as 1.0 - x * y. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation, computed as 1.0 - x * y.
+/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation, computed as 1.0 - x * y. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation, computed as 1.0 - x * y. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator.
+/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return ffxSaturate(x * y + z); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison.
+/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Conditional free logic NOT operation on a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation, computed as 1.0 - x. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x) +{ + return FFX_BROADCAST_FLOAT16(1.0) - x; +} + +/// Conditional free logic NOT operation on a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation, computed as 1.0 - x. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x) +{ + return FFX_BROADCAST_FLOAT16X2(1.0) - x; +} + +/// Conditional free logic NOT operation on a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation, computed as 1.0 - x.
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x) +{ + return FFX_BROADCAST_FLOAT16X3(1.0) - x; +} + +/// Conditional free logic NOT operation on a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation, computed as 1.0 - x. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x) +{ + return FFX_BROADCAST_FLOAT16X4(1.0) - x; +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation.
+/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + FfxFloat16 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + FfxFloat16x2 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + FfxFloat16x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero.
+/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + FfxFloat16x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Convert a linear color value to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.)
For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. NOTE(review): clamp() is called as clamp(threshold, linear, curve); confirm the intended argument order. +/// +/// @ingroup GPUCore +FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); +} + +/// Convert a linear color value to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. NOTE(review): clamp() is called as clamp(threshold, linear, curve); confirm the intended argument order. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); +} + +/// Convert a linear color value to the Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. NOTE(review): clamp() is called as clamp(threshold, linear, curve); confirm the intended argument order. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a gamma value from a linear value.
+/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX)); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. NOTE(review): the implementation calls clamp(threshold, linear, curve); confirm the intended argument order.
+/// +/// @ingroup GPUCore +FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. NOTE(review): clamp() is called as clamp(threshold, linear, curve); confirm the intended argument order. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. NOTE(review): clamp() is called as clamp(threshold, linear, curve); confirm the intended argument order. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// The square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSquareRootHalf(FfxFloat16 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// The square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// The square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c) +{ + return sqrt(c); +} + +/// Compute the cube root of a value.
+/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// The cube root of the input value, computed as pow(c, 1/3). +/// +/// @ingroup GPUCore +FfxFloat16 ffxCubeRootHalf(FfxFloat16 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// The cube root of the input value, computed as pow(c, 1/3). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// The cube root of the input value, computed as pow(c, 1/3). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The REC.709 value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The REC.709 value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The REC.709 value to convert to linear. +/// +/// @returns +/// A value in linear space.
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// The power x is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The gamma-space value to convert to linear. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space, evaluated as pow(c, x). +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// The power x is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The gamma-space value to convert to linear. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space, evaluated as pow(c, x). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// The power x is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The gamma-space value to convert to linear. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space, evaluated as pow(c, x). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(x)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Uses fixed sRGB curve constants (12.92, 1.055, 0.055, exponent 2.4); there is no gamma parameter. +/// +/// @param [in] c The SRGB-space value to convert to linear.
+/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Uses fixed sRGB curve constants (12.92, 1.055, 0.055, exponent 2.4); there is no gamma parameter. +/// NOTE(review): the segment selector compares c against 0.04045 / 12.92, while the usual sRGB +/// decode threshold on an encoded value is 0.04045 - confirm this is intended. +/// +/// @param [in] c The SRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Uses fixed sRGB curve constants (12.92, 1.055, 0.055, exponent 2.4); there is no gamma parameter. +/// NOTE(review): the segment selector compares c against 0.04045 / 12.92, while the usual sRGB +/// decode threshold on an encoded value is 0.04045 - confirm this is intended. +/// +/// @param [in] c The SRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// Remap illustration: +/// +/// 543210 +/// ~~~~~~ +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinates to remap. +/// +/// @returns +/// The remapped 2D coordinates.
+/// +/// @ingroup GPUCore +FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) +{ + return FfxUInt16x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing a 64x1 to 8x8 remapping, which is necessary for 2D wave reductions. +/// +/// The 64-wide lane indices to 8x8 remapping is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a) +{ + return FfxUInt16x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u)); +} + +#endif // FFX_HALF diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common_half.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common_half.h.meta new file mode 100644 index 0000000..77dab01 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_gpu_common_half.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 37f23fcc81aaddb4bbaab23496484e92 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git
a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_hlsl.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_hlsl.h new file mode 100644 index 0000000..28827d9 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_hlsl.h @@ -0,0 +1,1898 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +/// @defgroup HLSLCore HLSL Core +/// HLSL core defines and functions +/// +/// @ingroup FfxHLSL + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +/// A define for abstracting select functionality for pre/post HLSL 2021. +/// +/// HLSL 2021 and later map to the select() intrinsic; earlier versions fall back to the ternary +/// operator. The fallback is fully parenthesised so it can be used inside larger expressions. +/// +/// @ingroup HLSLCore +#if __HLSL_VERSION >= 2021 + +#define FFX_SELECT(cond, arg1, arg2) select(cond, arg1, arg2) + +#else // #if __HLSL_VERSION >= 2021 + +#define FFX_SELECT(cond, arg1, arg2) ((cond) ? (arg1) : (arg2)) + +#endif // #if __HLSL_VERSION >= 2021 + +/// A define for abstracting shared memory between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_GROUPSHARED groupshared + +/// A define for abstracting compute memory barriers between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync() + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_ADD(x, y) InterlockedAdd(x, y) + +/// A define for abstracting compute atomic additions that also return the original value (in r) between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_ADD_RETURN(x, y, r) InterlockedAdd(x, y, r) + +/// A define for abstracting compute atomic OR between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_OR(x, y) InterlockedOr(x, y) + +/// A define for abstracting compute atomic min between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_MIN(x, y) InterlockedMin(x, y) + +/// A define for abstracting compute atomic max between shading languages.
+/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_MAX(x, y) InterlockedMax(x, y) + +/// A define added to accept static markup on functions to aid CPU/GPU portability of code. +/// +/// @ingroup HLSLCore +#define FFX_STATIC static + +/// A define for abstracting loop unrolling between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_UNROLL [unroll] + +/// A define for abstracting a 'greater than' comparison operator between two types. +/// +/// The expansion is fully parenthesised so it can be used inside larger expressions. +/// +/// @ingroup HLSLCore +#define FFX_GREATER_THAN(x, y) ((x) > (y)) + +/// A define for abstracting a 'greater than or equal' comparison operator between two types. +/// +/// The expansion is fully parenthesised so it can be used inside larger expressions. +/// +/// @ingroup HLSLCore +#define FFX_GREATER_THAN_EQUAL(x, y) ((x) >= (y)) + +/// A define for abstracting a 'less than' comparison operator between two types. +/// +/// The expansion is fully parenthesised so it can be used inside larger expressions. +/// +/// @ingroup HLSLCore +#define FFX_LESS_THAN(x, y) ((x) < (y)) + +/// A define for abstracting a 'less than or equal' comparison operator between two types. +/// +/// The expansion is fully parenthesised so it can be used inside larger expressions. +/// +/// @ingroup HLSLCore +#define FFX_LESS_THAN_EQUAL(x, y) ((x) <= (y)) + +/// A define for abstracting an 'equal' comparison operator between two types. +/// +/// The expansion is fully parenthesised so it can be used inside larger expressions. +/// +/// @ingroup HLSLCore +#define FFX_EQUAL(x, y) ((x) == (y)) + +/// A define for abstracting a 'not equal' comparison operator between two types. +/// +/// The expansion is fully parenthesised so it can be used inside larger expressions. +/// +/// @ingroup HLSLCore +#define FFX_NOT_EQUAL(x, y) ((x) != (y)) + +/// A define for abstracting matrix multiply operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_MATRIX_MULTIPLY(a, b) mul(a, b) + +/// A define for abstracting vector transformations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_TRANSFORM_VECTOR(a, b) mul(a, b) + +/// A define for abstracting modulo operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_MODULO(a, b) (fmod(a, b)) + +/// Broadcast a scalar value to a 1-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 2-dimensional floating point vector.
+/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// NOTE(review): like the other FFX_BROADCAST_* defines here, this expands to a scalar cast; +/// presumably it relies on HLSL implicit scalar-to-vector promotion - confirm. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 1-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32(x) FfxInt32(x) + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X2(x) FfxInt32(x) + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X3(x) FfxInt32(x) + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X4(x) FfxInt32(x) + +/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
+/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a) + +/// Convert FfxFloat32 to half (in lower 16-bits of output).
+/// +/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +/// +/// The function supports denormals. +/// +/// Some conversion rules are to make computations possibly "safer" on the GPU, +/// -INF & -NaN -> -65504 +/// +INF & +NaN -> +65504 +/// +/// NOTE(review): in this HLSL header the name is a direct alias of the f32tof16 intrinsic, so the +/// technique and conversion rules above presumably describe a different (non-HLSL) implementation - +/// confirm they hold for the intrinsic. +/// +/// @param [in] f The 32bit floating point value to convert. +/// +/// @returns +/// The closest 16bit floating point value to f. +/// +/// @ingroup HLSLCore +#define ffxF32ToF16 f32tof16 + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of value into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. +/// +/// @param [in] value A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 value) +{ + return ffxF32ToF16(value.x) | (ffxF32ToF16(value.y) << 16); +} + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) +{ + return FfxFloat32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) +{ + return FfxFloat32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast.
+/// +/// @returns +/// A 4-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) +{ + return FfxFloat32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x2 ffxBroadcast2(FfxInt32 value) +{ + return FfxInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x3 ffxBroadcast3(FfxInt32 value) +{ + return FfxInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x4 ffxBroadcast4(FfxInt32 value) +{ + return FfxInt32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) +{ + return FfxUInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional unsigned integer vector with value in each component.
+/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) +{ + return FfxUInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) +{ + return FfxUInt32x4(value, value, value, value); +} + +/// Extract a bitfield from a value. +/// +/// @param [in] src The value to extract the bitfield from. +/// @param [in] off The index of the lowest bit of the field. +/// @param [in] bits The width of the field in bits (presumably expected to be below 32, as the mask is built with 1u << bits - TODO confirm). +/// +/// @returns +/// src shifted right by off and masked to the low bits bits. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxBitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) +{ + FfxUInt32 mask = (1u << bits) - 1; + return (src >> off) & mask; +} + +/// Replace the bits of a value selected by a mask. +/// +/// @param [in] src The value whose unmasked bits are kept. +/// @param [in] ins The value providing the masked bits. +/// @param [in] mask The bit mask selecting which bits are taken from ins. +/// +/// @returns +/// (ins & mask) | (src & ~mask). +/// +/// @ingroup HLSLCore +FfxUInt32 ffxBitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) +{ + return (ins & mask) | (src & (~mask)); +} + +/// Replace the low bits of a value. +/// +/// @param [in] src The value whose upper bits are kept. +/// @param [in] ins The value providing the low bits. +/// @param [in] bits The number of low bits taken from ins (presumably expected to be below 32, as the mask is built with 1u << bits - TODO confirm). +/// +/// @returns +/// src with its low bits bits replaced by those of ins. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxBitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) +{ + FfxUInt32 mask = (1u << bits) - 1; + return (ins & mask) | (src & (~mask)); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer.
+/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxAsFloat(FfxUInt32 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) +{ + return asfloat(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calculate the inverse of. +/// +/// @returns +/// The inverse of x, computed with the rcp intrinsic. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxReciprocal(FfxFloat32 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calculate the inverse of. +/// +/// @returns +/// The inverse of x, computed with the rcp intrinsic. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxReciprocal(FfxFloat32x2 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calculate the inverse of. +/// +/// @returns +/// The inverse of x, computed with the rcp intrinsic. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxReciprocal(FfxFloat32x3 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value.
+/// +/// @param [in] x The value to calculate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxReciprocal(FfxFloat32x4 x) +{ + return rcp(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calculate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxRsqrt(FfxFloat32 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calculate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxRsqrt(FfxFloat32x2 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calculate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxRsqrt(FfxFloat32x3 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calculate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxRsqrt(FfxFloat32x4 x) +{ + return rsqrt(x); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function.
Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The scalar weight, applied to every component, that determines how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The scalar weight, applied to every component, that determines how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The scalar weight, applied to every component, that determines how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) +{ + return lerp(x, y, t); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxSaturate(FfxFloat32 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x.
+/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) +{ + return saturate(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function is expected to compile down to a single operation on GCN/RDNA hardware +/// (presumably V_FRACT_F32 - TODO confirm). It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxFract(FfxFloat32 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function is expected to compile down to a single operation on GCN/RDNA hardware +/// (presumably V_FRACT_F32 - TODO confirm). It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxFract(FfxFloat32x2 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function.
+/// +/// NOTE: This function is expected to compile down to a single operation on GCN/RDNA hardware +/// (presumably V_FRACT_F32 - TODO confirm). It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxFract(FfxFloat32x3 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function is expected to compile down to a single operation on GCN/RDNA hardware +/// (presumably V_FRACT_F32 - TODO confirm). It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxFract(FfxFloat32x4 x) +{ + return x - floor(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integers. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxRound(FfxFloat32 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integers. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxRound(FfxFloat32x2 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x.
 The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxRound(FfxFloat32x3 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxRound(FfxFloat32x4 x) +{ + return round(x); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation.
+/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware.
+/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z.
+/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ?
 max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the minimum of three values.
+/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation.
+/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
+/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return min(x, min(y, z)); +} + + +/// Shift a right by b bits, performing the shift on the signed 32-bit conversions of both operands and converting the result back to unsigned. +FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +/// Pack v.x and v.y into one 32-bit word via ffxF32ToF16: x goes to bits 0..15, y is shifted into bits 16..31. +/// NOTE(review): assumes ffxF32ToF16 (defined outside this section) yields a value confined to the low 16 bits - confirm. +FfxUInt32 ffxPackF32(FfxFloat32x2 v){ + FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +/// Split a into its low and high 16-bit halves and expand each half with f16tof32 (low half -> .x, high half -> .y). +FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ + return f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16)); +} + +/// Pack four floats into two 32-bit words by applying ffxPackF32 to v.xy and v.zw. +FfxUInt32x2 ffxPackF32x2(FfxFloat32x4 v){ + return FfxUInt32x2(ffxPackF32(v.xy), ffxPackF32(v.zw)); +} + +/// Unpack two 32-bit words into four floats by applying ffxUnpackF32 to a.x and a.y. +FfxFloat32x4 ffxUnpackF32x2(FfxUInt32x2 a){ + return FfxFloat32x4(ffxUnpackF32(a.x), ffxUnpackF32(a.y)); +} + +//============================================================================================================================== +// HLSL HALF +//============================================================================================================================== +//============================================================================================================================== +// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
+// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ +FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x) +{ + FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16)); + return FFX_MIN16_F2(t); +} +FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) +{ + return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y)); +} +FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x) +{ + FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16); + return FFX_MIN16_U2(t); +} +FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) +{ + return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y)); +} + +FfxUInt32x2 ffxFloat16x4ToUint32x2(FFX_MIN16_F4 v) +{ + FfxUInt32x2 result; + result.x = ffxF32ToF16(v.x) | (ffxF32ToF16(v.y) << 16); + result.y = ffxF32ToF16(v.z) | (ffxF32ToF16(v.w) << 16); + return result; +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +/// @note s2 is 1 when v != 0 and 0 when v = 0. The denominator is grouped as v + (s2 - 1) so the bracket is exactly 0 for v != 0 (quotient is exactly 1/v) and exactly -1 for v = 0; adding v + s2 first would absorb small |v| into 1.0 and produce a division by zero. +FfxFloat32 ffxInvertSafe(FfxFloat32 v){ + FfxFloat32 s = FfxFloat32(sign(v)); + FfxFloat32 s2 = s*s; + return s2/(v + (s2 - 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +/// @note s2 is 1 when v != 0 and 0 when v = 0. The denominator is grouped as v + (s2 - 1) so the bracket is exactly 0 for v != 0 (quotient is exactly 1/v) and exactly -1 for v = 0; adding v + s2 first would absorb small |v| into 1.0 and produce a division by zero. +FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ + FfxFloat32x2 s = FfxFloat32x2(sign(v)); + FfxFloat32x2 s2 = s*s; + return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0))); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +/// @note s2 is 1 when v != 0 and 0 when v = 0. The denominator is grouped as v + (s2 - 1) so the bracket is exactly 0 for v != 0 (quotient is exactly 1/v) and exactly -1 for v = 0; adding v + s2 first would absorb small |v| into 1.0 and produce a division by zero. +FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ + FfxFloat32x3 s = FfxFloat32x3(sign(v)); + FfxFloat32x3 s2 = s*s; + return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0))); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned.
+/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +/// @note s2 is 1 when v != 0 and 0 when v = 0. The denominator is grouped as v + (s2 - 1) so the bracket is exactly 0 for v != 0 (quotient is exactly 1/v) and exactly -1 for v = 0; adding v + s2 first would absorb small |v| into 1.0 and produce a division by zero. +FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ + FfxFloat32x4 s = FfxFloat32x4(sign(v)); + FfxFloat32x4 s2 = s*s; + return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0))); +} + +#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x)) +#if FFX_HALF + +#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x)) +#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x)) +#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x)) + +FfxUInt32 ffxPackF16(FfxFloat16x2 v){ + FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ + return FfxFloat16x2(f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16))); +} + +//------------------------------------------------------------------------------------------------------------------------------ +FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x) +{ + return ffxF32ToF16(x.x) + (ffxF32ToF16(x.y) << 16); +} +FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x) +{ + return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw)); +} +FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x) +{ + return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16); +} +FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x) +{ + return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw)); +} +#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x)) +#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x)) +#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x)) +#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x)) + +#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) +#define FFX_TO_UINT16(x) asuint16(x) +#define FFX_TO_UINT16X2(x) asuint16(x) +#define FFX_TO_UINT16X3(x) asuint16(x) +#define FFX_TO_UINT16X4(x) asuint16(x) +#else +#define
FFX_TO_UINT16(a) FFX_MIN16_U(ffxF32ToF16(FfxFloat32(a))) +#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y)) +#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z)) +#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w)) +#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) + +#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) +#define FFX_TO_FLOAT16(x) asfloat16(x) +#define FFX_TO_FLOAT16X2(x) asfloat16(x) +#define FFX_TO_FLOAT16X3(x) asfloat16(x) +#define FFX_TO_FLOAT16X4(x) asfloat16(x) +#else +#define FFX_TO_FLOAT16(a) FFX_MIN16_F(f16tof32(FfxUInt32(a))) +#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y)) +#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z)) +#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w)) +#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) + +//============================================================================================================================== +#define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a) + +//------------------------------------------------------------------------------------------------------------------------------ +#define FFX_BROADCAST_INT16(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a) + +//------------------------------------------------------------------------------------------------------------------------------ +#define FFX_BROADCAST_UINT16(a) FFX_MIN16_U(a) +#define 
FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a) + +//============================================================================================================================== +FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a) +{ + return FFX_MIN16_U(abs(FFX_MIN16_I(a))); +} +FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a) +{ + return FFX_MIN16_U2(abs(FFX_MIN16_I2(a))); +} +FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a) +{ + return FFX_MIN16_U3(abs(FFX_MIN16_I3(a))); +} +FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a) +{ + return FFX_MIN16_U4(abs(FFX_MIN16_I4(a))); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m) +{ + return max(n, min(x, m)); +} +FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m) +{ + return max(n, min(x, m)); +} +FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m) +{ + return max(n, min(x, m)); +} +FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m) +{ + return max(n, min(x, m)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// V_FRACT_F16 (note DX frac() is different). 
+FFX_MIN16_F ffxFract(FFX_MIN16_F x) +{ + return x - floor(x); +} +FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x) +{ + return x - floor(x); +} +FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x) +{ + return x - floor(x); +} +FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x) +{ + return x - floor(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a) +{ + return lerp(x, y, a); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) +{ + return max(x, max(y, z)); +} +FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) +{ + return max(x, max(y, z)); +} +FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) +{ + return max(x, max(y, z)); +} +FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) +{ + return max(x, max(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) +{ + return min(x, min(y, z)); +} +FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) +{ + return min(x, min(y, z)); +} 
+FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) +{ + return min(x, min(y, z)); +} +FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) +{ + return min(x, min(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x) +{ + return rcp(x); +} +FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x) +{ + return rcp(x); +} +FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x) +{ + return rcp(x); +} +FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x) +{ + return rcp(x); +} 
+//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x) +{ + return rsqrt(x); +} +FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x) +{ + return rsqrt(x); +} +FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x) +{ + return rsqrt(x); +} +FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x) +{ + return rsqrt(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxSaturate(FFX_MIN16_F x) +{ + return saturate(x); +} +FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x) +{ + return saturate(x); +} +FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x) +{ + return saturate(x); +} +FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x) +{ + return saturate(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b) +{ + return FFX_MIN16_U(FFX_MIN16_I(a) >> FFX_MIN16_I(b)); +} +FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b) +{ + return FFX_MIN16_U2(FFX_MIN16_I2(a) >> FFX_MIN16_I2(b)); +} +FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b) +{ + return FFX_MIN16_U3(FFX_MIN16_I3(a) >> FFX_MIN16_I3(b)); +} +FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b) +{ + return FFX_MIN16_U4(FFX_MIN16_I4(a) >> FFX_MIN16_I4(b)); +} +#endif // FFX_HALF + +//============================================================================================================================== +// HLSL WAVE +//============================================================================================================================== +#if defined(FFX_WAVE) +// Where 'x' must be a compile time literal. 
+FfxFloat32 ffxWaveXorF1(FfxFloat32 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x2 ffxWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x3 ffxWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x4 ffxWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32 ffxWaveXorU1(FfxUInt32 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x2 ffxWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x3 ffxWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x4 ffxWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxBoolean ffxWaveIsFirstLane() +{ + return WaveIsFirstLane(); +} +FfxUInt32 ffxWaveLaneIndex() +{ + return WaveGetLaneIndex(); +} +FfxBoolean ffxWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, x); +} +FfxUInt32 ffxWavePrefixCountBits(FfxBoolean v) +{ + return WavePrefixCountBits(v); +} +FfxUInt32 ffxWaveActiveCountBits(FfxBoolean v) +{ + return WaveActiveCountBits(v); +} +FfxUInt32 ffxWaveReadLaneFirstU1(FfxUInt32 v) +{ + return WaveReadLaneFirst(v); +} +FfxUInt32x2 ffxWaveReadLaneFirstU2(FfxUInt32x2 v) +{ + return WaveReadLaneFirst(v); +} +FfxBoolean ffxWaveReadLaneFirstB1(FfxBoolean v) +{ + return WaveReadLaneFirst(v); +} +FfxUInt32 ffxWaveOr(FfxUInt32 a) +{ + return WaveActiveBitOr(a); +} +FfxUInt32 ffxWaveMin(FfxUInt32 a) +{ + return WaveActiveMin(a); +} +FfxFloat32 ffxWaveMin(FfxFloat32 a) +{ + return WaveActiveMin(a); +} +FfxUInt32 ffxWaveMax(FfxUInt32 a) +{ + return WaveActiveMax(a); +} +FfxFloat32 ffxWaveMax(FfxFloat32 a) +{ + return WaveActiveMax(a); +} +FfxUInt32 ffxWaveSum(FfxUInt32 a) +{ + return WaveActiveSum(a); +} +FfxFloat32 
ffxWaveSum(FfxFloat32 a) +{ + return WaveActiveSum(a); +} +FfxUInt32 ffxWaveLaneCount() +{ + return WaveGetLaneCount(); +} +FfxBoolean ffxWaveAllTrue(FfxBoolean v) +{ + return WaveActiveAllTrue(v); +} +FfxFloat32 ffxQuadReadX(FfxFloat32 v) +{ + return QuadReadAcrossX(v); +} +FfxFloat32x2 ffxQuadReadX(FfxFloat32x2 v) +{ + return QuadReadAcrossX(v); +} +FfxFloat32 ffxQuadReadY(FfxFloat32 v) +{ + return QuadReadAcrossY(v); +} +FfxFloat32x2 ffxQuadReadY(FfxFloat32x2 v) +{ + return QuadReadAcrossY(v); +} + +#if FFX_HALF +FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); +} +FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); +} +FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); +} +FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); +} +#endif // FFX_HALF +#endif // #if defined(FFX_WAVE) diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_hlsl.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_hlsl.h.meta new file mode 100644 index 0000000..33529b2 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_hlsl.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 17eb39589723b2e4da28b7b868648bb5 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + 
second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_portability.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_portability.h new file mode 100644 index 0000000..12147b9 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_portability.h @@ -0,0 +1,46 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +void ffxOpAAddOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) // Writes a + ffxBroadcast3(b) to d. +{ + d = a + ffxBroadcast3(b); +} + +void ffxOpACpyF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a) // Copies a to d. +{ + d = a; +} + +void ffxOpAMulF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) // Writes the product of the two vectors a * b to d. +{ + d = a * b; +} + +void ffxOpAMulOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) // Writes a * b to d, where b is a scalar (no explicit ffxBroadcast3 here, unlike ffxOpAAddOneF3). +{ + d = a * b; +} + +void ffxOpARcpF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a) // Writes ffxReciprocal(a) to d. +{ + d = ffxReciprocal(a); +} diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_portability.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_portability.h.meta new file mode 100644 index 0000000..0fb0f51 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/ffx_core_portability.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 44bbc01a8b7e1fd4faf626a0ea016ae3 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1.meta new file mode 100644 index 0000000..c05e372 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: d4c52ea2f2f803f439c092b6bdc94d6a +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: +
assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1.h new file mode 100644 index 0000000..82ebf21 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1.h @@ -0,0 +1,1252 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// @defgroup FfxGPUFsr1 FidelityFX FSR1 +/// FidelityFX Super Resolution 1 GPU documentation +/// +/// @ingroup FfxGPUEffects + +/// Setup required constant values for EASU (works on CPU or GPU). 
+/// +/// @param [out] con0 Receives, as float bit patterns, the X/Y scale (input viewport size / output size) in [0..1] and the X/Y offset (0.5 * scale - 0.5) in [2..3]. +/// @param [out] con1 Receives 1 / input size in [0..1] and the offset to gather (0), {+1 / inputSizeInPixelsX, -1 / inputSizeInPixelsY}, in [2..3]. +/// @param [out] con2 Receives the offsets from gather (0) to gather (1) in [0..1] and to gather (2) in [2..3]. +/// @param [out] con3 Receives the offset from gather (0) to gather (3) in [0..1]; [2..3] are set to zero. +/// @param [in] inputViewportInPixelsX The rendered image resolution being upscaled in X dimension. +/// @param [in] inputViewportInPixelsY The rendered image resolution being upscaled in Y dimension. +/// @param [in] inputSizeInPixelsX The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension. +/// @param [in] inputSizeInPixelsY The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension. +/// @param [in] outputSizeInPixelsX The display resolution which the input image gets upscaled to in X dimension. +/// @param [in] outputSizeInPixelsY The display resolution which the input image gets upscaled to in Y dimension. +/// +/// @ingroup FfxGPUFsr1 +FFX_STATIC void ffxFsrPopulateEasuConstants( + FFX_PARAMETER_INOUT FfxUInt32x4 con0, + FFX_PARAMETER_INOUT FfxUInt32x4 con1, + FFX_PARAMETER_INOUT FfxUInt32x4 con2, + FFX_PARAMETER_INOUT FfxUInt32x4 con3, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY) +{ + // Output integer position to a pixel position in viewport. + con0[0] = ffxAsUInt32(inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX)); + con0[1] = ffxAsUInt32(inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY)); + con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5)); + con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5)); + + // Viewport pixel position to normalized image space. + // This is used to get upper-left of 'F' tap.
+ con1[0] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsX)); + con1[1] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsY)); + + // Centers of gather4, first offset from upper-left of 'F'. + // +---+---+ + // | | | + // +--(0)--+ + // | b | c | + // +---F---+---+---+ + // | e | f | g | h | + // +--(1)--+--(2)--+ + // | i | j | k | l | + // +---+---+---+---+ + // | n | o | + // +--(3)--+ + // | | | + // +---+---+ + con1[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); + con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsY)); + + // These are from (0) instead of 'F'. + con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsX)); + con2[1] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); + con2[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); + con2[3] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); + con3[0] = ffxAsUInt32(FfxFloat32(0.0) * ffxReciprocal(inputSizeInPixelsX)); + con3[1] = ffxAsUInt32(FfxFloat32(4.0) * ffxReciprocal(inputSizeInPixelsY)); + con3[2] = con3[3] = 0; +} + +/// Setup required constant values for EASU with an input image offset (works on CPU or GPU). +/// +/// @param [out] con0 +/// @param [out] con1 +/// @param [out] con2 +/// @param [out] con3 +/// @param [in] inputViewportInPixelsX The resolution of the input in the X dimension. +/// @param [in] inputViewportInPixelsY The resolution of the input in the Y dimension. +/// @param [in] inputSizeInPixelsX The input size in pixels in the X dimension. +/// @param [in] inputSizeInPixelsY The input size in pixels in the Y dimension. +/// @param [in] outputSizeInPixelsX The output size in pixels in the X dimension. +/// @param [in] outputSizeInPixelsY The output size in pixels in the Y dimension. +/// @param [in] inputOffsetInPixelsX The input image offset in the X dimension into the resource containing it (useful for dynamic resolution).
+/// @param [in] inputOffsetInPixelsY The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution). +/// +/// @ingroup FfxGPUFsr1 +FFX_STATIC void ffxFsrPopulateEasuConstantsOffset( + FFX_PARAMETER_INOUT FfxUInt32x4 con0, + FFX_PARAMETER_INOUT FfxUInt32x4 con1, + FFX_PARAMETER_INOUT FfxUInt32x4 con2, + FFX_PARAMETER_INOUT FfxUInt32x4 con3, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY) +{ + ffxFsrPopulateEasuConstants( + con0, + con1, + con2, + con3, + inputViewportInPixelsX, + inputViewportInPixelsY, + inputSizeInPixelsX, + inputSizeInPixelsY, + outputSizeInPixelsX, + outputSizeInPixelsY); + + // Override con0[2..3] so the offset terms also include the input image offset. + con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX); + con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY); +} + +#if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) +// Input callback prototypes, need to be implemented by calling shader +FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p); +FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p); +FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p); + +// Filtering for a given tap for the scalar. +void fsrEasuTapFloat( + FFX_PARAMETER_INOUT FfxFloat32x3 accumulatedColor, // Accumulated color, with negative lobe. + FFX_PARAMETER_INOUT FfxFloat32 accumulatedWeight, // Accumulated weight. + FFX_PARAMETER_IN FfxFloat32x2 pixelOffset, // Pixel offset from resolve position to tap. + FFX_PARAMETER_IN FfxFloat32x2 gradientDirection, // Gradient direction.
+ FFX_PARAMETER_IN FfxFloat32x2 length, // Per-axis scale applied to the rotated offset (the anisotropy). + FFX_PARAMETER_IN FfxFloat32 negativeLobeStrength, // Negative lobe strength. + FFX_PARAMETER_IN FfxFloat32 clippingPoint, // Clipping point: upper bound applied to distance^2. + FFX_PARAMETER_IN FfxFloat32x3 color) // Tap color. +{ + // Rotate offset by direction. + FfxFloat32x2 rotatedOffset; + rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y); + rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x); + + // Anisotropy. + rotatedOffset *= length; + + // Compute distance^2. + FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y; + + // Limit to the window as at corner, 2 taps can easily be outside. + distanceSquared = ffxMin(distanceSquared, clippingPoint); + + // Approximation of Lanczos2 without sin() or rcp(), or sqrt() to get x. + // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 + // |_______________________________________| |_______________| + // base window + // The general form of the 'base' is, + // (a*(b*x^2-1)^2-(a-1)) + // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. + FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0); + FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0); + weightB *= weightB; + weightA *= weightA; + weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0)); + FfxFloat32 weight = weightB * weightA; + + // Do weighted average. + accumulatedColor += color * weight; + accumulatedWeight += weight; +} + +// Accumulate direction and length.
+void fsrEasuSetFloat( + FFX_PARAMETER_INOUT FfxFloat32x2 direction, // Accumulator for the weighted x/y differences. + FFX_PARAMETER_INOUT FfxFloat32 length, // Accumulator for the weighted, shaped length terms of both axes. + FFX_PARAMETER_IN FfxFloat32x2 pp, // Position used to form the bilinear weight. + FFX_PARAMETER_IN FfxBoolean biS, // Selects the 's' weight, (1 - pp.x) * (1 - pp.y). + FFX_PARAMETER_IN FfxBoolean biT, // Selects the 't' weight, pp.x * (1 - pp.y). + FFX_PARAMETER_IN FfxBoolean biU, // Selects the 'u' weight, (1 - pp.x) * pp.y. + FFX_PARAMETER_IN FfxBoolean biV, // Selects the 'v' weight, pp.x * pp.y. + FFX_PARAMETER_IN FfxFloat32 lA, // Value at 'a' of the '+' pattern drawn below. + FFX_PARAMETER_IN FfxFloat32 lB, // Value at 'b'. + FFX_PARAMETER_IN FfxFloat32 lC, // Value at 'c' (the centre). + FFX_PARAMETER_IN FfxFloat32 lD, // Value at 'd'. + FFX_PARAMETER_IN FfxFloat32 lE) // Value at 'e'. +{ + // Compute bilinear weight, branches factor out as predicates are compiler time immediates. + // s t + // u v + FfxFloat32 weight = FfxFloat32(0.0); + if (biS) + weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y); + if (biT) + weight = pp.x * (FfxFloat32(1.0) - pp.y); + if (biU) + weight = (FfxFloat32(1.0) - pp.x) * pp.y; + if (biV) + weight = pp.x * pp.y; + + // Direction is the '+' diff. + // a + // b c d + // e + // Then takes magnitude from abs average of both sides of 'c'. + // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. + FfxFloat32 dc = lD - lC; + FfxFloat32 cb = lC - lB; + FfxFloat32 lengthX = max(abs(dc), abs(cb)); + lengthX = ffxApproximateReciprocal(lengthX); + FfxFloat32 directionX = lD - lB; + direction.x += directionX * weight; + lengthX = ffxSaturate(abs(directionX) * lengthX); + lengthX *= lengthX; + length += lengthX * weight; + + // Repeat for the y axis. + FfxFloat32 ec = lE - lC; + FfxFloat32 ca = lC - lA; + FfxFloat32 lengthY = max(abs(ec), abs(ca)); + lengthY = ffxApproximateReciprocal(lengthY); + FfxFloat32 directionY = lE - lA; + direction.y += directionY * weight; + lengthY = ffxSaturate(abs(directionY) * lengthY); + lengthY *= lengthY; + length += lengthY * weight; +} + +/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations. +/// +/// @param [out] pix The computed color of a pixel. +/// @param [in] ip Integer pixel position within the output.
+/// @param [in] con0 The first constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con1 The second constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con2 The third constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con3 The fourth constant value generated by ffxFsrPopulateEasuConstants. +/// +/// @ingroup FfxGPUFsr1 +void ffxFsrEasuFloat( + FFX_PARAMETER_OUT FfxFloat32x3 pix, // Filtered color, clamped to the min/max of the four taps f, g, j, k. + FFX_PARAMETER_IN FfxUInt32x2 ip, // Integer pixel position; scaled and offset by con0 below. + FFX_PARAMETER_IN FfxUInt32x4 con0, + FFX_PARAMETER_IN FfxUInt32x4 con1, + FFX_PARAMETER_IN FfxUInt32x4 con2, + FFX_PARAMETER_IN FfxUInt32x4 con3) +{ + // Get position of 'f'. + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); + FfxFloat32x2 fp = floor(pp); + pp -= fp; + + // 12-tap kernel. + // b c + // e f g h + // i j k l + // n o + // Gather 4 ordering. + // a b + // r g + // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, + // a b <- unused (z) + // r g + // a b a b + // r g r g + // a b + // r g <- unused (z) + // Allowing dead-code removal to remove the 'z's. + FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); + + // These are from p0 to avoid pulling two constants on pre-Navi hardware. + FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); + FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); + FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); + FfxFloat32x4 bczzR = FsrEasuRF(p0); + FfxFloat32x4 bczzG = FsrEasuGF(p0); + FfxFloat32x4 bczzB = FsrEasuBF(p0); + FfxFloat32x4 ijfeR = FsrEasuRF(p1); + FfxFloat32x4 ijfeG = FsrEasuGF(p1); + FfxFloat32x4 ijfeB = FsrEasuBF(p1); + FfxFloat32x4 klhgR = FsrEasuRF(p2); + FfxFloat32x4 klhgG = FsrEasuGF(p2); + FfxFloat32x4 klhgB = FsrEasuBF(p2); + FfxFloat32x4 zzonR = FsrEasuRF(p3); + FfxFloat32x4 zzonG = FsrEasuGF(p3); + FfxFloat32x4 zzonB = FsrEasuBF(p3); + + // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+ FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG); + FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG); + FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG); + FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG); + + // Rename. + FfxFloat32 bL = bczzL.x; + FfxFloat32 cL = bczzL.y; + FfxFloat32 iL = ijfeL.x; + FfxFloat32 jL = ijfeL.y; + FfxFloat32 fL = ijfeL.z; + FfxFloat32 eL = ijfeL.w; + FfxFloat32 kL = klhgL.x; + FfxFloat32 lL = klhgL.y; + FfxFloat32 hL = klhgL.z; + FfxFloat32 gL = klhgL.w; + FfxFloat32 oL = zzonL.z; + FfxFloat32 nL = zzonL.w; + + // Accumulate for bilinear interpolation. + FfxFloat32x2 dir = ffxBroadcast2(0.0); + FfxFloat32 len = FfxFloat32(0.0); + fsrEasuSetFloat(dir, len, pp, FFX_TRUE, FFX_FALSE, FFX_FALSE, FFX_FALSE, bL, eL, fL, gL, jL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_TRUE, FFX_FALSE, FFX_FALSE, cL, fL, gL, hL, kL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_TRUE, FFX_FALSE, fL, iL, jL, kL, nL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_FALSE, FFX_TRUE, gL, jL, kL, lL, oL); + + // Normalize with approximation, and cleanup close to zero. + FfxFloat32x2 dir2 = dir * dir; + FfxFloat32 dirR = dir2.x + dir2.y; + FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0); + dirR = ffxApproximateReciprocalSquareRoot(dirR); + dirR = zro ? FfxFloat32(1.0) : dirR; + dir.x = zro ? FfxFloat32(1.0) : dir.x; + dir *= ffxBroadcast2(dirR); + + // Transform from {0 to 2} to {0 to 1} range, and shape with square. + len = len * FfxFloat32(0.5); + len *= len; + + // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
+ FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y))); + + // Anisotropic length after rotation, + // x := 1.0 lerp to 'stretch' on edges + // y := 1.0 lerp to 2x on edges + FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len); + + // Based on the amount of 'edge', + // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}. + FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len; + + // Set distance^2 clipping point to the end of the adjustable window. + FfxFloat32 clp = ffxApproximateReciprocal(lob); + + // Accumulation mixed with min/max of 4 nearest. + // b c + // e f g h + // i j k l + // n o + FfxFloat32x3 min4 = + ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), + FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); + FfxFloat32x3 max4 = + max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); + + // Accumulation.
+ FfxFloat32x3 aC = ffxBroadcast3(0.0); + FfxFloat32 aW = FfxFloat32(0.0); + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x)); // b + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y)); // c + fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x)); // i + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)); // j + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z)); // f + fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w)); // e + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); // k + fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y)); // l + fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z)); // h + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w)); // g + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z)); // o + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n + + // Normalize and dering.
+ pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(ffxReciprocal(aW)))); +} +#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) + +#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF) +// Input callback prototypes, need to be implemented by calling shader +FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p); +FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p); +FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p); + +// This runs 2 taps in parallel. +void FsrEasuTapH( + FFX_PARAMETER_INOUT FfxFloat16x2 aCR, // Red accumulator, one component per tap of the pair. + FFX_PARAMETER_INOUT FfxFloat16x2 aCG, // Green accumulator. + FFX_PARAMETER_INOUT FfxFloat16x2 aCB, // Blue accumulator. + FFX_PARAMETER_INOUT FfxFloat16x2 aW, // Weight accumulator. + FFX_PARAMETER_IN FfxFloat16x2 offX, // X offsets of the two taps. + FFX_PARAMETER_IN FfxFloat16x2 offY, // Y offsets of the two taps. + FFX_PARAMETER_IN FfxFloat16x2 dir, // Direction used to rotate the offsets. + FFX_PARAMETER_IN FfxFloat16x2 len, // Per-axis scale applied after rotation. + FFX_PARAMETER_IN FfxFloat16 lob, // Multiplier on distance^2 in the window term. + FFX_PARAMETER_IN FfxFloat16 clp, // Upper bound applied to distance^2. + FFX_PARAMETER_IN FfxFloat16x2 cR, // Red of the two taps. + FFX_PARAMETER_IN FfxFloat16x2 cG, // Green of the two taps. + FFX_PARAMETER_IN FfxFloat16x2 cB) // Blue of the two taps. +{ + FfxFloat16x2 vX, vY; + vX = offX * dir.xx + offY * dir.yy; + vY = offX * (-dir.yy) + offY * dir.xx; + vX *= len.x; + vY *= len.y; + FfxFloat16x2 d2 = vX * vX + vY * vY; + d2 = min(d2, FFX_BROADCAST_FLOAT16X2(clp)); + FfxFloat16x2 wB = FFX_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0); + FfxFloat16x2 wA = FFX_BROADCAST_FLOAT16X2(lob) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0); + wB *= wB; + wA *= wA; + wB = FFX_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFX_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0)); + FfxFloat16x2 w = wB * wA; + aCR += cR * w; + aCG += cG * w; + aCB += cB * w; + aW += w; +} + +// This runs 2 taps in parallel.
+void FsrEasuSetH( + FFX_PARAMETER_INOUT FfxFloat16x2 dirPX, // Accumulator for the weighted x differences of the pair. + FFX_PARAMETER_INOUT FfxFloat16x2 dirPY, // Accumulator for the weighted y differences of the pair. + FFX_PARAMETER_INOUT FfxFloat16x2 lenP, // Accumulator for the weighted length terms of the pair. + FFX_PARAMETER_IN FfxFloat16x2 pp, // Position used to form the pair of bilinear weights. + FFX_PARAMETER_IN FfxBoolean biST, // Selects weights {1 - pp.x, pp.x} * (1 - pp.y). + FFX_PARAMETER_IN FfxBoolean biUV, // Selects weights {1 - pp.x, pp.x} * pp.y. + FFX_PARAMETER_IN FfxFloat16x2 lA, + FFX_PARAMETER_IN FfxFloat16x2 lB, + FFX_PARAMETER_IN FfxFloat16x2 lC, + FFX_PARAMETER_IN FfxFloat16x2 lD, + FFX_PARAMETER_IN FfxFloat16x2 lE) +{ + FfxFloat16x2 w = FFX_BROADCAST_FLOAT16X2(0.0); + + if (biST) + w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(FFX_BROADCAST_FLOAT16(1.0) - pp.y); + + if (biUV) + w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(pp.y); + + // ABS is not free in the packed FP16 path. + FfxFloat16x2 dc = lD - lC; + FfxFloat16x2 cb = lC - lB; + FfxFloat16x2 lenX = max(abs(dc), abs(cb)); + lenX = ffxReciprocalHalf(lenX); + + FfxFloat16x2 dirX = lD - lB; + dirPX += dirX * w; + lenX = ffxSaturate(abs(dirX) * lenX); + lenX *= lenX; + lenP += lenX * w; + FfxFloat16x2 ec = lE - lC; + FfxFloat16x2 ca = lC - lA; + FfxFloat16x2 lenY = max(abs(ec), abs(ca)); + lenY = ffxReciprocalHalf(lenY); + FfxFloat16x2 dirY = lE - lA; + dirPY += dirY * w; + lenY = ffxSaturate(abs(dirY) * lenY); + lenY *= lenY; + lenP += lenY * w; +} + +void FsrEasuH( + FFX_PARAMETER_OUT FfxFloat16x3 pix, // Filtered color, clamped to the min/max of the four taps f, g, j, k. + FFX_PARAMETER_IN FfxUInt32x2 ip, // Integer pixel position; scaled and offset by con0 below. + FFX_PARAMETER_IN FfxUInt32x4 con0, + FFX_PARAMETER_IN FfxUInt32x4 con1, + FFX_PARAMETER_IN FfxUInt32x4 con2, + FFX_PARAMETER_IN FfxUInt32x4 con3) +{ + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); + FfxFloat32x2 fp = floor(pp); + pp -= fp; + FfxFloat16x2 ppp = FfxFloat16x2(pp); + + FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); + FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); + FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); + FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); + FfxFloat16x4 bczzR = FsrEasuRH(p0); + FfxFloat16x4 bczzG = FsrEasuGH(p0); +
FfxFloat16x4 bczzB = FsrEasuBH(p0); + FfxFloat16x4 ijfeR = FsrEasuRH(p1); + FfxFloat16x4 ijfeG = FsrEasuGH(p1); + FfxFloat16x4 ijfeB = FsrEasuBH(p1); + FfxFloat16x4 klhgR = FsrEasuRH(p2); + FfxFloat16x4 klhgG = FsrEasuGH(p2); + FfxFloat16x4 klhgB = FsrEasuBH(p2); + FfxFloat16x4 zzonR = FsrEasuRH(p3); + FfxFloat16x4 zzonG = FsrEasuGH(p3); + FfxFloat16x4 zzonB = FsrEasuBH(p3); + + FfxFloat16x4 bczzL = bczzB * FFX_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFX_BROADCAST_FLOAT16X4(0.5) + bczzG); + FfxFloat16x4 ijfeL = ijfeB * FFX_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFX_BROADCAST_FLOAT16X4(0.5) + ijfeG); + FfxFloat16x4 klhgL = klhgB * FFX_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFX_BROADCAST_FLOAT16X4(0.5) + klhgG); + FfxFloat16x4 zzonL = zzonB * FFX_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFX_BROADCAST_FLOAT16X4(0.5) + zzonG); + FfxFloat16 bL = bczzL.x; + FfxFloat16 cL = bczzL.y; + FfxFloat16 iL = ijfeL.x; + FfxFloat16 jL = ijfeL.y; + FfxFloat16 fL = ijfeL.z; + FfxFloat16 eL = ijfeL.w; + FfxFloat16 kL = klhgL.x; + FfxFloat16 lL = klhgL.y; + FfxFloat16 hL = klhgL.z; + FfxFloat16 gL = klhgL.w; + FfxFloat16 oL = zzonL.z; + FfxFloat16 nL = zzonL.w; + + // This part is different, accumulating 2 taps in parallel.
+ FfxFloat16x2 dirPX = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 dirPY = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 lenP = FFX_BROADCAST_FLOAT16X2(0.0); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxBoolean(true), + FfxBoolean(false), + FfxFloat16x2(bL, cL), + FfxFloat16x2(eL, fL), + FfxFloat16x2(fL, gL), + FfxFloat16x2(gL, hL), + FfxFloat16x2(jL, kL)); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxBoolean(false), + FfxBoolean(true), + FfxFloat16x2(fL, gL), + FfxFloat16x2(iL, jL), + FfxFloat16x2(jL, kL), + FfxFloat16x2(kL, lL), + FfxFloat16x2(nL, oL)); + FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g); + FfxFloat16 len = lenP.r + lenP.g; + + FfxFloat16x2 dir2 = dir * dir; + FfxFloat16 dirR = dir2.x + dir2.y; + FfxUInt32 zro = FfxUInt32(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0)); + dirR = ffxApproximateReciprocalSquareRootHalf(dirR); + dirR = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR; + dir.x = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dir.x; + dir *= FFX_BROADCAST_FLOAT16X2(dirR); + len = len * FFX_BROADCAST_FLOAT16(0.5); + len *= len; + FfxFloat16 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y))); + FfxFloat16x2 len2 = + FfxFloat16x2(FFX_BROADCAST_FLOAT16(1.0) + (stretch - FFX_BROADCAST_FLOAT16(1.0)) * len, FFX_BROADCAST_FLOAT16(1.0) + FFX_BROADCAST_FLOAT16(-0.5) * len); + FfxFloat16 lob = FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len; + FfxFloat16 clp = ffxApproximateReciprocalHalf(lob); + + // FP16 is different, using packed trick to do min and max in same operation.
+ FfxFloat16x2 bothR = + max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x))); + FfxFloat16x2 bothG = + max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x))); + FfxFloat16x2 bothB = + max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x))); + + // This part is different for FP16, working pairs of taps at a time. + FfxFloat16x2 pR = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pG = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pB = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pW = FFX_BROADCAST_FLOAT16X2(0.0); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw); + FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y); + FfxFloat16 aW = pW.x + pW.y; + + // Slightly different for FP16 version due to combined min and max.
+ pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW)))); +} +#endif // #if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF) + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING +// +//------------------------------------------------------------------------------------------------------------------------------ +// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness. +// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping. +// RCAS also has a built in process to limit sharpening of what it detects as possible noise. +// The RCAS sharpener does not support scaling, as it should be applied after EASU scaling. +// Pass EASU output straight into RCAS, no color conversions necessary. +//------------------------------------------------------------------------------------------------------------------------------ +// RCAS is based on the following logic. +// RCAS uses a 5 tap filter in a cross pattern (same as CAS), +// w n +// w 1 w for taps w m e +// w s +// Where 'w' is the negative lobe weight.
+// output = (w*(n+e+w+s)+m)/(4*w+1) +// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range, +// 0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s) +// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) +// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. +// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. +// So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps. +// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. +// This stabilizes RCAS. +// RCAS does a simple highpass which is normalized against the local contrast then shaped, +// 0.25 +// 0.25 -1 0.25 +// 0.25 +// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. +// +// GLSL example for the required callbacks : +// +// FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));} +// void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b) +// { +// //do any simple input color conversions here or leave empty if none needed +// } +// +// FsrRcasCon needs to be called from the CPU or GPU to set up constants. +// Including a GPU example here, the 'con' value would be stored out to a constant buffer. +// +// FfxUInt32x4 con; +// FsrRcasCon(con, +// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. +// --------------- +// RCAS sharpening supports a CAS-like pass-through alpha via, +// #define FSR_RCAS_PASSTHROUGH_ALPHA 1 +// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define, +// #define FSR_RCAS_DENOISE 1 +//============================================================================================================================== +// This is set at the limit of providing unnatural results for sharpening. +#define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// CONSTANT SETUP +//============================================================================================================================== +// Call to setup required constant values (works on CPU or GPU). NOTE(review): 'con' is taken without an output qualifier; confirm the values written below reach the caller when this is compiled for the GPU. + FFX_STATIC void FsrRcasCon(FfxUInt32x4 con, + // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. + FfxFloat32 sharpness) + { + // Transform from stops to linear value.
+ sharpness = exp2(-sharpness); + FfxFloat32x2 hSharp = {sharpness, sharpness}; + con[0] = ffxAsUInt32(sharpness); + con[1] = ffxPackHalf2x16(hSharp); + con[2] = 0; + con[3] = 0; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// NON-PACKED 32-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU)&&defined(FSR_RCAS_F) + // Input callback prototypes that need to be implemented by calling shader + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p); + void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b); +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasF(out FfxFloat32 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. + out FfxFloat32 pixG, + out FfxFloat32 pixB, +#ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat32 pixA, +#endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con) + { // Constant generated by FsrRcasCon(). + // Algorithm uses minimal 3x3 pixel neighborhood.
+ // b + // d e f + // h + FfxInt32x2 sp = FfxInt32x2(ip); + FfxFloat32x3 b = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb; + FfxFloat32x3 d = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb; +#ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat32x4 ee = FsrRcasLoadF(sp); + FfxFloat32x3 e = ee.rgb; + pixA = ee.a; +#else + FfxFloat32x3 e = FsrRcasLoadF(sp).rgb; +#endif + FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb; + FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb; + // Rename (32-bit) or regroup (16-bit). + FfxFloat32 bR = b.r; + FfxFloat32 bG = b.g; + FfxFloat32 bB = b.b; + FfxFloat32 dR = d.r; + FfxFloat32 dG = d.g; + FfxFloat32 dB = d.b; + FfxFloat32 eR = e.r; + FfxFloat32 eG = e.g; + FfxFloat32 eB = e.b; + FfxFloat32 fR = f.r; + FfxFloat32 fG = f.g; + FfxFloat32 fB = f.b; + FfxFloat32 hR = h.r; + FfxFloat32 hG = h.g; + FfxFloat32 hB = h.b; + // Run optional input transform. + FsrRcasInputF(bR, bG, bB); + FsrRcasInputF(dR, dG, dB); + FsrRcasInputF(eR, eG, eB); + FsrRcasInputF(fR, fG, fB); + FsrRcasInputF(hR, hG, hB); + // Luma times 2. + FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG); + FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG); + FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG); + FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG); + FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG); + // Noise detection. + FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL; + nz = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL))); + nz = FfxFloat32(-0.5) * nz + FfxFloat32(1.0); + // Min and max of ring. 
+ FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR); + FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG); + FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB); + FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR); + FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG); + FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB); + // Immediate constants for peak range. + FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R); + FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G); + FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B); + FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y); + FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y); + FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y); + FfxFloat32 lobeR = max(-hitMinR, hitMaxR); + FfxFloat32 lobeG = max(-hitMinG, hitMaxG); + FfxFloat32 lobeB = max(-hitMinB, hitMaxB); + FfxFloat32 lobe = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat + (con.x); + // Apply noise removal. +#ifdef FSR_RCAS_DENOISE + lobe *= nz; +#endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. 
+ FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0)); + pixR = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL; + pixG = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL; + pixB = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL; + return; + } +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// NON-PACKED 16-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FSR_RCAS_H) + // Input callback prototypes that need to be implemented by calling shader + FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p); + void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b); +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasH( + out FfxFloat16 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. + out FfxFloat16 pixG, + out FfxFloat16 pixB, + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat16 pixA, + #endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con){ // Constant generated by RcasSetup(). + // Sharpening algorithm uses minimal 3x3 pixel neighborhood. 
+ // b + // d e f + // h + FfxInt16x2 sp=FfxInt16x2(ip); + FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee=FsrRcasLoadH(sp); + FfxFloat16x3 e=ee.rgb;pixA=ee.a; + #else + FfxFloat16x3 e=FsrRcasLoadH(sp).rgb; + #endif + FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb; + // Rename (32-bit) or regroup (16-bit). + FfxFloat16 bR=b.r; + FfxFloat16 bG=b.g; + FfxFloat16 bB=b.b; + FfxFloat16 dR=d.r; + FfxFloat16 dG=d.g; + FfxFloat16 dB=d.b; + FfxFloat16 eR=e.r; + FfxFloat16 eG=e.g; + FfxFloat16 eB=e.b; + FfxFloat16 fR=f.r; + FfxFloat16 fG=f.g; + FfxFloat16 fB=f.b; + FfxFloat16 hR=h.r; + FfxFloat16 hG=h.g; + FfxFloat16 hB=h.b; + // Run optional input transform. + FsrRcasInputH(bR,bG,bB); + FsrRcasInputH(dR,dG,dB); + FsrRcasInputH(eR,eG,eB); + FsrRcasInputH(fR,fG,fB); + FsrRcasInputH(hR,hG,hB); + // Luma times 2. + FfxFloat16 bL=bB*FFX_BROADCAST_FLOAT16(0.5)+(bR*FFX_BROADCAST_FLOAT16(0.5)+bG); + FfxFloat16 dL=dB*FFX_BROADCAST_FLOAT16(0.5)+(dR*FFX_BROADCAST_FLOAT16(0.5)+dG); + FfxFloat16 eL=eB*FFX_BROADCAST_FLOAT16(0.5)+(eR*FFX_BROADCAST_FLOAT16(0.5)+eG); + FfxFloat16 fL=fB*FFX_BROADCAST_FLOAT16(0.5)+(fR*FFX_BROADCAST_FLOAT16(0.5)+fG); + FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG); + // Noise detection. + FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL; + nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))); + nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0); + // Min and max of ring. 
+ FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR); + FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG); + FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB); + FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR); + FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG); + FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB); + // Immediate constants for peak range. + FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R); + FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G); + FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B); + FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y); + FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y); + FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y); + FfxFloat16 lobeR=max(-hitMinR,hitMaxR); + FfxFloat16 lobeG=max(-hitMinG,hitMaxG); + FfxFloat16 lobeB=max(-hitMinB,hitMaxB); + FfxFloat16 lobe=max(FFX_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16(0.0)))*FFX_UINT32_TO_FLOAT16X2(con.y).x; + // Apply noise removal. + #ifdef FSR_RCAS_DENOISE + lobe*=nz; + #endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. 
+ FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16(4.0)*lobe+FFX_BROADCAST_FLOAT16(1.0)); + pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; + pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; + pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL; +} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// PACKED 16-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU)&& FFX_HALF == 1 && defined(FSR_RCAS_HX2) + // Input callback prototypes that need to be implemented by the calling shader + FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p); + void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b); +//------------------------------------------------------------------------------------------------------------------------------ + // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store. + void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB){ + #ifdef FFX_HLSL + // Invoke a slower path for DX only, since it won't allow uninitialized values. 
+ pix0.a=pix1.a=0.0; + #endif + pix0.rgb=FfxFloat16x3(pixR.x,pixG.x,pixB.x); + pix1.rgb=FfxFloat16x3(pixR.y,pixG.y,pixB.y);} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasHx2( + // Output values are for 2 8x8 tiles in a 16x8 region. + // pix.x = left 8x8 tile + // pix.y = right 8x8 tile + // This enables later processing to easily be packed as well. + out FfxFloat16x2 pixR, + out FfxFloat16x2 pixG, + out FfxFloat16x2 pixB, + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat16x2 pixA, + #endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con){ // Constant generated by RcasSetup(). + // No scaling algorithm uses minimal 3x3 pixel neighborhood. + FfxInt16x2 sp0=FfxInt16x2(ip); + FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0); + FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a; + #else + FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb; + #endif + FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb; + FfxInt16x2 sp1=sp0+FfxInt16x2(8,0); + FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1); + FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a; + #else + FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb; + #endif + FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb; + // Arrays of Structures to Structures of Arrays conversion. 
+ FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r); + FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g); + FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b); + FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r); + FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g); + FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b); + FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r); + FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g); + FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b); + FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r); + FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g); + FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b); + FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r); + FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g); + FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b); + // Run optional input transform. + FsrRcasInputHx2(bR,bG,bB); + FsrRcasInputHx2(dR,dG,dB); + FsrRcasInputHx2(eR,eG,eB); + FsrRcasInputHx2(fR,fG,fB); + FsrRcasInputHx2(hR,hG,hB); + // Luma times 2. + FfxFloat16x2 bL=bB*FFX_BROADCAST_FLOAT16X2(0.5)+(bR*FFX_BROADCAST_FLOAT16X2(0.5)+bG); + FfxFloat16x2 dL=dB*FFX_BROADCAST_FLOAT16X2(0.5)+(dR*FFX_BROADCAST_FLOAT16X2(0.5)+dG); + FfxFloat16x2 eL=eB*FFX_BROADCAST_FLOAT16X2(0.5)+(eR*FFX_BROADCAST_FLOAT16X2(0.5)+eG); + FfxFloat16x2 fL=fB*FFX_BROADCAST_FLOAT16X2(0.5)+(fR*FFX_BROADCAST_FLOAT16X2(0.5)+fG); + FfxFloat16x2 hL=hB*FFX_BROADCAST_FLOAT16X2(0.5)+(hR*FFX_BROADCAST_FLOAT16X2(0.5)+hG); + // Noise detection. + FfxFloat16x2 nz=FFX_BROADCAST_FLOAT16X2(0.25)*bL+FFX_BROADCAST_FLOAT16X2(0.25)*dL+FFX_BROADCAST_FLOAT16X2(0.25)*fL+FFX_BROADCAST_FLOAT16X2(0.25)*hL-eL; + nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))); + nz=FFX_BROADCAST_FLOAT16X2(-0.5)*nz+FFX_BROADCAST_FLOAT16X2(1.0); + // Min and max of ring. 
+ FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR); + FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG); + FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB); + FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR); + FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG); + FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB); + // Immediate constants for peak range. + FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R); + FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G); + FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B); + FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y); + FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y); + FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y); + FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR); + FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG); + FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB); + FfxFloat16x2 lobe=max(FFX_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16X2(0.0)))*FFX_BROADCAST_FLOAT16X2(FFX_UINT32_TO_FLOAT16X2(con.y).x); + // Apply noise removal. + #ifdef FSR_RCAS_DENOISE + lobe*=nz; + #endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. 
+ FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(4.0)*lobe+FFX_BROADCAST_FLOAT16X2(1.0)); + pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; + pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; + pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR +// +//------------------------------------------------------------------------------------------------------------------------------ +// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts. +// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel. +// The 'Lfga*()' functions provide a convenient way to introduce grain. +// These functions limit grain based on distance to signal limits. +// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality. +// Grain application should be done in a linear colorspace. +// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased). 
+//------------------------------------------------------------------------------------------------------------------------------ +// Usage, +// FsrLfga*( +// color, // In/out linear colorspace color {0 to 1} ranged. +// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain. +// amount); // Amount of grain {0 to 1} ranged. +//------------------------------------------------------------------------------------------------------------------------------ +// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)' +//============================================================================================================================== +#if defined(FFX_GPU) + // Maximum grain is the minimum distance to the signal limit. + void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a) + { + c += (t * ffxBroadcast3(a)) * ffxMin(ffxBroadcast3(1.0) - c, c); + } +#endif +//============================================================================================================================== +#if defined(FFX_GPU)&& FFX_HALF == 1 + // Half precision version (slower). + void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a) + { + c += (t * FFX_BROADCAST_FLOAT16X3(a)) * min(FFX_BROADCAST_FLOAT16X3(1.0) - c, c); + } + //------------------------------------------------------------------------------------------------------------------------------ + // Packed half precision version (faster).
+ void FsrLfgaHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 tR,FfxFloat16x2 tG,FfxFloat16x2 tB,FfxFloat16 a){ + cR+=(tR*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cR,cR);cG+=(tG*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cG,cG);cB+=(tB*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cB,cB);} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER +// +//------------------------------------------------------------------------------------------------------------------------------ +// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear. +// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering. +//------------------------------------------------------------------------------------------------------------------------------ +// Reversible tonemapper usage, +// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}. +// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}. 
+//============================================================================================================================== +#if defined(FFX_GPU) + void FsrSrtmF(inout FfxFloat32x3 c) + { + c *= ffxBroadcast3(ffxReciprocal(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0))); + } + // The extra max solves the c=1.0 case (which is a /0). + void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(ffxReciprocal(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));} +#endif +//============================================================================================================================== +#if defined(FFX_GPU )&& FFX_HALF == 1 + void FsrSrtmH(inout FfxFloat16x3 c) + { + c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(ffxMax3Half(c.r, c.g, c.b) + FFX_BROADCAST_FLOAT16(1.0))); + } + void FsrSrtmInvH(inout FfxFloat16x3 c) + { + c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16(1.0 / 32768.0), FFX_BROADCAST_FLOAT16(1.0) - ffxMax3Half(c.r, c.g, c.b)))); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB) + { + FfxFloat16x2 rcp = ffxReciprocalHalf(ffxMax3Half(cR, cG, cB) + FFX_BROADCAST_FLOAT16X2(1.0)); + cR *= rcp; + cG *= rcp; + cB *= rcp; + } + void FsrSrtmInvHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB) + { + FfxFloat16x2 rcp=ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16X2(1.0/32768.0),FFX_BROADCAST_FLOAT16X2(1.0)-ffxMax3Half(cR,cG,cB))); + cR*=rcp; + cG*=rcp; + cB*=rcp; + } +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER +// +//------------------------------------------------------------------------------------------------------------------------------ +// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. +// Gamma 2.0 is used so that the conversion back to linear is just to square the color. +// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. +// Given good non-biased temporal blue noise as dither input, +// the output dither will temporally conserve energy. +// This is done by choosing the linear nearest step point instead of perceptual nearest. +// See code below for details. +//------------------------------------------------------------------------------------------------------------------------------ +// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION +// =============================================== +// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'. +// - Thus rounding is to nearest. +// - NaN gets converted to zero. +// - INF is clamped to {0.0 to 1.0}. +//============================================================================================================================== +#if defined(FFX_GPU) + // Hand tuned integer position to dither value, with more values than simple checkerboard. + // Only 32-bit has enough precision for this computation. + // Output is {0 to <1}.
+ FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f) + { + FfxFloat32 x = FfxFloat32(p.x + f); + FfxFloat32 y = FfxFloat32(p.y); + // The 1.61803 golden ratio. + FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); + // Number designed to provide a good visual pattern. + FfxFloat32 b = FfxFloat32(1.0 / 3.69); + x = x * a + (y * b); + return ffxFract(x); + } + //------------------------------------------------------------------------------------------------------------------------------ + // This version is 8-bit gamma 2.0. + // The 'c' input is {0 to 1}. + // Output is {0 to 1} ready for image store. + void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit) + { + FfxFloat32x3 n = ffxSqrt(c); + n = floor(n * ffxBroadcast3(255.0)) * ffxBroadcast3(1.0 / 255.0); + FfxFloat32x3 a = n * n; + FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 255.0); + b = b * b; + // Ratio of 'a' to 'b' required to produce 'c'. + // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges). + // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL. + FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b); + // Use the ratio as a cutoff to choose 'a' or 'b'. + // ffxIsGreaterThanZero() is a MUL. + c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 255.0)); + } + //------------------------------------------------------------------------------------------------------------------------------ + // This version is 10-bit gamma 2.0. + // The 'c' input is {0 to 1}. + // Output is {0 to 1} ready for image store. 
+    void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit) // Dithered quantization of 'c' to 1/1023 steps in square-root space; 'dit' is the per-pixel threshold. The result stays sqrt-encoded.
+    {
+        FfxFloat32x3 n = ffxSqrt(c); // work in square-root space
+        n = floor(n * ffxBroadcast3(1023.0)) * ffxBroadcast3(1.0 / 1023.0); // lower quantization step at or below sqrt(c)
+        FfxFloat32x3 a = n * n; // lower step, squared back into the input's space
+        FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 1023.0); // upper step ...
+        b = b * b; // ... squared back into the input's space
+        FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b); // r = (c - b) / (a - b): 1 on the lower step, 0 on the upper (reciprocal is approximate, going by the helper's name)
+        c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 1023.0)); // NOTE(review): ffxIsGreaterThanZero presumably yields 1.0 where dit > r, else 0.0 -- confirm in ffx_core
+    }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1
+    FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f) // Dither value for position 'p'; 'f' is added to p.x (presumably a per-frame offset -- confirm against callers).
+    {
+        FfxFloat32 x = FfxFloat32(p.x + f);
+        FfxFloat32 y = FfxFloat32(p.y);
+        FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); // (1 + sqrt(5)) / 2, the golden ratio
+        FfxFloat32 b = FfxFloat32(1.0 / 3.69); // weight applied to the y coordinate
+        x = x * a + (y * b);
+        return FfxFloat16(ffxFract(x)); // fractional part (per ffxFract's name), narrowed to 16-bit float
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit) // 16-bit variant: dithered quantization of 'c' to 1/255 steps in square-root space; result stays sqrt-encoded.
+    {
+        FfxFloat16x3 n = sqrt(c); // work in square-root space
+        n = floor(n * FFX_BROADCAST_FLOAT16X3(255.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); // lower quantization step
+        FfxFloat16x3 a = n * n; // lower step squared
+        FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); // upper step ...
+        b = b * b; // ... squared
+        FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); // r = (c - b) / (a - b), using an approximate reciprocal
+        c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0)); // NOTE(review): presumably adds one step where dit > r -- confirm helper semantics
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit) // Same as FsrTepdC8H but with 1/1023 steps.
+    {
+        FfxFloat16x3 n = sqrt(c); // work in square-root space
+        n = floor(n * FFX_BROADCAST_FLOAT16X3(1023.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); // lower quantization step
+        FfxFloat16x3 a = n * n; // lower step squared
+        FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); // upper step ...
+        b = b * b; // ... squared
+        FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); // r = (c - b) / (a - b), using an approximate reciprocal
+        c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0)); // NOTE(review): presumably adds one step where dit > r -- confirm helper semantics
+    }
+    //==============================================================================================================================
+    // This computes dither for positions 'p' and 'p+{8,0}'.
+    FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f) // Two-pixel form of FsrTepdDitH: .x is for 'p', .y for the pixel 8 to the right.
+    {
+        FfxFloat32x2 x;
+        x.x = FfxFloat32(p.x + f);
+        x.y = x.x + FfxFloat32(8.0); // second pixel sits 8 columns to the right
+        FfxFloat32 y = FfxFloat32(p.y);
+        FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); // (1 + sqrt(5)) / 2, the golden ratio
+        FfxFloat32 b = FfxFloat32(1.0 / 3.69); // weight applied to the y coordinate
+        x = x * ffxBroadcast2(a) + ffxBroadcast2(y * b);
+        return FfxFloat16x2(ffxFract(x)); // fractional parts (per ffxFract's name), narrowed to 16-bit floats
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit) // Two-pixel form of FsrTepdC8H: each argument carries one channel (or the dither value) for two pixels.
+    {
+        FfxFloat16x2 nR = sqrt(cR); // work in square-root space, one channel at a time
+        FfxFloat16x2 nG = sqrt(cG);
+        FfxFloat16x2 nB = sqrt(cB);
+        nR = floor(nR * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); // lower quantization step per channel
+        nG = floor(nG * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        nB = floor(nB * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        FfxFloat16x2 aR = nR * nR; // lower steps squared
+        FfxFloat16x2 aG = nG * nG;
+        FfxFloat16x2 aB = nB * nB;
+        FfxFloat16x2 bR = nR + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); // upper steps, squared on the following line
+        bR = bR * bR;
+        FfxFloat16x2 bG = nG + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        bG = bG * bG;
+        FfxFloat16x2 bB = nB + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        bB = bB * bB;
+        FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR); // r = (c - b) / (a - b), using an approximate reciprocal
+        FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG);
+        FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB);
+        cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); // NOTE(review): presumably adds one step where dit > r -- confirm helper semantics
+        cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0));
+        cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    void FsrTepdC10Hx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 dit){ // Same as FsrTepdC8Hx2 but with 1/1023 steps.
+        FfxFloat16x2 nR=sqrt(cR); // work in square-root space, one channel at a time
+        FfxFloat16x2 nG=sqrt(cG);
+        FfxFloat16x2 nB=sqrt(cB);
+        nR=floor(nR*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); // lower quantization step per channel
+        nG=floor(nG*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0);
+        nB=floor(nB*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0);
+        FfxFloat16x2 aR=nR*nR; // lower steps squared
+        FfxFloat16x2 aG=nG*nG;
+        FfxFloat16x2 aB=nB*nB;
+        FfxFloat16x2 bR=nR+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bR=bR*bR; // upper steps squared
+        FfxFloat16x2 bG=nG+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bG=bG*bG;
+        FfxFloat16x2 bB=nB+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bB=bB*bB;
+        FfxFloat16x2 rR=(cR-bR)*ffxApproximateReciprocalMediumHalf(aR-bR); // r = (c - b) / (a - b), using an approximate reciprocal
+        FfxFloat16x2 rG=(cG-bG)*ffxApproximateReciprocalMediumHalf(aG-bG);
+        FfxFloat16x2 rB=(cB-bB)*ffxApproximateReciprocalMediumHalf(aB-bB);
+        cR=ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)); // NOTE(review): presumably adds one step where dit > r -- confirm helper semantics
+        cG=ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0));
+        cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0));
+}
+#endif
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1.h.meta
new file mode 100644
index 0000000..268208a
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 26bcd1d17b5c164499ee1a76186f6003
+PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_callbacks_hlsl.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_callbacks_hlsl.h new file mode 100644 index 0000000..03e6093 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_callbacks_hlsl.h @@ -0,0 +1,257 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_fsr1_resources.h"
+
+#if defined(FFX_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "ffx_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // #ifndef FFX_PREFER_WAVE64
+
+#pragma warning(disable: 3205) // conversion from larger type to smaller
+
+#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
+#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
+#define DECLARE_CB_REGISTER(regIndex) b##regIndex
+#define FFX_FSR1_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex))
+#define FFX_FSR1_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
+#define FFX_FSR1_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
+
+#if defined(FSR1_BIND_CB_FSR1)
+    cbuffer cbFSR1 : FFX_FSR1_DECLARE_CB(FSR1_BIND_CB_FSR1)
+    {
+        FfxUInt32x4 const0; // returned by Const0() and RCasConfig()
+        FfxUInt32x4 const1; // returned by Const1()
+        FfxUInt32x4 const2; // returned by Const2()
+        FfxUInt32x4 const3; // returned by Const3()
+        FfxUInt32x4 sample; // returned by EASUSample() and RCasSample()
+    #define FFX_FSR1_CONSTANT_BUFFER_1_SIZE 20 // Number of 32-bit values. This must be kept in sync with the cbFSR1 size.
+    };
+#else
+    #define const0 0
+    #define const1 0
+    #define const2 0
+    #define const3 0
+    #define sample 0
+#endif
+
+#if defined(FFX_GPU)
+#define FFX_FSR1_ROOTSIG_STRINGIFY(p) FFX_FSR1_ROOTSIG_STR(p)
+#define FFX_FSR1_ROOTSIG_STR(p) #p
+#define FFX_FSR1_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR1_ROOTSIG_STRINGIFY(FFX_FSR1_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                        "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR1_ROOTSIG_STRINGIFY(FFX_FSR1_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                        "CBV(b0), " \
+                                        "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+                                        "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                        "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                        "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                        "comparisonFunc = COMPARISON_NEVER, " \
+                                        "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#if defined(FFX_FSR1_EMBED_ROOTSIG)
+#define FFX_FSR1_EMBED_ROOTSIG_CONTENT FFX_FSR1_ROOTSIG
+#else
+#define FFX_FSR1_EMBED_ROOTSIG_CONTENT
+#endif // #if FFX_FSR1_EMBED_ROOTSIG
+#endif // #if defined(FFX_GPU)
+
+
+FfxUInt32x4 Const0() // Accessors below expose the cbFSR1 members (or the literal-0 fallbacks when the buffer is not bound).
+{
+    return const0;
+}
+
+FfxUInt32x4 Const1()
+{
+    return const1;
+}
+
+FfxUInt32x4 Const2()
+{
+    return const2;
+}
+
+FfxUInt32x4 Const3()
+{
+    return const3;
+}
+
+FfxUInt32x4 EASUSample() // Shares the 'sample' member with RCasSample().
+{
+    return sample;
+}
+
+FfxUInt32x4 RCasSample()
+{
+    return sample;
+}
+
+FfxUInt32x4 RCasConfig() // Reads the same member as Const0().
+{
+    return const0;
+}
+
+SamplerState s_LinearClamp : register(s0);
+
+    // SRVs (element type spelled out; FfxFloat32x4 matches the values the callbacks below read)
+    #if defined FSR1_BIND_SRV_INPUT_COLOR
+        Texture2D<FfxFloat32x4> r_input_color : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_INPUT_COLOR);
+    #endif
+    #if defined FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR
+        Texture2D<FfxFloat32x4> r_internal_upscaled_color : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR);
+    #endif
+    #if defined FSR1_BIND_SRV_UPSCALED_OUTPUT
+        Texture2D<FfxFloat32x4> r_upscaled_output : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_UPSCALED_OUTPUT);
+    #endif
+
+    // UAV declarations (RWTexture2D requires an explicit element type; the stores below write FfxFloat32x4)
+    #if defined FSR1_BIND_UAV_INPUT_COLOR
+        RWTexture2D<FfxFloat32x4> rw_input_color : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_INPUT_COLOR);
+    #endif
+    #if defined FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR
+        RWTexture2D<FfxFloat32x4> rw_internal_upscaled_color : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR);
+    #endif
+    #if defined FSR1_BIND_UAV_UPSCALED_OUTPUT
+        RWTexture2D<FfxFloat32x4> rw_upscaled_output : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_UPSCALED_OUTPUT);
+    #endif
+
+#if FFX_HALF
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat16x4 GatherEasuRed(FfxFloat32x2 fPxPos) // Gathers the red channel of r_input_color at fPxPos (zero texel offset), narrowed to 16-bit floats.
+    {
+        return (FfxFloat16x4)r_input_color.GatherRed(s_LinearClamp, fPxPos, FfxInt32x2(0,0));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat16x4 GatherEasuGreen(FfxFloat32x2 fPxPos) // Green-channel counterpart of GatherEasuRed.
+    {
+        return (FfxFloat16x4)r_input_color.GatherGreen(s_LinearClamp, fPxPos, FfxInt32x2(0, 0));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat16x4 GatherEasuBlue(FfxFloat32x2 fPxPos) // Blue-channel counterpart of GatherEasuRed.
+    {
+        return (FfxFloat16x4)r_input_color.GatherBlue(s_LinearClamp, fPxPos, FfxInt32x2(0, 0));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if FFX_FSR1_OPTION_APPLY_RCAS
+    #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+    void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor) // With RCAS enabled, EASU writes to the intermediate target; alpha is forced to 1.
+    {
+        rw_internal_upscaled_color[iPxPos] = FfxFloat32x4(fColor, 1.f);
+    }
+    #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+#else
+    #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+    void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor) // Without RCAS, EASU writes straight to the final output; alpha is forced to 1.
+    {
+        rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f);
+    }
+    #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+#endif // #if FFX_FSR1_OPTION_APPLY_RCAS
+
+#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+    FfxFloat16x4 LoadRCas_Input(FfxInt16x2 iPxPos) // Reads one texel of the intermediate upscaled image, narrowed to 16-bit floats.
+    {
+        return (FfxFloat16x4)r_internal_upscaled_color[iPxPos];
+    }
+#endif // defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+
+#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+    void StoreRCasOutputHx2(FfxInt16x2 iPxPos, FfxFloat16x2 fColorR, FfxFloat16x2 fColorG, FfxFloat16x2 fColorB, FfxFloat16x2 fColorA) // Writes two pixels: the .x components at iPxPos and the .y components 8 columns to the right.
+    {
+        rw_upscaled_output[iPxPos] = FfxFloat32x4(fColorR.x, fColorG.x, fColorB.x, fColorA.x);
+        iPxPos.x += 8;
+        rw_upscaled_output[iPxPos] = FfxFloat32x4(fColorR.y, fColorG.y, fColorB.y, fColorA.y);
+    }
+#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+
+#else // FFX_HALF
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat32x4 GatherEasuRed(FfxFloat32x2 fPxPos) // Gathers the red channel of r_input_color at fPxPos (zero texel offset).
+    {
+        return r_input_color.GatherRed(s_LinearClamp, fPxPos, FfxInt32x2(0, 0));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat32x4 GatherEasuGreen(FfxFloat32x2 fPxPos) // Green-channel counterpart of GatherEasuRed.
+    {
+        return r_input_color.GatherGreen(s_LinearClamp, fPxPos, FfxInt32x2(0, 0));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat32x4 GatherEasuBlue(FfxFloat32x2 fPxPos) // Blue-channel counterpart of GatherEasuRed.
+    {
+        return r_input_color.GatherBlue(s_LinearClamp, fPxPos, FfxInt32x2(0, 0));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+
+#if FFX_FSR1_OPTION_APPLY_RCAS
+    #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+    void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) // With RCAS enabled, EASU writes to the intermediate target; alpha is forced to 1.
+    {
+        rw_internal_upscaled_color[iPxPos] = FfxFloat32x4(fColor, 1.f);
+    }
+    #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+#else
+    #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+    void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) // Without RCAS, EASU writes straight to the final output; alpha is forced to 1.
+    {
+        rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f);
+    }
+    #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+#endif // #if FFX_FSR1_OPTION_APPLY_RCAS
+
+#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+    FfxFloat32x4 LoadRCas_Input(FfxInt32x2 iPxPos) // Reads one texel of the intermediate upscaled image.
+    {
+        return r_internal_upscaled_color[iPxPos];
+    }
+#endif // defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+
+#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+    void StoreRCasOutput(FfxInt32x2 iPxPos, FfxFloat32x4 fColor) // Writes one pixel of the final output unchanged.
+    {
+        rw_upscaled_output[iPxPos] = fColor;
+    }
+#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+
+#endif // FFX_HALF
+
+#endif // #if defined(FFX_GPU)
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_callbacks_hlsl.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_callbacks_hlsl.h.meta
new file mode 100644
index 0000000..bec9399
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_callbacks_hlsl.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: cb13a977b4e7b034a880c7c287c47240
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 1
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any:
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_easu.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_easu.h
new file mode 100644
index 0000000..26f94c9
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_easu.h
@@ -0,0 +1,98 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#define GROUP_SIZE 8
+#define FSR_RCAS_DENOISE 1
+
+#include "../ffx_core.h" // NOTE(review): this path is relative to the parent folder while "fsr1/ffx_fsr1.h" below is not -- confirm both resolve with the project's include roots
+
+#if FFX_HALF
+
+    #define FFX_FSR_EASU_HALF 1
+    FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p) { return GatherEasuRed(p); } // 16-bit input callbacks: forward to the per-channel gather helpers
+    FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p) { return GatherEasuGreen(p); }
+    FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p) { return GatherEasuBlue(p); }
+
+#else
+
+    #define FFX_FSR_EASU_FLOAT 1
+    FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p) { return GatherEasuRed(p); } // 32-bit input callbacks: forward to the per-channel gather helpers
+    FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p) { return GatherEasuGreen(p); }
+    FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p) { return GatherEasuBlue(p); }
+
+#endif // FFX_HALF
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+    #define FSR_RCAS_PASSTHROUGH_ALPHA
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+
+#include "fsr1/ffx_fsr1.h"
+
+void CurrFilter(FfxUInt32x2 pos) // Upscales one output pixel at 'pos': runs the EASU filter, optionally squares and gamma-encodes the colour, then stores it.
+{
+#if FFX_HALF
+
+    FfxFloat16x3 c;
+    FsrEasuH(c, pos, Const0(), Const1(), Const2(), Const3());
+    if (EASUSample().x == 1) // NOTE(review): squaring presumably undoes a sqrt encoding of the input -- confirm against the code that fills 'sample'
+    {
+        c *= c;
+    }
+
+#if FFX_FSR1_OPTION_SRGB_CONVERSIONS
+    // Apply gamma if this is an sRGB format (auto-degamma'd on sampler read)
+    c = pow(c, FfxFloat16x3(1.0 / 2.2, 1.0 / 2.2, 1.0 / 2.2));
+#endif // FFX_FSR1_OPTION_SRGB_CONVERSIONS
+
+    StoreEASUOutput(pos, c);
+
+#else
+
+    FfxFloat32x3 c;
+    ffxFsrEasuFloat(c, pos, Const0(), Const1(), Const2(), Const3());
+    if (EASUSample().x == 1) // NOTE(review): squaring presumably undoes a sqrt encoding of the input -- confirm against the code that fills 'sample'
+    {
+        c *= c;
+    }
+
+#if FFX_FSR1_OPTION_SRGB_CONVERSIONS
+    // Apply gamma if this is an sRGB format (auto-degamma'd on sampler read)
+    c = pow(c, FfxFloat32x3(1.f / 2.2f, 1.f / 2.2f, 1.f / 2.2f));
+#endif // FFX_FSR1_OPTION_SRGB_CONVERSIONS
+
+    StoreEASUOutput(pos, c);
+
+#endif // FFX_HALF
+}
+
+void EASU(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) // Pass entry point: each thread filters four pixels; Dtid is not used.
+{
+    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+    FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); // each workgroup starts at a multiple of 16 in x and y
+    CurrFilter(gxy); // start position
+    gxy.x += 8u;
+    CurrFilter(gxy); // 8 to the right
+    gxy.y += 8u;
+    CurrFilter(gxy); // 8 to the right and 8 down
+    gxy.x -= 8u;
+    CurrFilter(gxy); // 8 down
+}
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_easu.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_easu.h.meta
new file mode 100644
index 0000000..90d9720
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_easu.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 103b94b0da79ae4499eed56f12a629aa
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 1
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any:
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_rcas.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_rcas.h
new file mode 100644
index 0000000..92a7161
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_rcas.h
@@ -0,0 +1,118 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#define GROUP_SIZE 8
+#define FSR_RCAS_DENOISE 1
+
+#include "../ffx_core.h"
+
+#if FFX_HALF
+    #define FSR_RCAS_HX2 1
+    FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p) // 16-bit input callback: forwards to LoadRCas_Input
+    {
+        return LoadRCas_Input(p);
+    }
+    void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b) {} // intentionally empty: the loaded colour is used as-is
+#else
+    #define FSR_RCAS_F 1
+    FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) // 32-bit input callback: forwards to LoadRCas_Input
+    {
+        return LoadRCas_Input(p);
+    }
+    void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) // intentionally empty: the loaded colour is used as-is
+    {
+    }
+#endif // FFX_HALF
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+    #define FSR_RCAS_PASSTHROUGH_ALPHA
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+
+#include "fsr1/ffx_fsr1.h"
+
+void CurrFilter(FFX_MIN16_U2 pos) // Sharpens the pixel(s) at 'pos' with RCAS, optionally squares the colour channels, and stores the result.
+{
+#if FFX_HALF
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+    FfxFloat16x2 cr, cg, cb, ca;
+    FsrRcasHx2(cr, cg, cb, ca, pos, RCasConfig());
+#else
+    FfxFloat16x2 cr, cg, cb;
+    FsrRcasHx2(cr, cg, cb, pos, RCasConfig());
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+
+    if (RCasSample().x == 1) // only the colour channels are squared; alpha (ca) is passed through untouched
+    {
+        cr *= cr;
+        cg *= cg;
+        cb *= cb;
+    }
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+    StoreRCasOutputHx2(FfxInt16x2(pos), cr, cg, cb, ca);
+#else
+    StoreRCasOutputHx2(FfxInt16x2(pos), cr, cg, cb, FfxFloat16x2(1.0, 1.0));
+#endif
+
+#else
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+    FfxFloat32x4 c;
+    FsrRcasF(c.r, c.g, c.b, c.a, pos, RCasConfig());
+#else
+    FfxFloat32x3 c;
+    FsrRcasF(c.r, c.g, c.b, pos, RCasConfig());
+#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+    if (RCasSample().x == 1) // square colour only: 'c' is a 4-vector when alpha passthrough is on, and alpha must stay untouched as in the half path
+    {
+        c.rgb *= c.rgb;
+    }
+
+#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA
+    StoreRCasOutput(FfxInt32x2(pos), c);
+#else
+    StoreRCasOutput(FfxInt32x2(pos), FfxFloat32x4(c, 1.0));
+#endif
+
+#endif
+}
+
+void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) // Pass entry point: two packed calls (half) or four single-pixel calls (float) per thread; Dtid is not used.
+{
+    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+    FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); // each workgroup starts at a multiple of 16 in x and y
+#if FFX_HALF
+    // The packed version handles the left and right 8x8 tiles in one call (a 16x8 region), so only the y offset is stepped here.
+    CurrFilter(FFX_MIN16_U2(gxy));
+    gxy.y += 8u;
+    CurrFilter(FFX_MIN16_U2(gxy));
+#else
+    CurrFilter(FFX_MIN16_U2(gxy)); // start position
+    gxy.x += 8u;
+    CurrFilter(FFX_MIN16_U2(gxy)); // 8 to the right
+    gxy.y += 8u;
+    CurrFilter(FFX_MIN16_U2(gxy)); // 8 to the right and 8 down
+    gxy.x -= 8u;
+    CurrFilter(FFX_MIN16_U2(gxy)); // 8 down
+#endif
+}
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_rcas.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_rcas.h.meta
new file mode 100644
index 0000000..b6cda13
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_rcas.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 758ff261fb9bc2f42b55fef5d20633d0
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 1
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any:
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_resources.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_resources.h
new file mode 100644
index 0000000..eb74d74
--- /dev/null
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_resources.h
@@ -0,0 +1,38 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_FSR1_RESOURCES_H +#define FFX_FSR1_RESOURCES_H + +#if defined(FFX_CPU) || defined(FFX_GPU) +#define FFX_FSR1_RESOURCE_IDENTIFIER_NULL 0 +#define FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR 1 +#define FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 2 +#define FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 3 + +#define FFX_FSR1_RESOURCE_IDENTIFIER_COUNT 4 + +#define FFX_FSR1_CONSTANTBUFFER_IDENTIFIER_FSR1 0 + +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif //!defined( FFX_FSR1_RESOURCES_H ) diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_resources.h.meta b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_resources.h.meta new file mode 100644 index 0000000..a5234e3 --- /dev/null +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/FFX/fsr1/ffx_fsr1_resources.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 0d5909ec74b2cfa488ecd81feb780bf6 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/UniversalRenderPipeline.cs b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/UniversalRenderPipeline.cs index ce3f8e7..239a8e1 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/UniversalRenderPipeline.cs +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/UniversalRenderPipeline.cs @@ -140,7 +140,7 @@ namespace UnityEngine.Rendering.Universal { get { -#if !UNITY_2022_3_51 +#if 
!UNITY_2022_3_51 && !UNITY_2022_3_38 bool isMaxVisibleLights16 = GraphicsSettings.HasShaderDefine(BuiltinShaderDefine.SHADER_API_MAX_VISIBLE_LIGHTS_16); if (isMaxVisibleLights16) return k_MaxVisibleAdditionalLightsMobileShaderLevelLessThan45; diff --git a/Packages/packages-lock.json b/Packages/packages-lock.json index b61e7ef..290d304 100644 --- a/Packages/packages-lock.json +++ b/Packages/packages-lock.json @@ -1,7 +1,7 @@ { "dependencies": { "com.unity.burst": { - "version": "1.8.18", + "version": "1.8.16", "depth": 1, "source": "registry", "dependencies": { diff --git a/ProjectSettings/PackageManagerSettings.asset b/ProjectSettings/PackageManagerSettings.asset index bea66e2..086939c 100644 --- a/ProjectSettings/PackageManagerSettings.asset +++ b/ProjectSettings/PackageManagerSettings.asset @@ -1,5 +1,5 @@ %YAML 1.1 -%TAG !u! tag:yousandi.cn,2023: +%TAG !u! tag:unity3d.com,2011: --- !u!114 &1 MonoBehaviour: m_ObjectHideFlags: 61 @@ -21,7 +21,7 @@ MonoBehaviour: m_Registries: - m_Id: main m_Name: - m_Url: https://packages.tuanjie.cn + m_Url: https://packages.unity.cn m_Scopes: [] m_IsDefault: 1 m_Capabilities: 7 @@ -31,6 +31,6 @@ MonoBehaviour: m_RegistryInfoDraft: m_Modified: 0 m_ErrorMessage: - m_UserModificationsInstanceId: -854 - m_OriginalInstanceId: -856 + m_UserModificationsInstanceId: -838 + m_OriginalInstanceId: -840 m_LoadAssets: 0 diff --git a/ProjectSettings/ProjectVersion.txt b/ProjectSettings/ProjectVersion.txt index feff13a..f3fb7ab 100644 --- a/ProjectSettings/ProjectVersion.txt +++ b/ProjectSettings/ProjectVersion.txt @@ -1,3 +1,2 @@ -m_EditorVersion: 2022.3.48t2 -m_EditorVersionWithRevision: 2022.3.48t2 (635b9f68393f) -m_TuanjieEditorVersion: 1.3.5 +m_EditorVersion: 2022.3.38f1c1 +m_EditorVersionWithRevision: 2022.3.38f1c1 (b17906c7b2b6) diff --git a/ProjectSettings/ShaderGraphSettings.asset b/ProjectSettings/ShaderGraphSettings.asset index 8ad2e55..3250b06 100644 --- a/ProjectSettings/ShaderGraphSettings.asset +++ 
b/ProjectSettings/ShaderGraphSettings.asset @@ -1,5 +1,5 @@ %YAML 1.1 -%TAG !u! tag:yousandi.cn,2023: +%TAG !u! tag:unity3d.com,2011: --- !u!114 &1 MonoBehaviour: m_ObjectHideFlags: 61 diff --git a/UserSettings/EditorUserSettings.asset b/UserSettings/EditorUserSettings.asset index f62c9b3..e01ff9f 100644 --- a/UserSettings/EditorUserSettings.asset +++ b/UserSettings/EditorUserSettings.asset @@ -12,13 +12,13 @@ EditorUserSettings: value: 01060c075556080e0956597315775b14104e4179282b25662b2c1c31b7b63068 flags: 0 RecentlyUsedSceneGuid-2: - value: 5b00515154055e035459547042705a44154f1e2c2e2e7f677e7d4567e1b9376d + value: 57050d5004060d590f56087046700f4445164c7e7a7824312e7c4e6bb0e1376d flags: 0 RecentlyUsedSceneGuid-3: - value: 5a57555407545f0b5e0f5f764277594914161e2b787977692b714e64e4b2363b + value: 5b00515154055e035459547042705a44154f1e2c2e2e7f677e7d4567e1b9376d flags: 0 RecentlyUsedSceneGuid-4: - value: 57050d5004060d590f56087046700f4445164c7e7a7824312e7c4e6bb0e1376d + value: 5a57555407545f0b5e0f5f764277594914161e2b787977692b714e64e4b2363b flags: 0 vcSharedLogLevel: value: 0d5e400f0650