diff --git a/Assets/AEG FSR.meta b/Assets/AEG FSR.meta deleted file mode 100644 index 96e1a0e..0000000 --- a/Assets/AEG FSR.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 9b9cfa7bca6ef7b49aabae089d9f28f3 -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Editor.meta b/Assets/AEG FSR/Editor.meta deleted file mode 100644 index e37e9a3..0000000 --- a/Assets/AEG FSR/Editor.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: f6231cfcf34ddbe47adc5be9ccd200cb -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Editor/FSR3_Editor.cs b/Assets/AEG FSR/Editor/FSR3_Editor.cs deleted file mode 100644 index da95a35..0000000 --- a/Assets/AEG FSR/Editor/FSR3_Editor.cs +++ /dev/null @@ -1,118 +0,0 @@ -using UnityEngine; -using UnityEditor; - -namespace AEG.FSR -{ - [CustomEditor(typeof(FSR3_BASE), editorForChildClasses: true)] - public class FSR3_Editor : Editor - { - public override void OnInspectorGUI() { - FSR3_BASE fsrScript = target as FSR3_BASE; - - EditorGUI.BeginChangeCheck(); - - EditorGUILayout.LabelField("FSR Settings", EditorStyles.boldLabel); - FSR_Quality fsrQuality = (FSR_Quality)EditorGUILayout.EnumPopup(Styles.qualityText, fsrScript.FSRQuality); - float AntiGhosting = EditorGUILayout.Slider(Styles.antiGhosting, fsrScript.antiGhosting, 0.0f, 1.0f); - - bool sharpening = EditorGUILayout.Toggle(Styles.sharpeningText, fsrScript.sharpening); - float sharpness = fsrScript.sharpness; - if(fsrScript.sharpening) { - EditorGUI.indentLevel++; - sharpness = EditorGUILayout.Slider(Styles.sharpnessText, fsrScript.sharpness, 0.0f, 1.0f); - EditorGUI.indentLevel--; - } - - EditorGUILayout.Space(); - - EditorGUILayout.LabelField("Transparency Settings", EditorStyles.boldLabel); - bool generateReactiveMask = EditorGUILayout.Toggle(Styles.reactiveMaskText, fsrScript.generateReactiveMask); - - float autoReactiveScale = fsrScript.autoReactiveScale; - float autoReactiveThreshold = fsrScript.autoReactiveThreshold; - float autoReactiveBinaryValue = fsrScript.autoReactiveBinaryValue; - - bool generateTCMask = false; - - float autoTcThreshold = fsrScript.autoTcThreshold; - float autoTcScale = fsrScript.autoTcScale; - float autoTcReactiveScale = fsrScript.autoTcReactiveScale; - float autoTcReactiveMax = fsrScript.autoTcReactiveMax; - - if(fsrScript.generateReactiveMask) { - EditorGUI.indentLevel++; - autoReactiveScale = EditorGUILayout.Slider(Styles.reactiveScaleText, fsrScript.autoReactiveScale, 0.0f, 1.0f); - autoReactiveThreshold = EditorGUILayout.Slider(Styles.reactiveThresholdText, fsrScript.autoReactiveThreshold, 0.0f, 1.0f); - autoReactiveBinaryValue = EditorGUILayout.Slider(Styles.reactiveBinaryValueText, fsrScript.autoReactiveBinaryValue, 0.0f, 1.0f); - EditorGUI.indentLevel--; - } - - - EditorGUILayout.Space(); - -#if UNITY_BIRP - EditorGUILayout.LabelField("MipMap Settings", EditorStyles.boldLabel); - bool autoTextureUpdate = EditorGUILayout.Toggle(Styles.autoTextureUpdateText, fsrScript.autoTextureUpdate); - float mipMapUpdateFrequency = fsrScript.mipMapUpdateFrequency; - if(fsrScript.autoTextureUpdate) { - EditorGUI.indentLevel++; - mipMapUpdateFrequency = EditorGUILayout.FloatField(Styles.autoUpdateFrequencyText, fsrScript.mipMapUpdateFrequency); - EditorGUI.indentLevel--; - } - float mipmapBiasOverride = EditorGUILayout.Slider(Styles.mipmapBiasText, fsrScript.mipmapBiasOverride, 0.0f, 1.0f); -#endif - if(EditorGUI.EndChangeCheck()) { - EditorUtility.SetDirty(fsrScript); - - Undo.RecordObject(target, "Changed Area Of Effect"); - fsrScript.FSRQuality = fsrQuality; - fsrScript.antiGhosting = AntiGhosting; - fsrScript.sharpening = sharpening; - fsrScript.sharpness = sharpness; - - fsrScript.generateReactiveMask = generateReactiveMask; - fsrScript.autoReactiveThreshold = autoReactiveThreshold; - fsrScript.autoReactiveScale = autoReactiveScale; - fsrScript.autoReactiveBinaryValue = autoReactiveBinaryValue; - - fsrScript.generateTCMask = generateTCMask; - fsrScript.autoTcThreshold = autoTcThreshold; - fsrScript.autoTcScale = autoTcScale; - fsrScript.autoTcReactiveScale = autoTcReactiveScale; - fsrScript.autoTcReactiveMax = autoTcReactiveMax; - -#if UNITY_BIRP - fsrScript.autoTextureUpdate = autoTextureUpdate; - fsrScript.mipMapUpdateFrequency = mipMapUpdateFrequency; - fsrScript.mipmapBiasOverride = mipmapBiasOverride; -#endif - } - } - - private static class Styles - { - public static GUIContent qualityText = new GUIContent("Quality", "Quality 1.5, Balanced 1.7, Performance 2, Ultra Performance 3"); - public static GUIContent antiGhosting = new GUIContent("Anti Ghosting", "The Anti Ghosting value between 0 and 1, where 0 is no Anti Ghosting and 1 is the maximum amount."); - public static GUIContent sharpeningText = new GUIContent("Sharpening", "Enable an additional (RCAS) sharpening pass in the fsr algorithm."); - public static GUIContent sharpnessText = new GUIContent("Sharpness", "The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness."); - public static GUIContent hdrText = new GUIContent("HDR", "Instructs FSR to use HDR in the algorithm, for better quality."); - public static GUIContent reactiveMaskText = new GUIContent("Reactive Mask", ""); - public static GUIContent reactiveThresholdText = new GUIContent("Reactive Threshold", "Setting this value too small will cause visual instability. Larger values can cause ghosting. Recommended default value is 0.9f."); - public static GUIContent reactiveScaleText = new GUIContent("Reactive Scale", "Larger values result in more reactive pixels. Recommended default value is 0.2f"); - public static GUIContent reactiveBinaryValueText = new GUIContent("Reactive Binary Value", "Recommended default value is 0.5f."); - - - public static GUIContent tcMaskText = new GUIContent("Transparency and Composition Mask", ""); - - public static GUIContent autoTcThresholdText = new GUIContent("T&C Threshold", "Setting this value too small will cause visual instability. Larger values can cause ghosting. Recommended default value is 0.05f."); - public static GUIContent autotcScaleText = new GUIContent("T&C Scale", "Smaller values will increase stability at hard edges of translucent objects. Recommended default value is 1.0f."); - public static GUIContent autoTcReactiveScaleText = new GUIContent("T&C Reactive Scale", "Maximum value reactivity can reach. Recommended default value is 5.0f."); - public static GUIContent autoTcReactiveMaxText = new GUIContent("T&C Max", "Maximum value reactivity can reach. Recommended default value is 0.9f."); - - public static GUIContent mipmapBiasText = new GUIContent("Mipmap Bias Override", "An extra mipmap bias override for if AMD's recommended MipMap Bias values give artifacts"); - public static GUIContent autoTextureUpdateText = new GUIContent("Auto Texture Update", "Wether the mipmap biases of all textures in the scene should automatically be updated"); - public static GUIContent autoUpdateFrequencyText = new GUIContent("Update Frequency", "Interval in seconds in which the mipmap biases should be updated"); - public static GUIContent debugText = new GUIContent("Debug", "Enables debugging in the FSR algorithm, which can help catch certain errors."); - } - } -} diff --git a/Assets/AEG FSR/Editor/FSR3_Editor.cs.meta b/Assets/AEG FSR/Editor/FSR3_Editor.cs.meta deleted file mode 100644 index cf55868..0000000 --- a/Assets/AEG FSR/Editor/FSR3_Editor.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: f9c2618bb9b8fae4db30b591f0ea73bb -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Editor/PipelineDefines.cs b/Assets/AEG FSR/Editor/PipelineDefines.cs deleted file mode 100644 index a999636..0000000 --- a/Assets/AEG FSR/Editor/PipelineDefines.cs +++ /dev/null @@ -1,104 +0,0 @@ -using System.Collections.Generic; -using System.Linq; - -using UnityEditor; -using UnityEngine.Rendering; - -namespace AEG.FSR -{ - [InitializeOnLoad] - public class PipelineDefines - { - enum PipelineType - { - Unsupported, - BIRP, - URP, - HDRP - } - - static PipelineDefines() { - UpdateDefines(); - } - - /// - /// Update the unity pipeline defines for URP - /// - static void UpdateDefines() { - var pipeline = GetPipeline(); - - if(pipeline == PipelineType.URP) { - AddDefine("UNITY_URP"); - } else { - RemoveDefine("UNITY_URP"); - } - if(pipeline == PipelineType.HDRP) { - AddDefine("UNITY_HDRP"); - } else { - RemoveDefine("UNITY_HDRP"); - } - if(pipeline == PipelineType.BIRP) { - AddDefine("UNITY_BIRP"); - } else { - RemoveDefine("UNITY_BIRP"); - } - - AddDefine("AEG_FSR1"); - AddDefine("AEG_FSR3"); - } - - static PipelineType GetPipeline() { -#if UNITY_2019_1_OR_NEWER - if(GraphicsSettings.renderPipelineAsset != null) { - var srpType = GraphicsSettings.renderPipelineAsset.GetType().ToString(); - //HDRP - if(srpType.Contains("HDRenderPipelineAsset")) { - return PipelineType.HDRP; - } - //URP - else if(srpType.Contains("UniversalRenderPipelineAsset") || srpType.Contains("LightweightRenderPipelineAsset")) { - return PipelineType.URP; - } else - return PipelineType.Unsupported; - } -#elif UNITY_2017_1_OR_NEWER - if (GraphicsSettings.renderPipelineAsset != null) { - // SRP not supported before 2019 - return PipelineType.Unsupported; - } -#endif - //BIRP - return PipelineType.BIRP; - } - - static void AddDefine(string define) { - var definesList = GetDefines(); - if(!definesList.Contains(define)) { - definesList.Add(define); - SetDefines(definesList); - } - } - - public static void RemoveDefine(string define) { - var definesList = GetDefines(); - if(definesList.Contains(define)) { - definesList.Remove(define); - SetDefines(definesList); - } - } - - public static List GetDefines() { - var target = EditorUserBuildSettings.activeBuildTarget; - var buildTargetGroup = BuildPipeline.GetBuildTargetGroup(target); - var defines = PlayerSettings.GetScriptingDefineSymbolsForGroup(buildTargetGroup); - return defines.Split(';').ToList(); - } - - public static void SetDefines(List definesList) { - var target = EditorUserBuildSettings.activeBuildTarget; - var buildTargetGroup = BuildPipeline.GetBuildTargetGroup(target); - var defines = string.Join(";", definesList.ToArray()); - PlayerSettings.SetScriptingDefineSymbolsForGroup(buildTargetGroup, defines); - } - } -} \ No newline at end of file diff --git a/Assets/AEG FSR/Editor/PipelineDefines.cs.meta b/Assets/AEG FSR/Editor/PipelineDefines.cs.meta deleted file mode 100644 index a22bbec..0000000 --- a/Assets/AEG FSR/Editor/PipelineDefines.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 4ac315fb3a6afc24e80d9d5ecd91c635 -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Editor/com.alteregogames.aeg-fsr.Editor.asmdef b/Assets/AEG FSR/Editor/com.alteregogames.aeg-fsr.Editor.asmdef deleted file mode 100644 index f967122..0000000 --- a/Assets/AEG FSR/Editor/com.alteregogames.aeg-fsr.Editor.asmdef +++ /dev/null @@ -1,18 +0,0 @@ -{ - "name": "com.alteregogames.aeg-fsr.Editor", - "rootNamespace": "AEG.FSR", - "references": [ - "com.alteregogames.aeg-fsr.Runtime" - ], - "includePlatforms": [ - "Editor" - ], - "excludePlatforms": [], - "allowUnsafeCode": false, - "overrideReferences": false, - "precompiledReferences": [], - "autoReferenced": true, - "defineConstraints": [], - "versionDefines": [], - "noEngineReferences": false -} \ No newline at end of file diff --git a/Assets/AEG FSR/Editor/com.alteregogames.aeg-fsr.Editor.asmdef.meta b/Assets/AEG FSR/Editor/com.alteregogames.aeg-fsr.Editor.asmdef.meta deleted file mode 100644 index 0ead3c5..0000000 --- a/Assets/AEG FSR/Editor/com.alteregogames.aeg-fsr.Editor.asmdef.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 1fe6fe8bb843c0c4eb419e45ed5e1f7c -AssemblyDefinitionImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/LICENSE.md b/Assets/AEG FSR/LICENSE.md deleted file mode 100644 index d47a00b..0000000 --- a/Assets/AEG FSR/LICENSE.md +++ /dev/null @@ -1 +0,0 @@ -All copyright lies with Alterego Games 2023. Do not use, copy, change without permission. \ No newline at end of file diff --git a/Assets/AEG FSR/LICENSE.md.meta b/Assets/AEG FSR/LICENSE.md.meta deleted file mode 100644 index a7a0076..0000000 --- a/Assets/AEG FSR/LICENSE.md.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 484ad65439636d047ba3c2b9a13b7361 -TextScriptImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Offline Documentation.pdf b/Assets/AEG FSR/Offline Documentation.pdf deleted file mode 100644 index afaa512..0000000 Binary files a/Assets/AEG FSR/Offline Documentation.pdf and /dev/null differ diff --git a/Assets/AEG FSR/Offline Documentation.pdf.meta b/Assets/AEG FSR/Offline Documentation.pdf.meta deleted file mode 100644 index 7f0cefe..0000000 --- a/Assets/AEG FSR/Offline Documentation.pdf.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: dc51fef46d95fca419783de0303e5df1 -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Online Documentation.txt b/Assets/AEG FSR/Online Documentation.txt deleted file mode 100644 index 3e061a1..0000000 --- a/Assets/AEG FSR/Online Documentation.txt +++ /dev/null @@ -1 +0,0 @@ -https://docs.google.com/document/d/1vkKyCwL6TKdPUb2rWRMw7B3tEYyl_ZH9uyFqJqrQTwg \ No newline at end of file diff --git a/Assets/AEG FSR/Online Documentation.txt.meta b/Assets/AEG FSR/Online Documentation.txt.meta deleted file mode 100644 index b6265c9..0000000 --- a/Assets/AEG FSR/Online Documentation.txt.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 87fc18c5e13ffdb4880068d8047d3864 -TextScriptImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime.meta b/Assets/AEG FSR/Runtime.meta deleted file mode 100644 index 13e8d51..0000000 --- a/Assets/AEG FSR/Runtime.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: d8bedc0a77979514d97f1037e3343857 -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/BIRP.meta b/Assets/AEG FSR/Runtime/BIRP.meta deleted file mode 100644 index 733578c..0000000 --- a/Assets/AEG FSR/Runtime/BIRP.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 443ab06e35b1e8b41956c8baab3a0eab -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/BIRP/FSR3_BIRP.cs b/Assets/AEG FSR/Runtime/BIRP/FSR3_BIRP.cs deleted file mode 100644 index 9e4339e..0000000 --- a/Assets/AEG FSR/Runtime/BIRP/FSR3_BIRP.cs +++ /dev/null @@ -1,390 +0,0 @@ -#if UNITY_BIRP -using System; -using UnityEngine; -using UnityEngine.Rendering; -using System.Collections; -using FidelityFX; -#if UNITY_EDITOR -using UnityEditor; -#endif -namespace AEG.FSR -{ - /// - /// FSR implementation for the Built-in RenderRipeline - /// - public class FSR3_BIRP : FSR3_BASE - { - // Commandbuffers - private CommandBuffer m_colorGrabPass; - private CommandBuffer m_fsrComputePass; - private CommandBuffer m_opaqueOnlyGrabPass; - private CommandBuffer m_afterOpaqueOnlyGrabPass; - private CommandBuffer m_blitToCamPass; - - // Rendertextures - private RenderTexture m_opaqueOnlyColorBuffer; - private RenderTexture m_afterOpaqueOnlyColorBuffer; - private RenderTexture m_reactiveMaskOutput; - private RenderTexture m_colorBuffer; - private RenderTexture m_fsrOutput; - - // Commandbuffer events - private const CameraEvent m_OPAQUE_ONLY_EVENT = CameraEvent.BeforeForwardAlpha; - private const CameraEvent m_AFTER_OPAQUE_ONLY_EVENT = CameraEvent.AfterForwardAlpha; - private const CameraEvent m_COLOR_EVENT = CameraEvent.BeforeImageEffects; - private const CameraEvent m_FSR_EVENT = CameraEvent.BeforeImageEffects; - private const CameraEvent m_BlITFSR_EVENT = CameraEvent.AfterImageEffects; - - private Matrix4x4 m_jitterMatrix; - private Matrix4x4 m_projectionMatrix; - - private readonly Fsr3.DispatchDescription m_dispatchDescription = new Fsr3.DispatchDescription(); - private readonly Fsr3.GenerateReactiveDescription m_genReactiveDescription = new Fsr3.GenerateReactiveDescription(); - private IFsr3Callbacks Callbacks { get; set; } = new Fsr3CallbacksBase(); - private Fsr3Context m_context; - - private bool initFirstFrame = false; - /// - /// - /// - protected override void InitializeFSR() { - base.InitializeFSR(); - m_mainCamera.depthTextureMode = DepthTextureMode.Depth | DepthTextureMode.MotionVectors; - - m_colorGrabPass = new CommandBuffer { name = "AMD FSR: Color Grab Pass" }; - m_opaqueOnlyGrabPass = new CommandBuffer { name = "AMD FSR: Opaque Only Grab Pass" }; - m_afterOpaqueOnlyGrabPass = new CommandBuffer { name = "AMD FSR: After Opaque Only Grab Pass" }; - m_fsrComputePass = new CommandBuffer { name = "AMD FSR: Compute Pass" }; - m_blitToCamPass = new CommandBuffer { name = "AMD FSR: Blit to Camera" }; - - SendMessage("RemovePPV2CommandBuffers", SendMessageOptions.DontRequireReceiver); - SetupResolution(); - - if(!m_fsrInitialized) { - Camera.onPreRender += OnPreRenderCamera; - Camera.onPostRender += OnPostRenderCamera; - } - - } - - /// - /// Sets up the buffers, initializes the fsr context, and sets up the command buffer - /// Must be recalled whenever the display resolution changes - /// - private void SetupCommandBuffer() { - ClearCommandBufferCoroutine(); - - if(m_colorBuffer) { - if(m_opaqueOnlyColorBuffer) { - m_opaqueOnlyColorBuffer.Release(); - m_afterOpaqueOnlyColorBuffer.Release(); - m_reactiveMaskOutput.Release(); - } - m_colorBuffer.Release(); - m_fsrOutput.Release(); - } - - m_renderWidth = (int)(m_displayWidth / m_scaleFactor); - m_renderHeight = (int)(m_displayHeight / m_scaleFactor); - - m_colorBuffer = new RenderTexture(m_renderWidth, m_renderHeight, 0, RenderTextureFormat.Default); - m_colorBuffer.Create(); - m_fsrOutput = new RenderTexture(m_displayWidth, m_displayHeight, 0, m_mainCamera.allowHDR ? RenderTextureFormat.DefaultHDR : RenderTextureFormat.Default); - m_fsrOutput.enableRandomWrite = true; - m_fsrOutput.Create(); - - - m_dispatchDescription.InputResourceSize = new Vector2Int(m_renderWidth, m_renderHeight); - m_dispatchDescription.Color = m_colorBuffer; - if(m_mainCamera.actualRenderingPath == RenderingPath.Forward) { - m_dispatchDescription.Depth = BuiltinRenderTextureType.Depth; - } else { - m_dispatchDescription.Depth = BuiltinRenderTextureType.ResolvedDepth; - } - m_dispatchDescription.MotionVectors = BuiltinRenderTextureType.MotionVectors; - m_dispatchDescription.Output = m_fsrOutput; - - if(generateReactiveMask) { - m_opaqueOnlyColorBuffer = new RenderTexture(m_colorBuffer); - m_opaqueOnlyColorBuffer.Create(); - m_afterOpaqueOnlyColorBuffer = new RenderTexture(m_colorBuffer); - m_afterOpaqueOnlyColorBuffer.Create(); - m_reactiveMaskOutput = new RenderTexture(m_colorBuffer); - m_reactiveMaskOutput.enableRandomWrite = true; - m_reactiveMaskOutput.Create(); - - m_genReactiveDescription.ColorOpaqueOnly = m_opaqueOnlyColorBuffer; - m_genReactiveDescription.ColorPreUpscale = m_afterOpaqueOnlyColorBuffer; - m_genReactiveDescription.OutReactive = m_reactiveMaskOutput; - m_dispatchDescription.Reactive = m_reactiveMaskOutput; - } else { - m_genReactiveDescription.ColorOpaqueOnly = null; - m_genReactiveDescription.ColorPreUpscale = null; - m_genReactiveDescription.OutReactive = null; - m_dispatchDescription.Reactive = null; - } - //Experimental! (disabled) - if(generateTCMask) { - if(generateReactiveMask) { - m_dispatchDescription.ColorOpaqueOnly = m_reactiveMaskOutput; - } else { - m_dispatchDescription.ColorOpaqueOnly = m_opaqueOnlyColorBuffer; - } - } else { - m_dispatchDescription.ColorOpaqueOnly = null; - } - - if(m_fsrComputePass != null) { - m_mainCamera.RemoveCommandBuffer(m_COLOR_EVENT, m_colorGrabPass); - m_mainCamera.RemoveCommandBuffer(m_FSR_EVENT, m_fsrComputePass); - m_mainCamera.RemoveCommandBuffer(m_BlITFSR_EVENT, m_blitToCamPass); - - if(m_opaqueOnlyGrabPass != null) { - m_mainCamera.RemoveCommandBuffer(m_OPAQUE_ONLY_EVENT, m_opaqueOnlyGrabPass); - m_mainCamera.RemoveCommandBuffer(m_AFTER_OPAQUE_ONLY_EVENT, m_afterOpaqueOnlyGrabPass); - } - } - - m_colorGrabPass.Clear(); - m_fsrComputePass.Clear(); - m_blitToCamPass.Clear(); - - m_colorGrabPass.Blit(BuiltinRenderTextureType.CameraTarget, m_colorBuffer); - - if(generateReactiveMask) { - m_opaqueOnlyGrabPass.Clear(); - m_opaqueOnlyGrabPass.Blit(BuiltinRenderTextureType.CameraTarget, m_opaqueOnlyColorBuffer); - - m_afterOpaqueOnlyGrabPass.Clear(); - m_afterOpaqueOnlyGrabPass.Blit(BuiltinRenderTextureType.CameraTarget, m_afterOpaqueOnlyColorBuffer); - } - - m_blitToCamPass.Blit(m_fsrOutput, BuiltinRenderTextureType.None); - - SendMessage("OverridePPV2TargetTexture", m_colorBuffer, SendMessageOptions.DontRequireReceiver); - buildCommandBuffers = StartCoroutine(BuildCommandBuffer()); - } - - /// - /// Built-in has no way to properly order commandbuffers, so we have to add them in the order we want ourselves. - /// - private Coroutine buildCommandBuffers; - private IEnumerator BuildCommandBuffer() { - SendMessage("RemovePPV2CommandBuffers", SendMessageOptions.DontRequireReceiver); - yield return null; - if(generateReactiveMask) { - if(m_opaqueOnlyGrabPass != null) { - m_mainCamera.AddCommandBuffer(m_OPAQUE_ONLY_EVENT, m_opaqueOnlyGrabPass); - m_mainCamera.AddCommandBuffer(m_AFTER_OPAQUE_ONLY_EVENT, m_afterOpaqueOnlyGrabPass); - } - } - yield return null; - SendMessage("AddPPV2CommandBuffer", SendMessageOptions.DontRequireReceiver); - yield return null; - if(m_fsrComputePass != null) { - m_mainCamera.AddCommandBuffer(m_COLOR_EVENT, m_colorGrabPass); - m_mainCamera.AddCommandBuffer(m_FSR_EVENT, m_fsrComputePass); - m_mainCamera.AddCommandBuffer(m_BlITFSR_EVENT, m_blitToCamPass); - } - - buildCommandBuffers = null; - } - - private void ClearCommandBufferCoroutine() { - if(buildCommandBuffers != null) { - StopCoroutine(buildCommandBuffers); - } - } - - private void OnPreRenderCamera(Camera camera) { - if(camera != m_mainCamera) { - return; - } - - // Set up the parameters to auto-generate a reactive mask - if(generateReactiveMask) { - m_genReactiveDescription.RenderSize = new Vector2Int(m_renderWidth, m_renderHeight); - m_genReactiveDescription.Scale = autoReactiveScale; - m_genReactiveDescription.CutoffThreshold = autoReactiveThreshold; - m_genReactiveDescription.BinaryValue = autoReactiveBinaryValue; - m_genReactiveDescription.Flags = reactiveFlags; - } - - m_dispatchDescription.Exposure = null; - m_dispatchDescription.PreExposure = 1; - m_dispatchDescription.EnableSharpening = sharpening; - m_dispatchDescription.Sharpness = sharpness; - m_dispatchDescription.MotionVectorScale.x = -m_renderWidth; - m_dispatchDescription.MotionVectorScale.y = -m_renderHeight; - m_dispatchDescription.RenderSize = new Vector2Int(m_renderWidth, m_renderHeight); - m_dispatchDescription.FrameTimeDelta = Time.deltaTime; - m_dispatchDescription.CameraNear = m_mainCamera.nearClipPlane; - m_dispatchDescription.CameraFar = m_mainCamera.farClipPlane; - m_dispatchDescription.CameraFovAngleVertical = m_mainCamera.fieldOfView * Mathf.Deg2Rad; - m_dispatchDescription.ViewSpaceToMetersFactor = 1.0f; - m_dispatchDescription.Reset = m_resetCamera; - - //Experimental! (disabled) - m_dispatchDescription.EnableAutoReactive = generateTCMask; - m_dispatchDescription.AutoTcThreshold = autoTcThreshold; - m_dispatchDescription.AutoTcScale = autoTcScale; - m_dispatchDescription.AutoReactiveScale = autoReactiveScale; - m_dispatchDescription.AutoReactiveMax = autoTcReactiveMax; - - m_resetCamera = false; - - if(SystemInfo.usesReversedZBuffer) { - // Swap the near and far clip plane distances as FSR3 expects this when using inverted depth - (m_dispatchDescription.CameraNear, m_dispatchDescription.CameraFar) = (m_dispatchDescription.CameraFar, m_dispatchDescription.CameraNear); - } - - JitterTAA(); - - m_mainCamera.targetTexture = m_colorBuffer; - - //Check if display resolution has changed - if(m_displayWidth != Display.main.renderingWidth || m_displayHeight != Display.main.renderingHeight || m_previousHDR != m_mainCamera.allowHDR) { - SetupResolution(); - } - - if(m_previousScaleFactor != m_scaleFactor || m_previousReactiveMask != generateReactiveMask || m_previousTCMask != generateTCMask || m_previousRenderingPath != m_mainCamera.actualRenderingPath || !initFirstFrame) { - initFirstFrame = true; - SetupFrameBuffers(); - } - UpdateDispatch(); - } - - - private void OnPostRenderCamera(Camera camera) { - if(camera != m_mainCamera) { - return; - } - - m_mainCamera.targetTexture = null; - - m_mainCamera.ResetProjectionMatrix(); - } - - /// - /// TAA Jitter - /// - private void JitterTAA() { - - int jitterPhaseCount = Fsr3.GetJitterPhaseCount(m_renderWidth, (int)(m_renderWidth * m_scaleFactor)); - - Fsr3.GetJitterOffset(out float jitterX, out float jitterY, Time.frameCount, jitterPhaseCount); - m_dispatchDescription.JitterOffset = new Vector2(jitterX, jitterY); - - jitterX = 2.0f * jitterX / (float)m_renderWidth; - jitterY = 2.0f * jitterY / (float)m_renderHeight; - - jitterX += UnityEngine.Random.Range(-0.001f * antiGhosting, 0.001f * antiGhosting); - jitterY += UnityEngine.Random.Range(-0.001f * antiGhosting, 0.001f * antiGhosting); - - m_jitterMatrix = Matrix4x4.Translate(new Vector2(jitterX, jitterY)); - m_projectionMatrix = m_mainCamera.projectionMatrix; - m_mainCamera.nonJitteredProjectionMatrix = m_projectionMatrix; - m_mainCamera.projectionMatrix = m_jitterMatrix * m_projectionMatrix; - m_mainCamera.useJitteredProjectionMatrixForTransparentRendering = true; - } - - /// - /// Creates new buffers and sends them to the plugin - /// - private void SetupFrameBuffers() { - m_previousScaleFactor = m_scaleFactor; - m_previousReactiveMask = generateReactiveMask; - m_previousTCMask = generateTCMask; - - SetupCommandBuffer(); - - m_previousRenderingPath = m_mainCamera.actualRenderingPath; - } - - /// - /// Creates new buffers, sends them to the plugin, and reintilized FSR to adjust the display size - /// - private void SetupResolution() { - m_displayWidth = Display.main.renderingWidth; - m_displayHeight = Display.main.renderingHeight; - - m_previousHDR = m_mainCamera.allowHDR; - - Fsr3.InitializationFlags flags = Fsr3.InitializationFlags.EnableAutoExposure; - - if(m_mainCamera.allowHDR) - flags |= Fsr3.InitializationFlags.EnableHighDynamicRange; - if(enableF16) - flags |= Fsr3.InitializationFlags.EnableFP16Usage; - - if(m_context != null) { - m_context.Destroy(); - m_context = null; - } - - m_context = Fsr3.CreateContext(new Vector2Int(m_displayWidth, m_displayHeight), new Vector2Int((int)(m_displayWidth), (int)(m_displayHeight)), Callbacks, flags); - - SetupFrameBuffers(); - } - - private void UpdateDispatch() { - if(m_fsrComputePass != null) { - m_fsrComputePass.Clear(); - if(generateReactiveMask) { - m_context.GenerateReactiveMask(m_genReactiveDescription, m_fsrComputePass); - } - m_context.Dispatch(m_dispatchDescription, m_fsrComputePass); - } - } - - /// - /// - /// - protected override void DisableFSR() { - base.DisableFSR(); - Camera.onPreRender -= OnPreRenderCamera; - Camera.onPostRender -= OnPostRenderCamera; - - initFirstFrame = false; - - ClearCommandBufferCoroutine(); - SendMessage("ResetPPV2CommandBuffer", SendMessageOptions.DontRequireReceiver); - SendMessage("ResetPPV2TargetTexture", SendMessageOptions.DontRequireReceiver); - - OnResetAllMipMaps(); - - if(m_mainCamera != null) { - m_mainCamera.targetTexture = null; - m_mainCamera.ResetProjectionMatrix(); - - if(m_opaqueOnlyGrabPass != null) { - m_mainCamera.RemoveCommandBuffer(m_OPAQUE_ONLY_EVENT, m_opaqueOnlyGrabPass); - m_mainCamera.RemoveCommandBuffer(m_AFTER_OPAQUE_ONLY_EVENT, m_afterOpaqueOnlyGrabPass); - } - if(m_fsrComputePass != null) { - m_mainCamera.RemoveCommandBuffer(m_COLOR_EVENT, m_colorGrabPass); - m_mainCamera.RemoveCommandBuffer(m_FSR_EVENT, m_fsrComputePass); - m_mainCamera.RemoveCommandBuffer(m_BlITFSR_EVENT, m_blitToCamPass); - } - } - - m_fsrComputePass = m_colorGrabPass = m_opaqueOnlyGrabPass = m_afterOpaqueOnlyGrabPass = m_blitToCamPass = null; - - if(m_colorBuffer) { - if(m_opaqueOnlyColorBuffer) { - m_opaqueOnlyColorBuffer.Release(); - m_afterOpaqueOnlyColorBuffer.Release(); - m_reactiveMaskOutput.Release(); - } - m_colorBuffer.Release(); - m_fsrOutput.Release(); - } - - if(m_context != null) { - m_context.Destroy(); - m_context = null; - } - } - } -} -#endif \ No newline at end of file diff --git a/Assets/AEG FSR/Runtime/BIRP/FSR3_BIRP.cs.meta b/Assets/AEG FSR/Runtime/BIRP/FSR3_BIRP.cs.meta deleted file mode 100644 index 6ec375a..0000000 --- a/Assets/AEG FSR/Runtime/BIRP/FSR3_BIRP.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: be4c116da5e2838419f6a3014139ae82 -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/BIRP/com.alteregogames.aeg-fsr.Runtime.BIRP.asmdef b/Assets/AEG FSR/Runtime/BIRP/com.alteregogames.aeg-fsr.Runtime.BIRP.asmdef deleted file mode 100644 index b767f23..0000000 --- a/Assets/AEG FSR/Runtime/BIRP/com.alteregogames.aeg-fsr.Runtime.BIRP.asmdef +++ /dev/null @@ -1,18 +0,0 @@ -{ - "name": "com.alteregogames.aeg-fsr.Runtime.BIRP", - "rootNamespace": "AEG.FSR", - "references": [ - "com.alteregogames.aeg-fsr.Runtime" - ], - "includePlatforms": [], - "excludePlatforms": [], - "allowUnsafeCode": false, - "overrideReferences": false, - "precompiledReferences": [], - "autoReferenced": true, - "defineConstraints": [ - "UNITY_BIRP" - ], - "versionDefines": [], - "noEngineReferences": false -} \ No newline at end of file diff --git a/Assets/AEG FSR/Runtime/BIRP/com.alteregogames.aeg-fsr.Runtime.BIRP.asmdef.meta b/Assets/AEG FSR/Runtime/BIRP/com.alteregogames.aeg-fsr.Runtime.BIRP.asmdef.meta deleted file mode 100644 index 47b3464..0000000 --- a/Assets/AEG FSR/Runtime/BIRP/com.alteregogames.aeg-fsr.Runtime.BIRP.asmdef.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 31d5deaa26a442749a9d3a76f80b4afa -AssemblyDefinitionImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Enums.cs b/Assets/AEG FSR/Runtime/Enums.cs deleted file mode 100644 index 518c1a7..0000000 --- a/Assets/AEG FSR/Runtime/Enums.cs +++ /dev/null @@ -1,16 +0,0 @@ -using System.Collections; -using System.Collections.Generic; -using UnityEngine; - -namespace AEG.FSR -{ - public enum FSR_Quality - { - Off, - TemporalAntiAliasingOnly, - Quality, - Balanced, - Performance, - UltraPerformance, - } -} diff --git a/Assets/AEG FSR/Runtime/Enums.cs.meta b/Assets/AEG FSR/Runtime/Enums.cs.meta deleted file mode 100644 index 7456127..0000000 --- a/Assets/AEG FSR/Runtime/Enums.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 6e183616e0b3a2a428399eb209c6a3ef -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/FSR3_BASE.cs b/Assets/AEG FSR/Runtime/FSR3_BASE.cs deleted file mode 100644 index 2e2a285..0000000 --- a/Assets/AEG FSR/Runtime/FSR3_BASE.cs +++ /dev/null @@ -1,312 +0,0 @@ -using System; -using System.Runtime.InteropServices; -using System.Runtime.CompilerServices; - -using UnityEngine; -using FidelityFX; - -[assembly: InternalsVisibleTo("com.alteregogames.aeg-fsr.Runtime.BIRP")] -[assembly: InternalsVisibleTo("com.alteregogames.aeg-fsr.Runtime.URP")] -[assembly: InternalsVisibleTo("com.alteregogames.aeg-fsr.Runtime.HDRP")] - -namespace AEG.FSR -{ - /// - /// Base script for FSR - /// - [RequireComponent(typeof(Camera))] - public abstract class FSR3_BASE : MonoBehaviour - { - //Public Variables - public FSR_Quality FSRQuality = FSR_Quality.Balanced; - [Range(0, 1)] - public float antiGhosting = 0.0f; - public static float FSRScaleFactor; - - public bool sharpening = true; - public float sharpness = 0.5f; - - public bool enableF16; - public bool enableAutoExposure = true; - public bool generateReactiveMask = true; - public bool generateTCMask = false; - - public float autoReactiveScale = 0.9f; - public float autoReactiveThreshold = 0.05f; - public float autoReactiveBinaryValue = 0.5f; - - public float autoTcThreshold = 0.05f; - public float autoTcScale = 1f; - public float autoTcReactiveScale = 5f; - public float autoTcReactiveMax = 0.9f; - - public Fsr3.GenerateReactiveFlags reactiveFlags = Fsr3.GenerateReactiveFlags.ApplyTonemap | Fsr3.GenerateReactiveFlags.ApplyThreshold | Fsr3.GenerateReactiveFlags.UseComponentsMax; - - public float mipmapBiasOverride = 1.0f; - public bool autoTextureUpdate = true; - public float mipMapUpdateFrequency = 2f; - - //Protected Variables - protected bool m_fsrInitialized = false; - protected Camera m_mainCamera; - - protected float m_scaleFactor = 1.5f; - protected int m_renderWidth, m_renderHeight; - protected int m_displayWidth, m_displayHeight; - - protected float m_nearClipPlane, m_farClipPlane, m_fieldOfView; - - protected FSR_Quality m_previousFsrQuality; - protected bool m_previousHDR; - - protected bool m_previousReactiveMask; - protected bool m_previousTCMask; - protected float m_previousScaleFactor; - protected RenderingPath m_previousRenderingPath; - - //Mipmap variables - protected Texture[] m_allTextures; - protected ulong m_previousLength; - protected float m_mipMapBias; - protected float m_prevMipMapBias; - protected float m_mipMapTimer = float.MaxValue; - - public bool m_resetCamera = false; - - #region Public API - /// - /// Set FSR Quality settings. - /// Quality = 1.5, Balanced = 1.7, Performance = 2, Ultra Performance = 3 - /// - public void OnSetQuality(FSR_Quality value) { - m_previousFsrQuality = value; - FSRQuality = value; - - if(value == FSR_Quality.Off) { - Initialize(); - DisableFSR(); - m_scaleFactor = 1; - } else { - switch(value) { - case FSR_Quality.TemporalAntiAliasingOnly: - m_scaleFactor = 1.0f; - break; - case FSR_Quality.Quality: - m_scaleFactor = 1.5f; - break; - case FSR_Quality.Balanced: - m_scaleFactor = 1.7f; - break; - case FSR_Quality.Performance: - m_scaleFactor = 2.0f; - break; - case FSR_Quality.UltraPerformance: - m_scaleFactor = 3.0f; - break; - } - - Initialize(); - } - FSRScaleFactor = m_scaleFactor; - } - - public void OnSetAdaptiveQuality(float _value) { - m_scaleFactor = _value; - } - - /// - /// Checks wether FSR is compatible using the current build settings - /// - /// - public bool OnIsSupported() { - bool fsr2Compatible = SystemInfo.supportsComputeShaders; - enableF16 = SystemInfo.IsFormatSupported(UnityEngine.Experimental.Rendering.GraphicsFormat.R16_SFloat, UnityEngine.Experimental.Rendering.FormatUsage.Render); - - return fsr2Compatible; - } - - /// - /// Resets the camera for the next frame, clearing all the buffers saved from previous frames in order to prevent artifacts. - /// Should be called in or before PreRender oh the frame where the camera makes a jumpcut. - /// Is automatically disabled the frame after. - /// - public void OnResetCamera() { - m_resetCamera = true; - } - - /// - /// Updates a single texture to the set MipMap Bias. - /// Should be called when an object is instantiated, or when the ScaleFactor is changed. - /// - public void OnMipmapSingleTexture(Texture texture) { - texture.mipMapBias = m_mipMapBias; - } - - /// - /// Updates all textures currently loaded to the set MipMap Bias. - /// Should be called when a lot of new textures are loaded, or when the ScaleFactor is changed. - /// - public void OnMipMapAllTextures() { - m_allTextures = Resources.FindObjectsOfTypeAll(typeof(Texture)) as Texture[]; - for(int i = 0; i < m_allTextures.Length; i++) { - m_allTextures[i].mipMapBias = m_mipMapBias; - } - } - /// - /// Resets all currently loaded textures to the default mipmap bias. - /// - public void OnResetAllMipMaps() { - m_prevMipMapBias = -1; - - m_allTextures = Resources.FindObjectsOfTypeAll(typeof(Texture)) as Texture[]; - for(int i = 0; i < m_allTextures.Length; i++) { - m_allTextures[i].mipMapBias = 0; - } - m_allTextures = null; - } - #endregion - - protected virtual void Initialize() { - bool fsr2Compatible = OnIsSupported(); - - //Reset mipmap timer so mipmap are instantly updated if automatic mip map is turned on - m_mipMapTimer = float.MaxValue; - - if(m_fsrInitialized || !Application.isPlaying) { - return; - } - if(fsr2Compatible) { - InitializeFSR(); - m_fsrInitialized = true; - } else { - Debug.LogWarning($"FSR 2 is not supported"); - enabled = false; - } - - } - - /// - /// Initializes everything in order to run FSR - /// - protected virtual void InitializeFSR() { - m_mainCamera = GetComponent(); - } - - protected virtual void OnEnable() { -#if AMD_FIDELITY_FSR3_DEBUG - RegisterDebugCallback(OnDebugCallback); -#endif - - OnSetQuality(FSRQuality); - } - - protected virtual void Update() { - if(m_previousFsrQuality != FSRQuality) { - OnSetQuality(FSRQuality); - } - - if(!m_fsrInitialized) { - return; - } -#if UNITY_BIRP - if(autoTextureUpdate) { - UpdateMipMaps(); - } -#endif - } - - protected virtual void OnDisable() { - DisableFSR(); - } - - protected virtual void OnDestroy() { - DisableFSR(); - } - - /// - /// Disables FSR and cleans up - /// - protected virtual void DisableFSR() { - m_fsrInitialized = false; - } - - - #region Automatic Mip Map -#if UNITY_BIRP - /// - /// Automatically updates the mipmap of all loaded textures - /// - protected void UpdateMipMaps() { - m_mipMapTimer += Time.deltaTime; - - if(m_mipMapTimer > mipMapUpdateFrequency) { - m_mipMapTimer = 0; - - m_mipMapBias = (Mathf.Log((float)(m_renderWidth) / (float)(m_displayWidth), 2f) - 1) * mipmapBiasOverride; - - if(m_previousLength != Texture.currentTextureMemory || m_prevMipMapBias != m_mipMapBias) { - m_prevMipMapBias = m_mipMapBias; - m_previousLength = Texture.currentTextureMemory; - - OnMipMapAllTextures(); - } - } - } -#endif - #endregion - - - #region Debug - -#if AMD_FIDELITY_FSR3_DEBUG - /// - /// Register a callback to send debugging information - /// - /// - [DllImport(m_DLL, CallingConvention = CallingConvention.Cdecl)] - static extern void RegisterDebugCallback(debugCallback cb); - - /// - /// Delegate for a debug callback - /// - delegate void debugCallback(IntPtr request, int messageType, int color, int size); - enum Color { red, green, blue, black, white, yellow, orange }; - enum MessageType { Error, Warning, Info }; - - /// - /// Callback for debug messages send from the plugin - /// - /// Debug message - /// Message type - /// Color to use in the console - /// Size of the string - static void OnDebugCallback(IntPtr request, int messageType, int color, int size) - { - //Ptr to string - string debug_string = Marshal.PtrToStringAnsi(request, size); - - //Add Specified Color - debug_string = - String.Format("{0}{1}{2}", - $"", - debug_string, - "" - ); - - switch ((MessageType)messageType) - { - case MessageType.Error: - Debug.LogError(debug_string); - break; - case MessageType.Warning: - Debug.LogWarning(debug_string); - break; - default: - Debug.Log(debug_string); - break; - } - } -#endif - #endregion - } -} diff --git a/Assets/AEG FSR/Runtime/FSR3_BASE.cs.meta b/Assets/AEG FSR/Runtime/FSR3_BASE.cs.meta deleted file mode 100644 index 23a0ce8..0000000 --- a/Assets/AEG FSR/Runtime/FSR3_BASE.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 270ff55c81825d04e992a41dc662766a -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins.meta b/Assets/AEG FSR/Runtime/Plugins.meta deleted file mode 100644 index e851466..0000000 --- a/Assets/AEG FSR/Runtime/Plugins.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 59fd08c4d986c3f4eb0b9f256856db2a -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources.meta b/Assets/AEG FSR/Runtime/Plugins/Resources.meta deleted file mode 100644 index 68b3a1f..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: e9f0008aec6b7a64eb49f90e1a8e3c18 -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1.meta deleted file mode 100644 index f02f586..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 48842dea9c7003c4db12eef2194986fb -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/EdgeAdaptiveScaleUpsampling.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/EdgeAdaptiveScaleUpsampling.compute deleted file mode 100644 index 3b9901b..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/EdgeAdaptiveScaleUpsampling.compute +++ /dev/null @@ -1,73 +0,0 @@ -#pragma kernel KMain -#pragma kernel KInitialize - -#define A_GPU 1 -#define A_HLSL 1 -#define FSR_EASU_F 1 - -#define float float -#define float2 float2 -#define float3 float3 -#define float4 float4 - -#include "ffx_a.hlsl" - - -RWStructuredBuffer _EASUParameters; - -float4 _EASUViewportSize; -float4 _EASUInputImageSize; -float4 _EASUOutputSize; - -SamplerState s_linear_clamp_sampler; -Texture2D InputTexture; -RWTexture2D OutputTexture; - -AF4 FsrEasuRF(AF2 p) { AF4 res = InputTexture.GatherRed(s_linear_clamp_sampler, p, ASU2(0, 0)); return res; } -AF4 FsrEasuGF(AF2 p) { AF4 res = InputTexture.GatherGreen(s_linear_clamp_sampler, p, ASU2(0, 0)); return res; } -AF4 FsrEasuBF(AF2 p) { AF4 res = InputTexture.GatherBlue(s_linear_clamp_sampler, p, ASU2(0, 0)); return res; } - -#include "ffx_fsr1.hlsl" -//#include "UnityCG.cginc" - -void Upscale(AU2 pos) { - AF3 c; - FsrEasuF(c.rgb, pos, _EASUParameters[0], _EASUParameters[1], _EASUParameters[2], _EASUParameters[3]); - OutputTexture[pos] = AF4(c, 1); -} - -//Main -[numthreads(64, 1, 1)] -void KMain(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 dispatchThreadId : SV_DispatchThreadID) -{ - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. - AU2 gxy = ARmp8x8(LocalThreadId.x) + AU2(WorkGroupId.x<<3u, WorkGroupId.y<<3u); - -#ifdef _ALPHA - float2 uv = ((float2)gxy.xy + 0.5) * _EASUOutputSize.zw; - float alpha = InputImage.SampleLevel(s_linear_clamp_sampler, ClampAndScaleUVForBilinear(uv), 0).a; -#else - float alpha = 1.0; -#endif - - Upscale(gxy); -} - -//Init -[numthreads(1,1,1)] -void KInitialize() -{ - AU4 con0 = (AU4)0; - AU4 con1 = (AU4)0; - AU4 con2 = (AU4)0; - AU4 con3 = (AU4)0; - FsrEasuCon(con0,con1,con2,con3, - _EASUViewportSize.x, _EASUViewportSize.y, - _EASUInputImageSize.x,_EASUInputImageSize.y, - _EASUOutputSize.x, _EASUOutputSize.y); - - _EASUParameters[0] = con0; - _EASUParameters[1] = con1; - _EASUParameters[2] = con2; - _EASUParameters[3] = con3; -} \ No newline at end of file diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/EdgeAdaptiveScaleUpsampling.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/EdgeAdaptiveScaleUpsampling.compute.meta deleted file mode 100644 index 571f815..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/EdgeAdaptiveScaleUpsampling.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 4b4ecaf3c46dfc545be5f6c2e858f12c -ComputeShaderImporter: - externalObjects: {} - currentAPIMask: 4194308 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/LICENSE.txt b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/LICENSE.txt deleted file mode 100644 index 6c519aa..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/LICENSE.txt +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. - -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. \ No newline at end of file diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/LICENSE.txt.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/LICENSE.txt.meta deleted file mode 100644 index 795d6e0..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/LICENSE.txt.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 260d0d882c7ad0d41a7df2bc5a3cf5f1 -TextScriptImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/RobustContrastAdaptiveSharpen.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/RobustContrastAdaptiveSharpen.compute deleted file mode 100644 index 5207209..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/RobustContrastAdaptiveSharpen.compute +++ /dev/null @@ -1,57 +0,0 @@ -#pragma kernel KMain -#pragma kernel KInitialize - -#define A_GPU 1 -#define A_HLSL 1 -#define FSR_RCAS_F 1 - -#define float float -#define float2 float2 -#define float3 float3 -#define float4 float4 - -#include "ffx_a.hlsl" - -RWStructuredBuffer _RCASParameters; - -float _RCASScale; - -SamplerState samLinearClamp; -Texture2D InputTexture; -RWTexture2D OutputTexture; - -AF4 FsrRcasLoadF(ASU2 p) { return InputTexture.Load(int3(ASU2(p), 0)); } -void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} - -#include "ffx_fsr1.hlsl" -//#include "UnityCG.cginc" - -void Sharpen(AU2 pos) { - AF3 c; - FsrRcasF(c.r, c.g, c.b, pos, _RCASParameters[0]); - OutputTexture[pos] = AF4(c, 1); -} - -//Main -[numthreads(64, 1, 1)] -void KMain(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 dispatchThreadId : SV_DispatchThreadID) -{ - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. - AU2 gxy = ARmp8x8(LocalThreadId.x) + AU2(WorkGroupId.x << 3u, WorkGroupId.y << 3u); -#ifdef _ALPHA - float alpha = InputImage.SampleLevel(s_linear_clamp_sampler, gxy.xy, 0).a; -#else - float alpha = 1.0; -#endif - - Sharpen(gxy); -} - -//Init -[numthreads(1,1,1)] -void KInitialize() -{ - AU4 con; - FsrRcasCon(con, _RCASScale); - _RCASParameters[0] = con; -} \ No newline at end of file diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/RobustContrastAdaptiveSharpen.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/RobustContrastAdaptiveSharpen.compute.meta deleted file mode 100644 index a87694e..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/RobustContrastAdaptiveSharpen.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 9cbfc0f225930d149ba504b5bd5906aa -ComputeShaderImporter: - externalObjects: {} - currentAPIMask: 4194308 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_a.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_a.hlsl deleted file mode 100644 index b6cd05a..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_a.hlsl +++ /dev/null @@ -1,2656 +0,0 @@ -//============================================================================================================================== -// -// [A] SHADER PORTABILITY 1.20210629 -// -//============================================================================================================================== -// FidelityFX Super Resolution Sample -// -// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -//------------------------------------------------------------------------------------------------------------------------------ -// MIT LICENSE -// =========== -// Copyright (c) 2014 Michal Drobot (for concepts used in "FLOAT APPROXIMATIONS"). -// ----------- -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// ----------- -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the -// Software. -// ----------- -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -//------------------------------------------------------------------------------------------------------------------------------ -// ABOUT -// ===== -// Common central point for high-level shading language and C portability for various shader headers. -//------------------------------------------------------------------------------------------------------------------------------ -// DEFINES -// ======= -// A_CPU ..... Include the CPU related code. -// A_GPU ..... Include the GPU related code. -// A_GLSL .... Using GLSL. -// A_HLSL .... Using HLSL. -// A_HLSL_6_2 Using HLSL 6.2 with new 'uint16_t' and related types (requires '-enable-16bit-types'). -// A_NO_16_BIT_CAST Don't use instructions that are not availabe in SPIR-V (needed for running A_HLSL_6_2 on Vulkan) -// A_GCC ..... Using a GCC compatible compiler (else assume MSVC compatible compiler by default). -// ======= -// A_BYTE .... Support 8-bit integer. -// A_HALF .... Support 16-bit integer and floating point. -// A_LONG .... Support 64-bit integer. -// A_DUBL .... Support 64-bit floating point. -// ======= -// A_WAVE .... Support wave-wide operations. -//------------------------------------------------------------------------------------------------------------------------------ -// To get #include "ffx_a.h" working in GLSL use '#extension GL_GOOGLE_include_directive:require'. -//------------------------------------------------------------------------------------------------------------------------------ -// SIMPLIFIED TYPE SYSTEM -// ====================== -// - All ints will be unsigned with exception of when signed is required. -// - Type naming simplified and shortened "A<#components>", -// - H = 16-bit float (half) -// - F = 32-bit float (float) -// - D = 64-bit float (double) -// - P = 1-bit integer (predicate, not using bool because 'B' is used for byte) -// - B = 8-bit integer (byte) -// - W = 16-bit integer (word) -// - U = 32-bit integer (unsigned) -// - L = 64-bit integer (long) -// - Using "AS<#components>" for signed when required. -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Make sure 'ALerp*(a,b,m)' does 'b*m+(-a*m+a)' (2 ops). -//------------------------------------------------------------------------------------------------------------------------------ -// CHANGE LOG -// ========== -// 20200914 - Expanded wave ops and prx code. -// 20200713 - Added [ZOL] section, fixed serious bugs in sRGB and Rec.709 color conversion code, etc. -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// COMMON -//============================================================================================================================== -#define A_2PI 6.28318530718 -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// CPU -// -// -//============================================================================================================================== -#ifdef A_CPU - // Supporting user defined overrides. - #ifndef A_RESTRICT - #define A_RESTRICT __restrict - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifndef A_STATIC - #define A_STATIC static - #endif -//------------------------------------------------------------------------------------------------------------------------------ - // Same types across CPU and GPU. - // Predicate uses 32-bit integer (C friendly bool). - typedef uint32_t AP1; - typedef float AF1; - typedef double AD1; - typedef uint8_t AB1; - typedef uint16_t AW1; - typedef uint32_t AU1; - typedef uint64_t AL1; - typedef int8_t ASB1; - typedef int16_t ASW1; - typedef int32_t ASU1; - typedef int64_t ASL1; -//------------------------------------------------------------------------------------------------------------------------------ - #define AD1_(a) ((AD1)(a)) - #define AF1_(a) ((AF1)(a)) - #define AL1_(a) ((AL1)(a)) - #define AU1_(a) ((AU1)(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASL1_(a) ((ASL1)(a)) - #define ASU1_(a) ((ASU1)(a)) -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AU1 AU1_AF1(AF1 a){union{AF1 f;AU1 u;}bits;bits.f=a;return bits.u;} -//------------------------------------------------------------------------------------------------------------------------------ - #define A_TRUE 1 - #define A_FALSE 0 -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// CPU/GPU PORTING -// -//------------------------------------------------------------------------------------------------------------------------------ -// Get CPU and GPU to share all setup code, without duplicate code paths. -// This uses a lower-case prefix for special vector constructs. -// - In C restrict pointers are used. -// - In the shading language, in/inout/out arguments are used. -// This depends on the ability to access a vector value in both languages via array syntax (aka color[2]). -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY -//============================================================================================================================== - #define retAD2 AD1 *A_RESTRICT - #define retAD3 AD1 *A_RESTRICT - #define retAD4 AD1 *A_RESTRICT - #define retAF2 AF1 *A_RESTRICT - #define retAF3 AF1 *A_RESTRICT - #define retAF4 AF1 *A_RESTRICT - #define retAL2 AL1 *A_RESTRICT - #define retAL3 AL1 *A_RESTRICT - #define retAL4 AL1 *A_RESTRICT - #define retAU2 AU1 *A_RESTRICT - #define retAU3 AU1 *A_RESTRICT - #define retAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define inAD2 AD1 *A_RESTRICT - #define inAD3 AD1 *A_RESTRICT - #define inAD4 AD1 *A_RESTRICT - #define inAF2 AF1 *A_RESTRICT - #define inAF3 AF1 *A_RESTRICT - #define inAF4 AF1 *A_RESTRICT - #define inAL2 AL1 *A_RESTRICT - #define inAL3 AL1 *A_RESTRICT - #define inAL4 AL1 *A_RESTRICT - #define inAU2 AU1 *A_RESTRICT - #define inAU3 AU1 *A_RESTRICT - #define inAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define inoutAD2 AD1 *A_RESTRICT - #define inoutAD3 AD1 *A_RESTRICT - #define inoutAD4 AD1 *A_RESTRICT - #define inoutAF2 AF1 *A_RESTRICT - #define inoutAF3 AF1 *A_RESTRICT - #define inoutAF4 AF1 *A_RESTRICT - #define inoutAL2 AL1 *A_RESTRICT - #define inoutAL3 AL1 *A_RESTRICT - #define inoutAL4 AL1 *A_RESTRICT - #define inoutAU2 AU1 *A_RESTRICT - #define inoutAU3 AU1 *A_RESTRICT - #define inoutAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define outAD2 AD1 *A_RESTRICT - #define outAD3 AD1 *A_RESTRICT - #define outAD4 AD1 *A_RESTRICT - #define outAF2 AF1 *A_RESTRICT - #define outAF3 AF1 *A_RESTRICT - #define outAF4 AF1 *A_RESTRICT - #define outAL2 AL1 *A_RESTRICT - #define outAL3 AL1 *A_RESTRICT - #define outAL4 AL1 *A_RESTRICT - #define outAU2 AU1 *A_RESTRICT - #define outAU3 AU1 *A_RESTRICT - #define outAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define varAD2(x) AD1 x[2] - #define varAD3(x) AD1 x[3] - #define varAD4(x) AD1 x[4] - #define varAF2(x) AF1 x[2] - #define varAF3(x) AF1 x[3] - #define varAF4(x) AF1 x[4] - #define varAL2(x) AL1 x[2] - #define varAL3(x) AL1 x[3] - #define varAL4(x) AL1 x[4] - #define varAU2(x) AU1 x[2] - #define varAU3(x) AU1 x[3] - #define varAU4(x) AU1 x[4] -//------------------------------------------------------------------------------------------------------------------------------ - #define initAD2(x,y) {x,y} - #define initAD3(x,y,z) {x,y,z} - #define initAD4(x,y,z,w) {x,y,z,w} - #define initAF2(x,y) {x,y} - #define initAF3(x,y,z) {x,y,z} - #define initAF4(x,y,z,w) {x,y,z,w} - #define initAL2(x,y) {x,y} - #define initAL3(x,y,z) {x,y,z} - #define initAL4(x,y,z,w) {x,y,z,w} - #define initAU2(x,y) {x,y} - #define initAU3(x,y,z) {x,y,z} - #define initAU4(x,y,z,w) {x,y,z,w} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Replace transcendentals with manual versions. -//============================================================================================================================== - #ifdef A_GCC - A_STATIC AD1 AAbsD1(AD1 a){return __builtin_fabs(a);} - A_STATIC AF1 AAbsF1(AF1 a){return __builtin_fabsf(a);} - A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(__builtin_abs(ASU1_(a)));} - A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(__builtin_llabs(ASL1_(a)));} - #else - A_STATIC AD1 AAbsD1(AD1 a){return fabs(a);} - A_STATIC AF1 AAbsF1(AF1 a){return fabsf(a);} - A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(abs(ASU1_(a)));} - A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(labs((long)ASL1_(a)));} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ACosD1(AD1 a){return __builtin_cos(a);} - A_STATIC AF1 ACosF1(AF1 a){return __builtin_cosf(a);} - #else - A_STATIC AD1 ACosD1(AD1 a){return cos(a);} - A_STATIC AF1 ACosF1(AF1 a){return cosf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ADotD2(inAD2 a,inAD2 b){return a[0]*b[0]+a[1]*b[1];} - A_STATIC AD1 ADotD3(inAD3 a,inAD3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} - A_STATIC AD1 ADotD4(inAD4 a,inAD4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} - A_STATIC AF1 ADotF2(inAF2 a,inAF2 b){return a[0]*b[0]+a[1]*b[1];} - A_STATIC AF1 ADotF3(inAF3 a,inAF3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} - A_STATIC AF1 ADotF4(inAF4 a,inAF4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 AExp2D1(AD1 a){return __builtin_exp2(a);} - A_STATIC AF1 AExp2F1(AF1 a){return __builtin_exp2f(a);} - #else - A_STATIC AD1 AExp2D1(AD1 a){return exp2(a);} - A_STATIC AF1 AExp2F1(AF1 a){return exp2f(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 AFloorD1(AD1 a){return __builtin_floor(a);} - A_STATIC AF1 AFloorF1(AF1 a){return __builtin_floorf(a);} - #else - A_STATIC AD1 AFloorD1(AD1 a){return floor(a);} - A_STATIC AF1 AFloorF1(AF1 a){return floorf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ALerpD1(AD1 a,AD1 b,AD1 c){return b*c+(-a*c+a);} - A_STATIC AF1 ALerpF1(AF1 a,AF1 b,AF1 c){return b*c+(-a*c+a);} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ALog2D1(AD1 a){return __builtin_log2(a);} - A_STATIC AF1 ALog2F1(AF1 a){return __builtin_log2f(a);} - #else - A_STATIC AD1 ALog2D1(AD1 a){return log2(a);} - A_STATIC AF1 ALog2F1(AF1 a){return log2f(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 AMaxD1(AD1 a,AD1 b){return a>b?a:b;} - A_STATIC AF1 AMaxF1(AF1 a,AF1 b){return a>b?a:b;} - A_STATIC AL1 AMaxL1(AL1 a,AL1 b){return a>b?a:b;} - A_STATIC AU1 AMaxU1(AU1 a,AU1 b){return a>b?a:b;} -//------------------------------------------------------------------------------------------------------------------------------ - // These follow the convention that A integer types don't have signage, until they are operated on. - A_STATIC AL1 AMaxSL1(AL1 a,AL1 b){return (ASL1_(a)>ASL1_(b))?a:b;} - A_STATIC AU1 AMaxSU1(AU1 a,AU1 b){return (ASU1_(a)>ASU1_(b))?a:b;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 AMinD1(AD1 a,AD1 b){return a>ASL1_(b));} - A_STATIC AU1 AShrSU1(AU1 a,AU1 b){return AU1_(ASU1_(a)>>ASU1_(b));} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ASinD1(AD1 a){return __builtin_sin(a);} - A_STATIC AF1 ASinF1(AF1 a){return __builtin_sinf(a);} - #else - A_STATIC AD1 ASinD1(AD1 a){return sin(a);} - A_STATIC AF1 ASinF1(AF1 a){return sinf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ASqrtD1(AD1 a){return __builtin_sqrt(a);} - A_STATIC AF1 ASqrtF1(AF1 a){return __builtin_sqrtf(a);} - #else - A_STATIC AD1 ASqrtD1(AD1 a){return sqrt(a);} - A_STATIC AF1 ASqrtF1(AF1 a){return sqrtf(a);} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS - DEPENDENT -//============================================================================================================================== - A_STATIC AD1 AClampD1(AD1 x,AD1 n,AD1 m){return AMaxD1(n,AMinD1(x,m));} - A_STATIC AF1 AClampF1(AF1 x,AF1 n,AF1 m){return AMaxF1(n,AMinF1(x,m));} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 AFractD1(AD1 a){return a-AFloorD1(a);} - A_STATIC AF1 AFractF1(AF1 a){return a-AFloorF1(a);} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 APowD1(AD1 a,AD1 b){return AExp2D1(b*ALog2D1(a));} - A_STATIC AF1 APowF1(AF1 a,AF1 b){return AExp2F1(b*ALog2F1(a));} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ARsqD1(AD1 a){return ARcpD1(ASqrtD1(a));} - A_STATIC AF1 ARsqF1(AF1 a){return ARcpF1(ASqrtF1(a));} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ASatD1(AD1 a){return AMinD1(1.0,AMaxD1(0.0,a));} - A_STATIC AF1 ASatF1(AF1 a){return AMinF1(1.0f,AMaxF1(0.0f,a));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR OPS -//------------------------------------------------------------------------------------------------------------------------------ -// These are added as needed for production or prototyping, so not necessarily a complete set. -// They follow a convention of taking in a destination and also returning the destination value to increase utility. -//============================================================================================================================== - A_STATIC retAD2 opAAbsD2(outAD2 d,inAD2 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);return d;} - A_STATIC retAD3 opAAbsD3(outAD3 d,inAD3 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);return d;} - A_STATIC retAD4 opAAbsD4(outAD4 d,inAD4 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);d[3]=AAbsD1(a[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAAbsF2(outAF2 d,inAF2 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);return d;} - A_STATIC retAF3 opAAbsF3(outAF3 d,inAF3 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);return d;} - A_STATIC retAF4 opAAbsF4(outAF4 d,inAF4 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);d[3]=AAbsF1(a[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} - A_STATIC retAD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} - A_STATIC retAD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} - A_STATIC retAF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} - A_STATIC retAF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;} - A_STATIC retAD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;} - A_STATIC retAD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;} - A_STATIC retAF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;} - A_STATIC retAF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;} -//============================================================================================================================== - A_STATIC retAD2 opACpyD2(outAD2 d,inAD2 a){d[0]=a[0];d[1]=a[1];return d;} - A_STATIC retAD3 opACpyD3(outAD3 d,inAD3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} - A_STATIC retAD4 opACpyD4(outAD4 d,inAD4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opACpyF2(outAF2 d,inAF2 a){d[0]=a[0];d[1]=a[1];return d;} - A_STATIC retAF3 opACpyF3(outAF3 d,inAF3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} - A_STATIC retAF4 opACpyF4(outAF4 d,inAF4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);return d;} - A_STATIC retAD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);return d;} - A_STATIC retAD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);d[3]=ALerpD1(a[3],b[3],c[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);return d;} - A_STATIC retAF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);return d;} - A_STATIC retAF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);d[3]=ALerpF1(a[3],b[3],c[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);return d;} - A_STATIC retAD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);return d;} - A_STATIC retAD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);d[3]=ALerpD1(a[3],b[3],c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);return d;} - A_STATIC retAF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);return d;} - A_STATIC retAF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);d[3]=ALerpF1(a[3],b[3],c);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);return d;} - A_STATIC retAD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);return d;} - A_STATIC retAD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);d[3]=AMaxD1(a[3],b[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);return d;} - A_STATIC retAF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);return d;} - A_STATIC retAF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);d[3]=AMaxF1(a[3],b[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);return d;} - A_STATIC retAD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);return d;} - A_STATIC retAD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);d[3]=AMinD1(a[3],b[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);return d;} - A_STATIC retAF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);return d;} - A_STATIC retAF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);d[3]=AMinF1(a[3],b[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;} - A_STATIC retAD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;} - A_STATIC retAD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;} - A_STATIC retAF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;} - A_STATIC retAF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;} - A_STATIC retAD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;} - A_STATIC retAD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;} - A_STATIC retAF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;} - A_STATIC retAF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;} -//============================================================================================================================== - A_STATIC retAD2 opANegD2(outAD2 d,inAD2 a){d[0]=-a[0];d[1]=-a[1];return d;} - A_STATIC retAD3 opANegD3(outAD3 d,inAD3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;} - A_STATIC retAD4 opANegD4(outAD4 d,inAD4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opANegF2(outAF2 d,inAF2 a){d[0]=-a[0];d[1]=-a[1];return d;} - A_STATIC retAF3 opANegF3(outAF3 d,inAF3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;} - A_STATIC retAF4 opANegF4(outAF4 d,inAF4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opARcpD2(outAD2 d,inAD2 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);return d;} - A_STATIC retAD3 opARcpD3(outAD3 d,inAD3 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);return d;} - A_STATIC retAD4 opARcpD4(outAD4 d,inAD4 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);d[3]=ARcpD1(a[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opARcpF2(outAF2 d,inAF2 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);return d;} - A_STATIC retAF3 opARcpF3(outAF3 d,inAF3 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);return d;} - A_STATIC retAF4 opARcpF4(outAF4 d,inAF4 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);d[3]=ARcpF1(a[3]);return d;} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HALF FLOAT PACKING -//============================================================================================================================== - // Convert float to half (in lower 16-bits of output). - // Same fast technique as documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf - // Supports denormals. - // Conversion rules are to make computations possibly "safer" on the GPU, - // -INF & -NaN -> -65504 - // +INF & +NaN -> +65504 - A_STATIC AU1 AU1_AH1_AF1(AF1 f){ - static AW1 base[512]={ - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100, - 0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00, - 0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100, - 0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00, - 0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff}; - static AB1 shift[512]={ - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, - 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, - 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, - 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, - 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18}; - union{AF1 f;AU1 u;}bits;bits.f=f;AU1 u=bits.u;AU1 i=u>>23;return (AU1)(base[i])+((u&0x7fffff)>>shift[i]);} -//------------------------------------------------------------------------------------------------------------------------------ - // Used to output packed constant. - A_STATIC AU1 AU1_AH2_AF2(inAF2 a){return AU1_AH1_AF1(a[0])+(AU1_AH1_AF1(a[1])<<16);} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// GLSL -// -// -//============================================================================================================================== -#if defined(A_GLSL) && defined(A_GPU) - #ifndef A_SKIP_EXT - #ifdef A_HALF - #extension GL_EXT_shader_16bit_storage:require - #extension GL_EXT_shader_explicit_arithmetic_types:require - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_LONG - #extension GL_ARB_gpu_shader_int64:require - #extension GL_NV_shader_atomic_int64:require - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_WAVE - #extension GL_KHR_shader_subgroup_arithmetic:require - #extension GL_KHR_shader_subgroup_ballot:require - #extension GL_KHR_shader_subgroup_quad:require - #extension GL_KHR_shader_subgroup_shuffle:require - #endif - #endif -//============================================================================================================================== - #define AP1 bool - #define AP2 bvec2 - #define AP3 bvec3 - #define AP4 bvec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF1 float - #define AF2 vec2 - #define AF3 vec3 - #define AF4 vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1 uint - #define AU2 uvec2 - #define AU3 uvec3 - #define AU4 uvec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASU1 int - #define ASU2 ivec2 - #define ASU3 ivec3 - #define ASU4 ivec4 -//============================================================================================================================== - #define AF1_AU1(x) uintBitsToFloat(AU1(x)) - #define AF2_AU2(x) uintBitsToFloat(AU2(x)) - #define AF3_AU3(x) uintBitsToFloat(AU3(x)) - #define AF4_AU4(x) uintBitsToFloat(AU4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AF1(x) floatBitsToUint(AF1(x)) - #define AU2_AF2(x) floatBitsToUint(AF2(x)) - #define AU3_AF3(x) floatBitsToUint(AF3(x)) - #define AU4_AF4(x) floatBitsToUint(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH1_AF1_x(AF1 a){return packHalf2x16(AF2(a,0.0));} - #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AH2_AF2 packHalf2x16 - #define AU1_AW2Unorm_AF2 packUnorm2x16 - #define AU1_AB4Unorm_AF4 packUnorm4x8 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF2_AH2_AU1 unpackHalf2x16 - #define AF2_AW2Unorm_AU1 unpackUnorm2x16 - #define AF4_AB4Unorm_AU1 unpackUnorm4x8 -//============================================================================================================================== - AF1 AF1_x(AF1 a){return AF1(a);} - AF2 AF2_x(AF1 a){return AF2(a,a);} - AF3 AF3_x(AF1 a){return AF3(a,a,a);} - AF4 AF4_x(AF1 a){return AF4(a,a,a,a);} - #define AF1_(a) AF1_x(AF1(a)) - #define AF2_(a) AF2_x(AF1(a)) - #define AF3_(a) AF3_x(AF1(a)) - #define AF4_(a) AF4_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_x(AU1 a){return AU1(a);} - AU2 AU2_x(AU1 a){return AU2(a,a);} - AU3 AU3_x(AU1 a){return AU3(a,a,a);} - AU4 AU4_x(AU1 a){return AU4(a,a,a,a);} - #define AU1_(a) AU1_x(AU1(a)) - #define AU2_(a) AU2_x(AU1(a)) - #define AU3_(a) AU3_x(AU1(a)) - #define AU4_(a) AU4_x(AU1(a)) -//============================================================================================================================== - AU1 AAbsSU1(AU1 a){return AU1(abs(ASU1(a)));} - AU2 AAbsSU2(AU2 a){return AU2(abs(ASU2(a)));} - AU3 AAbsSU3(AU3 a){return AU3(abs(ASU3(a)));} - AU4 AAbsSU4(AU4 a){return AU4(abs(ASU4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 ABfe(AU1 src,AU1 off,AU1 bits){return bitfieldExtract(src,ASU1(off),ASU1(bits));} - AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));} - // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<>ASU1(b));} - AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));} - AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));} - AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL BYTE -//============================================================================================================================== - #ifdef A_BYTE - #define AB1 uint8_t - #define AB2 u8vec2 - #define AB3 u8vec3 - #define AB4 u8vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASB1 int8_t - #define ASB2 i8vec2 - #define ASB3 i8vec3 - #define ASB4 i8vec4 -//------------------------------------------------------------------------------------------------------------------------------ - AB1 AB1_x(AB1 a){return AB1(a);} - AB2 AB2_x(AB1 a){return AB2(a,a);} - AB3 AB3_x(AB1 a){return AB3(a,a,a);} - AB4 AB4_x(AB1 a){return AB4(a,a,a,a);} - #define AB1_(a) AB1_x(AB1(a)) - #define AB2_(a) AB2_x(AB1(a)) - #define AB3_(a) AB3_x(AB1(a)) - #define AB4_(a) AB4_x(AB1(a)) - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL HALF -//============================================================================================================================== - #ifdef A_HALF - #define AH1 float16_t - #define AH2 f16vec2 - #define AH3 f16vec3 - #define AH4 f16vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AW1 uint16_t - #define AW2 u16vec2 - #define AW3 u16vec3 - #define AW4 u16vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASW1 int16_t - #define ASW2 i16vec2 - #define ASW3 i16vec3 - #define ASW4 i16vec4 -//============================================================================================================================== - #define AH2_AU1(x) unpackFloat2x16(AU1(x)) - AH4 AH4_AU2_x(AU2 x){return AH4(unpackFloat2x16(x.x),unpackFloat2x16(x.y));} - #define AH4_AU2(x) AH4_AU2_x(AU2(x)) - #define AW2_AU1(x) unpackUint2x16(AU1(x)) - #define AW4_AU2(x) unpackUint4x16(pack64(AU2(x))) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AH2(x) packFloat2x16(AH2(x)) - AU2 AU2_AH4_x(AH4 x){return AU2(packFloat2x16(x.xy),packFloat2x16(x.zw));} - #define AU2_AH4(x) AU2_AH4_x(AH4(x)) - #define AU1_AW2(x) packUint2x16(AW2(x)) - #define AU2_AW4(x) unpack32(packUint4x16(AW4(x))) -//============================================================================================================================== - #define AW1_AH1(x) halfBitsToUint16(AH1(x)) - #define AW2_AH2(x) halfBitsToUint16(AH2(x)) - #define AW3_AH3(x) halfBitsToUint16(AH3(x)) - #define AW4_AH4(x) halfBitsToUint16(AH4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AH1_AW1(x) uint16BitsToHalf(AW1(x)) - #define AH2_AW2(x) uint16BitsToHalf(AW2(x)) - #define AH3_AW3(x) uint16BitsToHalf(AW3(x)) - #define AH4_AW4(x) uint16BitsToHalf(AW4(x)) -//============================================================================================================================== - AH1 AH1_x(AH1 a){return AH1(a);} - AH2 AH2_x(AH1 a){return AH2(a,a);} - AH3 AH3_x(AH1 a){return AH3(a,a,a);} - AH4 AH4_x(AH1 a){return AH4(a,a,a,a);} - #define AH1_(a) AH1_x(AH1(a)) - #define AH2_(a) AH2_x(AH1(a)) - #define AH3_(a) AH3_x(AH1(a)) - #define AH4_(a) AH4_x(AH1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AW1_x(AW1 a){return AW1(a);} - AW2 AW2_x(AW1 a){return AW2(a,a);} - AW3 AW3_x(AW1 a){return AW3(a,a,a);} - AW4 AW4_x(AW1 a){return AW4(a,a,a,a);} - #define AW1_(a) AW1_x(AW1(a)) - #define AW2_(a) AW2_x(AW1(a)) - #define AW3_(a) AW3_x(AW1(a)) - #define AW4_(a) AW4_x(AW1(a)) -//============================================================================================================================== - AW1 AAbsSW1(AW1 a){return AW1(abs(ASW1(a)));} - AW2 AAbsSW2(AW2 a){return AW2(abs(ASW2(a)));} - AW3 AAbsSW3(AW3 a){return AW3(abs(ASW3(a)));} - AW4 AAbsSW4(AW4 a){return AW4(abs(ASW4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AClampH1(AH1 x,AH1 n,AH1 m){return clamp(x,n,m);} - AH2 AClampH2(AH2 x,AH2 n,AH2 m){return clamp(x,n,m);} - AH3 AClampH3(AH3 x,AH3 n,AH3 m){return clamp(x,n,m);} - AH4 AClampH4(AH4 x,AH4 n,AH4 m){return clamp(x,n,m);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFractH1(AH1 x){return fract(x);} - AH2 AFractH2(AH2 x){return fract(x);} - AH3 AFractH3(AH3 x){return fract(x);} - AH4 AFractH4(AH4 x){return fract(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return mix(x,y,a);} - AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return mix(x,y,a);} - AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return mix(x,y,a);} - AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return mix(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - // No packed version of max3. - AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));} - AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));} - AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));} - AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASU1(a),ASU1(b)));} - AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASU2(a),ASU2(b)));} - AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASU3(a),ASU3(b)));} - AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - // No packed version of min3. - AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));} - AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));} - AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));} - AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASU1(a),ASU1(b)));} - AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASU2(a),ASU2(b)));} - AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASU3(a),ASU3(b)));} - AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARcpH1(AH1 x){return AH1_(1.0)/x;} - AH2 ARcpH2(AH2 x){return AH2_(1.0)/x;} - AH3 ARcpH3(AH3 x){return AH3_(1.0)/x;} - AH4 ARcpH4(AH4 x){return AH4_(1.0)/x;} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARsqH1(AH1 x){return AH1_(1.0)/sqrt(x);} - AH2 ARsqH2(AH2 x){return AH2_(1.0)/sqrt(x);} - AH3 ARsqH3(AH3 x){return AH3_(1.0)/sqrt(x);} - AH4 ARsqH4(AH4 x){return AH4_(1.0)/sqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASatH1(AH1 x){return clamp(x,AH1_(0.0),AH1_(1.0));} - AH2 ASatH2(AH2 x){return clamp(x,AH2_(0.0),AH2_(1.0));} - AH3 ASatH3(AH3 x){return clamp(x,AH3_(0.0),AH3_(1.0));} - AH4 ASatH4(AH4 x){return clamp(x,AH4_(0.0),AH4_(1.0));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));} - AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));} - AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));} - AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL DOUBLE -//============================================================================================================================== - #ifdef A_DUBL - #define AD1 double - #define AD2 dvec2 - #define AD3 dvec3 - #define AD4 dvec4 -//------------------------------------------------------------------------------------------------------------------------------ - AD1 AD1_x(AD1 a){return AD1(a);} - AD2 AD2_x(AD1 a){return AD2(a,a);} - AD3 AD3_x(AD1 a){return AD3(a,a,a);} - AD4 AD4_x(AD1 a){return AD4(a,a,a,a);} - #define AD1_(a) AD1_x(AD1(a)) - #define AD2_(a) AD2_x(AD1(a)) - #define AD3_(a) AD3_x(AD1(a)) - #define AD4_(a) AD4_x(AD1(a)) -//============================================================================================================================== - AD1 AFractD1(AD1 x){return fract(x);} - AD2 AFractD2(AD2 x){return fract(x);} - AD3 AFractD3(AD3 x){return fract(x);} - AD4 AFractD4(AD4 x){return fract(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return mix(x,y,a);} - AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return mix(x,y,a);} - AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return mix(x,y,a);} - AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return mix(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARcpD1(AD1 x){return AD1_(1.0)/x;} - AD2 ARcpD2(AD2 x){return AD2_(1.0)/x;} - AD3 ARcpD3(AD3 x){return AD3_(1.0)/x;} - AD4 ARcpD4(AD4 x){return AD4_(1.0)/x;} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARsqD1(AD1 x){return AD1_(1.0)/sqrt(x);} - AD2 ARsqD2(AD2 x){return AD2_(1.0)/sqrt(x);} - AD3 ARsqD3(AD3 x){return AD3_(1.0)/sqrt(x);} - AD4 ARsqD4(AD4 x){return AD4_(1.0)/sqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ASatD1(AD1 x){return clamp(x,AD1_(0.0),AD1_(1.0));} - AD2 ASatD2(AD2 x){return clamp(x,AD2_(0.0),AD2_(1.0));} - AD3 ASatD3(AD3 x){return clamp(x,AD3_(0.0),AD3_(1.0));} - AD4 ASatD4(AD4 x){return clamp(x,AD4_(0.0),AD4_(1.0));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL LONG -//============================================================================================================================== - #ifdef A_LONG - #define AL1 uint64_t - #define AL2 u64vec2 - #define AL3 u64vec3 - #define AL4 u64vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASL1 int64_t - #define ASL2 i64vec2 - #define ASL3 i64vec3 - #define ASL4 i64vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AL1_AU2(x) packUint2x32(AU2(x)) - #define AU2_AL1(x) unpackUint2x32(AL1(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AL1_x(AL1 a){return AL1(a);} - AL2 AL2_x(AL1 a){return AL2(a,a);} - AL3 AL3_x(AL1 a){return AL3(a,a,a);} - AL4 AL4_x(AL1 a){return AL4(a,a,a,a);} - #define AL1_(a) AL1_x(AL1(a)) - #define AL2_(a) AL2_x(AL1(a)) - #define AL3_(a) AL3_x(AL1(a)) - #define AL4_(a) AL4_x(AL1(a)) -//============================================================================================================================== - AL1 AAbsSL1(AL1 a){return AL1(abs(ASL1(a)));} - AL2 AAbsSL2(AL2 a){return AL2(abs(ASL2(a)));} - AL3 AAbsSL3(AL3 a){return AL3(abs(ASL3(a)));} - AL4 AAbsSL4(AL4 a){return AL4(abs(ASL4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AMaxSL1(AL1 a,AL1 b){return AL1(max(ASU1(a),ASU1(b)));} - AL2 AMaxSL2(AL2 a,AL2 b){return AL2(max(ASU2(a),ASU2(b)));} - AL3 AMaxSL3(AL3 a,AL3 b){return AL3(max(ASU3(a),ASU3(b)));} - AL4 AMaxSL4(AL4 a,AL4 b){return AL4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AMinSL1(AL1 a,AL1 b){return AL1(min(ASU1(a),ASU1(b)));} - AL2 AMinSL2(AL2 a,AL2 b){return AL2(min(ASU2(a),ASU2(b)));} - AL3 AMinSL3(AL3 a,AL3 b){return AL3(min(ASU3(a),ASU3(b)));} - AL4 AMinSL4(AL4 a,AL4 b){return AL4(min(ASU4(a),ASU4(b)));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// WAVE OPERATIONS -//============================================================================================================================== - #ifdef A_WAVE - // Where 'x' must be a compile time literal. - AF1 AWaveXorF1(AF1 v,AU1 x){return subgroupShuffleXor(v,x);} - AF2 AWaveXorF2(AF2 v,AU1 x){return subgroupShuffleXor(v,x);} - AF3 AWaveXorF3(AF3 v,AU1 x){return subgroupShuffleXor(v,x);} - AF4 AWaveXorF4(AF4 v,AU1 x){return subgroupShuffleXor(v,x);} - AU1 AWaveXorU1(AU1 v,AU1 x){return subgroupShuffleXor(v,x);} - AU2 AWaveXorU2(AU2 v,AU1 x){return subgroupShuffleXor(v,x);} - AU3 AWaveXorU3(AU3 v,AU1 x){return subgroupShuffleXor(v,x);} - AU4 AWaveXorU4(AU4 v,AU1 x){return subgroupShuffleXor(v,x);} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(subgroupShuffleXor(AU1_AH2(v),x));} - AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(subgroupShuffleXor(AU2_AH4(v),x));} - AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(subgroupShuffleXor(AU1_AW2(v),x));} - AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU2(subgroupShuffleXor(AU2_AW4(v),x));} - #endif - #endif -//============================================================================================================================== -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// HLSL -// -// -//============================================================================================================================== -#if defined(A_HLSL) && defined(A_GPU) - #ifdef A_HLSL_6_2 - #define AP1 bool - #define AP2 bool2 - #define AP3 bool3 - #define AP4 bool4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF1 float32_t - #define AF2 float32_t2 - #define AF3 float32_t3 - #define AF4 float32_t4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1 uint32_t - #define AU2 uint32_t2 - #define AU3 uint32_t3 - #define AU4 uint32_t4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASU1 int32_t - #define ASU2 int32_t2 - #define ASU3 int32_t3 - #define ASU4 int32_t4 - #else - #define AP1 bool - #define AP2 bool2 - #define AP3 bool3 - #define AP4 bool4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF1 half - #define AF2 half2 - #define AF3 half3 - #define AF4 half4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1 uint - #define AU2 uint2 - #define AU3 uint3 - #define AU4 uint4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASU1 int - #define ASU2 int2 - #define ASU3 int3 - #define ASU4 int4 - #endif -//============================================================================================================================== - #define AF1_AU1(x) asfloat(AU1(x)) - #define AF2_AU2(x) asfloat(AU2(x)) - #define AF3_AU3(x) asfloat(AU3(x)) - #define AF4_AU4(x) asfloat(AU4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AF1(x) asuint(AF1(x)) - #define AU2_AF2(x) asuint(AF2(x)) - #define AU3_AF3(x) asuint(AF3(x)) - #define AU4_AF4(x) asuint(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH1_AF1_x(AF1 a){return f32tof16(a);} - #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH2_AF2_x(AF2 a){return f32tof16(a.x)|(f32tof16(a.y)<<16);} - #define AU1_AH2_AF2(a) AU1_AH2_AF2_x(AF2(a)) - #define AU1_AB4Unorm_AF4(x) D3DCOLORtoUBYTE4(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AF2 AF2_AH2_AU1_x(AU1 x){return AF2(f16tof32(x&0xFFFF),f16tof32(x>>16));} - #define AF2_AH2_AU1(x) AF2_AH2_AU1_x(AU1(x)) -//============================================================================================================================== - AF1 AF1_x(AF1 a){return AF1(a);} - AF2 AF2_x(AF1 a){return AF2(a,a);} - AF3 AF3_x(AF1 a){return AF3(a,a,a);} - AF4 AF4_x(AF1 a){return AF4(a,a,a,a);} - #define AF1_(a) AF1_x(AF1(a)) - #define AF2_(a) AF2_x(AF1(a)) - #define AF3_(a) AF3_x(AF1(a)) - #define AF4_(a) AF4_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_x(AU1 a){return AU1(a);} - AU2 AU2_x(AU1 a){return AU2(a,a);} - AU3 AU3_x(AU1 a){return AU3(a,a,a);} - AU4 AU4_x(AU1 a){return AU4(a,a,a,a);} - #define AU1_(a) AU1_x(AU1(a)) - #define AU2_(a) AU2_x(AU1(a)) - #define AU3_(a) AU3_x(AU1(a)) - #define AU4_(a) AU4_x(AU1(a)) -//============================================================================================================================== - AU1 AAbsSU1(AU1 a){return AU1(abs(ASU1(a)));} - AU2 AAbsSU2(AU2 a){return AU2(abs(ASU2(a)));} - AU3 AAbsSU3(AU3 a){return AU3(abs(ASU3(a)));} - AU4 AAbsSU4(AU4 a){return AU4(abs(ASU4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 ABfe(AU1 src,AU1 off,AU1 bits){AU1 mask=(1u<>off)&mask;} - AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));} - AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){AU1 mask=(1u<>ASU1(b));} - AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));} - AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));} - AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL BYTE -//============================================================================================================================== - #ifdef A_BYTE - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL HALF -//============================================================================================================================== - #ifdef A_HALF - #ifdef A_HLSL_6_2 - #define AH1 float16_t - #define AH2 float16_t2 - #define AH3 float16_t3 - #define AH4 float16_t4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AW1 uint16_t - #define AW2 uint16_t2 - #define AW3 uint16_t3 - #define AW4 uint16_t4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASW1 int16_t - #define ASW2 int16_t2 - #define ASW3 int16_t3 - #define ASW4 int16_t4 - #else - #define AH1 min16float - #define AH2 min16float2 - #define AH3 min16float3 - #define AH4 min16float4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AW1 min16uint - #define AW2 min16uint2 - #define AW3 min16uint3 - #define AW4 min16uint4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASW1 min16int - #define ASW2 min16int2 - #define ASW3 min16int3 - #define ASW4 min16int4 - #endif -//============================================================================================================================== - // Need to use manual unpack to get optimal execution (don't use packed types in buffers directly). - // Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ - AH2 AH2_AU1_x(AU1 x){AF2 t=f16tof32(AU2(x&0xFFFF,x>>16));return AH2(t);} - AH4 AH4_AU2_x(AU2 x){return AH4(AH2_AU1_x(x.x),AH2_AU1_x(x.y));} - AW2 AW2_AU1_x(AU1 x){AU2 t=AU2(x&0xFFFF,x>>16);return AW2(t);} - AW4 AW4_AU2_x(AU2 x){return AW4(AW2_AU1_x(x.x),AW2_AU1_x(x.y));} - #define AH2_AU1(x) AH2_AU1_x(AU1(x)) - #define AH4_AU2(x) AH4_AU2_x(AU2(x)) - #define AW2_AU1(x) AW2_AU1_x(AU1(x)) - #define AW4_AU2(x) AW4_AU2_x(AU2(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH2_x(AH2 x){return f32tof16(x.x)+(f32tof16(x.y)<<16);} - AU2 AU2_AH4_x(AH4 x){return AU2(AU1_AH2_x(x.xy),AU1_AH2_x(x.zw));} - AU1 AU1_AW2_x(AW2 x){return AU1(x.x)+(AU1(x.y)<<16);} - AU2 AU2_AW4_x(AW4 x){return AU2(AU1_AW2_x(x.xy),AU1_AW2_x(x.zw));} - #define AU1_AH2(x) AU1_AH2_x(AH2(x)) - #define AU2_AH4(x) AU2_AH4_x(AH4(x)) - #define AU1_AW2(x) AU1_AW2_x(AW2(x)) - #define AU2_AW4(x) AU2_AW4_x(AW4(x)) -//============================================================================================================================== - #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST) - #define AW1_AH1(x) asuint16(x) - #define AW2_AH2(x) asuint16(x) - #define AW3_AH3(x) asuint16(x) - #define AW4_AH4(x) asuint16(x) - #else - #define AW1_AH1(a) AW1(f32tof16(AF1(a))) - #define AW2_AH2(a) AW2(AW1_AH1((a).x),AW1_AH1((a).y)) - #define AW3_AH3(a) AW3(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z)) - #define AW4_AH4(a) AW4(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z),AW1_AH1((a).w)) - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST) - #define AH1_AW1(x) asfloat16(x) - #define AH2_AW2(x) asfloat16(x) - #define AH3_AW3(x) asfloat16(x) - #define AH4_AW4(x) asfloat16(x) - #else - #define AH1_AW1(a) AH1(f16tof32(AU1(a))) - #define AH2_AW2(a) AH2(AH1_AW1((a).x),AH1_AW1((a).y)) - #define AH3_AW3(a) AH3(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z)) - #define AH4_AW4(a) AH4(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z),AH1_AW1((a).w)) - #endif -//============================================================================================================================== - AH1 AH1_x(AH1 a){return AH1(a);} - AH2 AH2_x(AH1 a){return AH2(a,a);} - AH3 AH3_x(AH1 a){return AH3(a,a,a);} - AH4 AH4_x(AH1 a){return AH4(a,a,a,a);} - #define AH1_(a) AH1_x(AH1(a)) - #define AH2_(a) AH2_x(AH1(a)) - #define AH3_(a) AH3_x(AH1(a)) - #define AH4_(a) AH4_x(AH1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AW1_x(AW1 a){return AW1(a);} - AW2 AW2_x(AW1 a){return AW2(a,a);} - AW3 AW3_x(AW1 a){return AW3(a,a,a);} - AW4 AW4_x(AW1 a){return AW4(a,a,a,a);} - #define AW1_(a) AW1_x(AW1(a)) - #define AW2_(a) AW2_x(AW1(a)) - #define AW3_(a) AW3_x(AW1(a)) - #define AW4_(a) AW4_x(AW1(a)) -//============================================================================================================================== - AW1 AAbsSW1(AW1 a){return AW1(abs(ASW1(a)));} - AW2 AAbsSW2(AW2 a){return AW2(abs(ASW2(a)));} - AW3 AAbsSW3(AW3 a){return AW3(abs(ASW3(a)));} - AW4 AAbsSW4(AW4 a){return AW4(abs(ASW4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AClampH1(AH1 x,AH1 n,AH1 m){return max(n,min(x,m));} - AH2 AClampH2(AH2 x,AH2 n,AH2 m){return max(n,min(x,m));} - AH3 AClampH3(AH3 x,AH3 n,AH3 m){return max(n,min(x,m));} - AH4 AClampH4(AH4 x,AH4 n,AH4 m){return max(n,min(x,m));} -//------------------------------------------------------------------------------------------------------------------------------ - // V_FRACT_F16 (note DX frac() is different). - AH1 AFractH1(AH1 x){return x-floor(x);} - AH2 AFractH2(AH2 x){return x-floor(x);} - AH3 AFractH3(AH3 x){return x-floor(x);} - AH4 AFractH4(AH4 x){return x-floor(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return lerp(x,y,a);} - AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return lerp(x,y,a);} - AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return lerp(x,y,a);} - AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return lerp(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));} - AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));} - AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));} - AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASU1(a),ASU1(b)));} - AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASU2(a),ASU2(b)));} - AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASU3(a),ASU3(b)));} - AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));} - AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));} - AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));} - AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASU1(a),ASU1(b)));} - AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASU2(a),ASU2(b)));} - AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASU3(a),ASU3(b)));} - AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARcpH1(AH1 x){return rcp(x);} - AH2 ARcpH2(AH2 x){return rcp(x);} - AH3 ARcpH3(AH3 x){return rcp(x);} - AH4 ARcpH4(AH4 x){return rcp(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARsqH1(AH1 x){return rsqrt(x);} - AH2 ARsqH2(AH2 x){return rsqrt(x);} - AH3 ARsqH3(AH3 x){return rsqrt(x);} - AH4 ARsqH4(AH4 x){return rsqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASatH1(AH1 x){return saturate(x);} - AH2 ASatH2(AH2 x){return saturate(x);} - AH3 ASatH3(AH3 x){return saturate(x);} - AH4 ASatH4(AH4 x){return saturate(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));} - AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));} - AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));} - AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL DOUBLE -//============================================================================================================================== - #ifdef A_DUBL - #ifdef A_HLSL_6_2 - #define AD1 float64_t - #define AD2 float64_t2 - #define AD3 float64_t3 - #define AD4 float64_t4 - #else - #define AD1 double - #define AD2 double2 - #define AD3 double3 - #define AD4 double4 - #endif -//------------------------------------------------------------------------------------------------------------------------------ - AD1 AD1_x(AD1 a){return AD1(a);} - AD2 AD2_x(AD1 a){return AD2(a,a);} - AD3 AD3_x(AD1 a){return AD3(a,a,a);} - AD4 AD4_x(AD1 a){return AD4(a,a,a,a);} - #define AD1_(a) AD1_x(AD1(a)) - #define AD2_(a) AD2_x(AD1(a)) - #define AD3_(a) AD3_x(AD1(a)) - #define AD4_(a) AD4_x(AD1(a)) -//============================================================================================================================== - AD1 AFractD1(AD1 a){return a-floor(a);} - AD2 AFractD2(AD2 a){return a-floor(a);} - AD3 AFractD3(AD3 a){return a-floor(a);} - AD4 AFractD4(AD4 a){return a-floor(a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return lerp(x,y,a);} - AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return lerp(x,y,a);} - AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return lerp(x,y,a);} - AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return lerp(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARcpD1(AD1 x){return rcp(x);} - AD2 ARcpD2(AD2 x){return rcp(x);} - AD3 ARcpD3(AD3 x){return rcp(x);} - AD4 ARcpD4(AD4 x){return rcp(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARsqD1(AD1 x){return rsqrt(x);} - AD2 ARsqD2(AD2 x){return rsqrt(x);} - AD3 ARsqD3(AD3 x){return rsqrt(x);} - AD4 ARsqD4(AD4 x){return rsqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ASatD1(AD1 x){return saturate(x);} - AD2 ASatD2(AD2 x){return saturate(x);} - AD3 ASatD3(AD3 x){return saturate(x);} - AD4 ASatD4(AD4 x){return saturate(x);} - #endif -//============================================================================================================================== -// HLSL WAVE -//============================================================================================================================== - #ifdef A_WAVE - // Where 'x' must be a compile time literal. - AF1 AWaveXorF1(AF1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AF2 AWaveXorF2(AF2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AF3 AWaveXorF3(AF3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AF4 AWaveXorF4(AF4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AU1 AWaveXorU1(AU1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AU2 AWaveXorU1(AU2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AU3 AWaveXorU1(AU3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AU4 AWaveXorU1(AU4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(WaveReadLaneAt(AU1_AH2(v),WaveGetLaneIndex()^x));} - AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(WaveReadLaneAt(AU2_AH4(v),WaveGetLaneIndex()^x));} - AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(WaveReadLaneAt(AU1_AW2(v),WaveGetLaneIndex()^x));} - AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU1(WaveReadLaneAt(AU1_AW4(v),WaveGetLaneIndex()^x));} - #endif - #endif -//============================================================================================================================== -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// GPU COMMON -// -// -//============================================================================================================================== -#ifdef A_GPU - // Negative and positive infinity. - #define A_INFP_F AF1_AU1(0x7f800000u) - #define A_INFN_F AF1_AU1(0xff800000u) -//------------------------------------------------------------------------------------------------------------------------------ - // Copy sign from 's' to positive 'd'. - AF1 ACpySgnF1(AF1 d,AF1 s){return AF1_AU1(AU1_AF1(d)|(AU1_AF1(s)&AU1_(0x80000000u)));} - AF2 ACpySgnF2(AF2 d,AF2 s){return AF2_AU2(AU2_AF2(d)|(AU2_AF2(s)&AU2_(0x80000000u)));} - AF3 ACpySgnF3(AF3 d,AF3 s){return AF3_AU3(AU3_AF3(d)|(AU3_AF3(s)&AU3_(0x80000000u)));} - AF4 ACpySgnF4(AF4 d,AF4 s){return AF4_AU4(AU4_AF4(d)|(AU4_AF4(s)&AU4_(0x80000000u)));} -//------------------------------------------------------------------------------------------------------------------------------ - // Single operation to return (useful to create a mask to use in lerp for branch free logic), - // m=NaN := 0 - // m>=0 := 0 - // m<0 := 1 - // Uses the following useful floating point logic, - // saturate(+a*(-INF)==-INF) := 0 - // saturate( 0*(-INF)== NaN) := 0 - // saturate(-a*(-INF)==+INF) := 1 - AF1 ASignedF1(AF1 m){return ASatF1(m*AF1_(A_INFN_F));} - AF2 ASignedF2(AF2 m){return ASatF2(m*AF2_(A_INFN_F));} - AF3 ASignedF3(AF3 m){return ASatF3(m*AF3_(A_INFN_F));} - AF4 ASignedF4(AF4 m){return ASatF4(m*AF4_(A_INFN_F));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AGtZeroF1(AF1 m){return ASatF1(m*AF1_(A_INFP_F));} - AF2 AGtZeroF2(AF2 m){return ASatF2(m*AF2_(A_INFP_F));} - AF3 AGtZeroF3(AF3 m){return ASatF3(m*AF3_(A_INFP_F));} - AF4 AGtZeroF4(AF4 m){return ASatF4(m*AF4_(A_INFP_F));} -//============================================================================================================================== - #ifdef A_HALF - #ifdef A_HLSL_6_2 - #define A_INFP_H AH1_AW1((uint16_t)0x7c00u) - #define A_INFN_H AH1_AW1((uint16_t)0xfc00u) - #else - #define A_INFP_H AH1_AW1(0x7c00u) - #define A_INFN_H AH1_AW1(0xfc00u) - #endif - -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ACpySgnH1(AH1 d,AH1 s){return AH1_AW1(AW1_AH1(d)|(AW1_AH1(s)&AW1_(0x8000u)));} - AH2 ACpySgnH2(AH2 d,AH2 s){return AH2_AW2(AW2_AH2(d)|(AW2_AH2(s)&AW2_(0x8000u)));} - AH3 ACpySgnH3(AH3 d,AH3 s){return AH3_AW3(AW3_AH3(d)|(AW3_AH3(s)&AW3_(0x8000u)));} - AH4 ACpySgnH4(AH4 d,AH4 s){return AH4_AW4(AW4_AH4(d)|(AW4_AH4(s)&AW4_(0x8000u)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASignedH1(AH1 m){return ASatH1(m*AH1_(A_INFN_H));} - AH2 ASignedH2(AH2 m){return ASatH2(m*AH2_(A_INFN_H));} - AH3 ASignedH3(AH3 m){return ASatH3(m*AH3_(A_INFN_H));} - AH4 ASignedH4(AH4 m){return ASatH4(m*AH4_(A_INFN_H));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AGtZeroH1(AH1 m){return ASatH1(m*AH1_(A_INFP_H));} - AH2 AGtZeroH2(AH2 m){return ASatH2(m*AH2_(A_INFP_H));} - AH3 AGtZeroH3(AH3 m){return ASatH3(m*AH3_(A_INFP_H));} - AH4 AGtZeroH4(AH4 m){return ASatH4(m*AH4_(A_INFP_H));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [FIS] FLOAT INTEGER SORTABLE -//------------------------------------------------------------------------------------------------------------------------------ -// Float to integer sortable. -// - If sign bit=0, flip the sign bit (positives). -// - If sign bit=1, flip all bits (negatives). -// Integer sortable to float. -// - If sign bit=1, flip the sign bit (positives). -// - If sign bit=0, flip all bits (negatives). -// Has nice side effects. -// - Larger integers are more positive values. -// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). -// Burns 3 ops for conversion {shift,or,xor}. -//============================================================================================================================== - AU1 AFisToU1(AU1 x){return x^(( AShrSU1(x,AU1_(31)))|AU1_(0x80000000));} - AU1 AFisFromU1(AU1 x){return x^((~AShrSU1(x,AU1_(31)))|AU1_(0x80000000));} -//------------------------------------------------------------------------------------------------------------------------------ - // Just adjust high 16-bit value (useful when upper part of 32-bit word is a 16-bit float value). - AU1 AFisToHiU1(AU1 x){return x^(( AShrSU1(x,AU1_(15)))|AU1_(0x80000000));} - AU1 AFisFromHiU1(AU1 x){return x^((~AShrSU1(x,AU1_(15)))|AU1_(0x80000000));} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - AW1 AFisToW1(AW1 x){return x^(( AShrSW1(x,AW1_(15)))|AW1_(0x8000));} - AW1 AFisFromW1(AW1 x){return x^((~AShrSW1(x,AW1_(15)))|AW1_(0x8000));} -//------------------------------------------------------------------------------------------------------------------------------ - AW2 AFisToW2(AW2 x){return x^(( AShrSW2(x,AW2_(15)))|AW2_(0x8000));} - AW2 AFisFromW2(AW2 x){return x^((~AShrSW2(x,AW2_(15)))|AW2_(0x8000));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [PERM] V_PERM_B32 -//------------------------------------------------------------------------------------------------------------------------------ -// Support for V_PERM_B32 started in the 3rd generation of GCN. -//------------------------------------------------------------------------------------------------------------------------------ -// yyyyxxxx - The 'i' input. -// 76543210 -// ======== -// HGFEDCBA - Naming on permutation. -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Make sure compiler optimizes this. -//============================================================================================================================== - #ifdef A_HALF - AU1 APerm0E0A(AU2 i){return((i.x )&0xffu)|((i.y<<16)&0xff0000u);} - AU1 APerm0F0B(AU2 i){return((i.x>> 8)&0xffu)|((i.y<< 8)&0xff0000u);} - AU1 APerm0G0C(AU2 i){return((i.x>>16)&0xffu)|((i.y )&0xff0000u);} - AU1 APerm0H0D(AU2 i){return((i.x>>24)&0xffu)|((i.y>> 8)&0xff0000u);} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 APermHGFA(AU2 i){return((i.x )&0x000000ffu)|(i.y&0xffffff00u);} - AU1 APermHGFC(AU2 i){return((i.x>>16)&0x000000ffu)|(i.y&0xffffff00u);} - AU1 APermHGAE(AU2 i){return((i.x<< 8)&0x0000ff00u)|(i.y&0xffff00ffu);} - AU1 APermHGCE(AU2 i){return((i.x>> 8)&0x0000ff00u)|(i.y&0xffff00ffu);} - AU1 APermHAFE(AU2 i){return((i.x<<16)&0x00ff0000u)|(i.y&0xff00ffffu);} - AU1 APermHCFE(AU2 i){return((i.x )&0x00ff0000u)|(i.y&0xff00ffffu);} - AU1 APermAGFE(AU2 i){return((i.x<<24)&0xff000000u)|(i.y&0x00ffffffu);} - AU1 APermCGFE(AU2 i){return((i.x<< 8)&0xff000000u)|(i.y&0x00ffffffu);} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 APermGCEA(AU2 i){return((i.x)&0x00ff00ffu)|((i.y<<8)&0xff00ff00u);} - AU1 APermGECA(AU2 i){return(((i.x)&0xffu)|((i.x>>8)&0xff00u)|((i.y<<16)&0xff0000u)|((i.y<<8)&0xff000000u));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [BUC] BYTE UNSIGNED CONVERSION -//------------------------------------------------------------------------------------------------------------------------------ -// Designed to use the optimal conversion, enables the scaling to possibly be factored into other computation. -// Works on a range of {0 to A_BUC_<32,16>}, for <32-bit, and 16-bit> respectively. -//------------------------------------------------------------------------------------------------------------------------------ -// OPCODE NOTES -// ============ -// GCN does not do UNORM or SNORM for bytes in opcodes. -// - V_CVT_F32_UBYTE{0,1,2,3} - Unsigned byte to float. -// - V_CVT_PKACC_U8_F32 - Float to unsigned byte (does bit-field insert into 32-bit integer). -// V_PERM_B32 does byte packing with ability to zero fill bytes as well. -// - Can pull out byte values from two sources, and zero fill upper 8-bits of packed hi and lo. -//------------------------------------------------------------------------------------------------------------------------------ -// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U1() - Designed for V_CVT_F32_UBYTE* and V_CVT_PKACCUM_U8_F32 ops. -// ==== ===== -// 0 : 0 -// 1 : 1 -// ... -// 255 : 255 -// : 256 (just outside the encoding range) -//------------------------------------------------------------------------------------------------------------------------------ -// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32. -// ==== ===== -// 0 : 0 -// 1 : 1/512 -// 2 : 1/256 -// ... -// 64 : 1/8 -// 128 : 1/4 -// 255 : 255/512 -// : 1/2 (just outside the encoding range) -//------------------------------------------------------------------------------------------------------------------------------ -// OPTIMAL IMPLEMENTATIONS ON AMD ARCHITECTURES -// ============================================ -// r=ABuc0FromU1(i) -// V_CVT_F32_UBYTE0 r,i -// -------------------------------------------- -// r=ABuc0ToU1(d,i) -// V_CVT_PKACCUM_U8_F32 r,i,0,d -// -------------------------------------------- -// d=ABuc0FromU2(i) -// Where 'k0' is an SGPR with 0x0E0A -// Where 'k1' is an SGPR with {32768.0} packed into the lower 16-bits -// V_PERM_B32 d,i.x,i.y,k0 -// V_PK_FMA_F16 d,d,k1.x,0 -// -------------------------------------------- -// r=ABuc0ToU2(d,i) -// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits -// Where 'k1' is an SGPR with 0x???? -// Where 'k2' is an SGPR with 0x???? -// V_PK_FMA_F16 i,i,k0.x,0 -// V_PERM_B32 r.x,i,i,k1 -// V_PERM_B32 r.y,i,i,k2 -//============================================================================================================================== - // Peak range for 32-bit and 16-bit operations. - #define A_BUC_32 (255.0) - #define A_BUC_16 (255.0/512.0) -//============================================================================================================================== - #if 1 - // Designed to be one V_CVT_PKACCUM_U8_F32. - // The extra min is required to pattern match to V_CVT_PKACCUM_U8_F32. - AU1 ABuc0ToU1(AU1 d,AF1 i){return (d&0xffffff00u)|((min(AU1(i),255u) )&(0x000000ffu));} - AU1 ABuc1ToU1(AU1 d,AF1 i){return (d&0xffff00ffu)|((min(AU1(i),255u)<< 8)&(0x0000ff00u));} - AU1 ABuc2ToU1(AU1 d,AF1 i){return (d&0xff00ffffu)|((min(AU1(i),255u)<<16)&(0x00ff0000u));} - AU1 ABuc3ToU1(AU1 d,AF1 i){return (d&0x00ffffffu)|((min(AU1(i),255u)<<24)&(0xff000000u));} -//------------------------------------------------------------------------------------------------------------------------------ - // Designed to be one V_CVT_F32_UBYTE*. - AF1 ABuc0FromU1(AU1 i){return AF1((i )&255u);} - AF1 ABuc1FromU1(AU1 i){return AF1((i>> 8)&255u);} - AF1 ABuc2FromU1(AU1 i){return AF1((i>>16)&255u);} - AF1 ABuc3FromU1(AU1 i){return AF1((i>>24)&255u);} - #endif -//============================================================================================================================== - #ifdef A_HALF - // Takes {x0,x1} and {y0,y1} and builds {{x0,y0},{x1,y1}}. - AW2 ABuc01ToW2(AH2 x,AH2 y){x*=AH2_(1.0/32768.0);y*=AH2_(1.0/32768.0); - return AW2_AU1(APermGCEA(AU2(AU1_AW2(AW2_AH2(x)),AU1_AW2(AW2_AH2(y)))));} -//------------------------------------------------------------------------------------------------------------------------------ - // Designed for 3 ops to do SOA to AOS and conversion. - AU2 ABuc0ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); - return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} - AU2 ABuc1ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); - return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} - AU2 ABuc2ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); - return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} - AU2 ABuc3ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); - return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} -//------------------------------------------------------------------------------------------------------------------------------ - // Designed for 2 ops to do both AOS to SOA, and conversion. - AH2 ABuc0FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)))*AH2_(32768.0);} - AH2 ABuc1FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)))*AH2_(32768.0);} - AH2 ABuc2FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)))*AH2_(32768.0);} - AH2 ABuc3FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)))*AH2_(32768.0);} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [BSC] BYTE SIGNED CONVERSION -//------------------------------------------------------------------------------------------------------------------------------ -// Similar to [BUC]. -// Works on a range of {-/+ A_BSC_<32,16>}, for <32-bit, and 16-bit> respectively. -//------------------------------------------------------------------------------------------------------------------------------ -// ENCODING (without zero-based encoding) -// ======== -// 0 = unused (can be used to mean something else) -// 1 = lowest value -// 128 = exact zero center (zero based encoding -// 255 = highest value -//------------------------------------------------------------------------------------------------------------------------------ -// Zero-based [Zb] flips the MSB bit of the byte (making 128 "exact zero" actually zero). -// This is useful if there is a desire for cleared values to decode as zero. -//------------------------------------------------------------------------------------------------------------------------------ -// BYTE : FLOAT - ABsc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32. -// ==== ===== -// 0 : -127/512 (unused) -// 1 : -126/512 -// 2 : -125/512 -// ... -// 128 : 0 -// ... -// 255 : 127/512 -// : 1/4 (just outside the encoding range) -//============================================================================================================================== - // Peak range for 32-bit and 16-bit operations. - #define A_BSC_32 (127.0) - #define A_BSC_16 (127.0/512.0) -//============================================================================================================================== - #if 1 - AU1 ABsc0ToU1(AU1 d,AF1 i){return (d&0xffffff00u)|((min(AU1(i+128.0),255u) )&(0x000000ffu));} - AU1 ABsc1ToU1(AU1 d,AF1 i){return (d&0xffff00ffu)|((min(AU1(i+128.0),255u)<< 8)&(0x0000ff00u));} - AU1 ABsc2ToU1(AU1 d,AF1 i){return (d&0xff00ffffu)|((min(AU1(i+128.0),255u)<<16)&(0x00ff0000u));} - AU1 ABsc3ToU1(AU1 d,AF1 i){return (d&0x00ffffffu)|((min(AU1(i+128.0),255u)<<24)&(0xff000000u));} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 ABsc0ToZbU1(AU1 d,AF1 i){return ((d&0xffffff00u)|((min(AU1(trunc(i)+128.0),255u) )&(0x000000ffu)))^0x00000080u;} - AU1 ABsc1ToZbU1(AU1 d,AF1 i){return ((d&0xffff00ffu)|((min(AU1(trunc(i)+128.0),255u)<< 8)&(0x0000ff00u)))^0x00008000u;} - AU1 ABsc2ToZbU1(AU1 d,AF1 i){return ((d&0xff00ffffu)|((min(AU1(trunc(i)+128.0),255u)<<16)&(0x00ff0000u)))^0x00800000u;} - AU1 ABsc3ToZbU1(AU1 d,AF1 i){return ((d&0x00ffffffu)|((min(AU1(trunc(i)+128.0),255u)<<24)&(0xff000000u)))^0x80000000u;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 ABsc0FromU1(AU1 i){return AF1((i )&255u)-128.0;} - AF1 ABsc1FromU1(AU1 i){return AF1((i>> 8)&255u)-128.0;} - AF1 ABsc2FromU1(AU1 i){return AF1((i>>16)&255u)-128.0;} - AF1 ABsc3FromU1(AU1 i){return AF1((i>>24)&255u)-128.0;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 ABsc0FromZbU1(AU1 i){return AF1(((i )&255u)^0x80u)-128.0;} - AF1 ABsc1FromZbU1(AU1 i){return AF1(((i>> 8)&255u)^0x80u)-128.0;} - AF1 ABsc2FromZbU1(AU1 i){return AF1(((i>>16)&255u)^0x80u)-128.0;} - AF1 ABsc3FromZbU1(AU1 i){return AF1(((i>>24)&255u)^0x80u)-128.0;} - #endif -//============================================================================================================================== - #ifdef A_HALF - // Takes {x0,x1} and {y0,y1} and builds {{x0,y0},{x1,y1}}. - AW2 ABsc01ToW2(AH2 x,AH2 y){x=x*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);y=y*AH2_(1.0/32768.0)+AH2_(0.25/32768.0); - return AW2_AU1(APermGCEA(AU2(AU1_AW2(AW2_AH2(x)),AU1_AW2(AW2_AH2(y)))));} -//------------------------------------------------------------------------------------------------------------------------------ - AU2 ABsc0ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); - return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} - AU2 ABsc1ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); - return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} - AU2 ABsc2ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); - return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} - AU2 ABsc3ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); - return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AU2 ABsc0ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; - return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} - AU2 ABsc1ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; - return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} - AU2 ABsc2ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; - return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} - AU2 ABsc3ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; - return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 ABsc0FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc1FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc2FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc3FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)))*AH2_(32768.0)-AH2_(0.25);} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 ABsc0FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc1FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc2FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc3FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HALF APPROXIMATIONS -//------------------------------------------------------------------------------------------------------------------------------ -// These support only positive inputs. -// Did not see value yet in specialization for range. -// Using quick testing, ended up mostly getting the same "best" approximation for various ranges. -// With hardware that can co-execute transcendentals, the value in approximations could be less than expected. -// However from a latency perspective, if execution of a transcendental is 4 clk, with no packed support, -> 8 clk total. -// And co-execution would require a compiler interleaving a lot of independent work for packed usage. -//------------------------------------------------------------------------------------------------------------------------------ -// The one Newton Raphson iteration form of rsq() was skipped (requires 6 ops total). -// Same with sqrt(), as this could be x*rsq() (7 ops). -//============================================================================================================================== - #ifdef A_HALF - // Minimize squared error across full positive range, 2 ops. - // The 0x1de2 based approximation maps {0 to 1} input maps to < 1 output. - AH1 APrxLoSqrtH1(AH1 a){return AH1_AW1((AW1_AH1(a)>>AW1_(1))+AW1_(0x1de2));} - AH2 APrxLoSqrtH2(AH2 a){return AH2_AW2((AW2_AH2(a)>>AW2_(1))+AW2_(0x1de2));} - AH3 APrxLoSqrtH3(AH3 a){return AH3_AW3((AW3_AH3(a)>>AW3_(1))+AW3_(0x1de2));} - AH4 APrxLoSqrtH4(AH4 a){return AH4_AW4((AW4_AH4(a)>>AW4_(1))+AW4_(0x1de2));} -//------------------------------------------------------------------------------------------------------------------------------ - // Lower precision estimation, 1 op. - // Minimize squared error across {smallest normal to 16384.0}. - AH1 APrxLoRcpH1(AH1 a){return AH1_AW1(AW1_(0x7784)-AW1_AH1(a));} - AH2 APrxLoRcpH2(AH2 a){return AH2_AW2(AW2_(0x7784)-AW2_AH2(a));} - AH3 APrxLoRcpH3(AH3 a){return AH3_AW3(AW3_(0x7784)-AW3_AH3(a));} - AH4 APrxLoRcpH4(AH4 a){return AH4_AW4(AW4_(0x7784)-AW4_AH4(a));} -//------------------------------------------------------------------------------------------------------------------------------ - // Medium precision estimation, one Newton Raphson iteration, 3 ops. - AH1 APrxMedRcpH1(AH1 a){AH1 b=AH1_AW1(AW1_(0x778d)-AW1_AH1(a));return b*(-b*a+AH1_(2.0));} - AH2 APrxMedRcpH2(AH2 a){AH2 b=AH2_AW2(AW2_(0x778d)-AW2_AH2(a));return b*(-b*a+AH2_(2.0));} - AH3 APrxMedRcpH3(AH3 a){AH3 b=AH3_AW3(AW3_(0x778d)-AW3_AH3(a));return b*(-b*a+AH3_(2.0));} - AH4 APrxMedRcpH4(AH4 a){AH4 b=AH4_AW4(AW4_(0x778d)-AW4_AH4(a));return b*(-b*a+AH4_(2.0));} -//------------------------------------------------------------------------------------------------------------------------------ - // Minimize squared error across {smallest normal to 16384.0}, 2 ops. - AH1 APrxLoRsqH1(AH1 a){return AH1_AW1(AW1_(0x59a3)-(AW1_AH1(a)>>AW1_(1)));} - AH2 APrxLoRsqH2(AH2 a){return AH2_AW2(AW2_(0x59a3)-(AW2_AH2(a)>>AW2_(1)));} - AH3 APrxLoRsqH3(AH3 a){return AH3_AW3(AW3_(0x59a3)-(AW3_AH3(a)>>AW3_(1)));} - AH4 APrxLoRsqH4(AH4 a){return AH4_AW4(AW4_(0x59a3)-(AW4_AH4(a)>>AW4_(1)));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// FLOAT APPROXIMATIONS -//------------------------------------------------------------------------------------------------------------------------------ -// Michal Drobot has an excellent presentation on these: "Low Level Optimizations For GCN", -// - Idea dates back to SGI, then to Quake 3, etc. -// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -// - sqrt(x)=rsqrt(x)*x -// - rcp(x)=rsqrt(x)*rsqrt(x) for positive x -// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -//------------------------------------------------------------------------------------------------------------------------------ -// These below are from perhaps less complete searching for optimal. -// Used FP16 normal range for testing with +4096 32-bit step size for sampling error. -// So these match up well with the half approximations. -//============================================================================================================================== - AF1 APrxLoSqrtF1(AF1 a){return AF1_AU1((AU1_AF1(a)>>AU1_(1))+AU1_(0x1fbc4639));} - AF1 APrxLoRcpF1(AF1 a){return AF1_AU1(AU1_(0x7ef07ebb)-AU1_AF1(a));} - AF1 APrxMedRcpF1(AF1 a){AF1 b=AF1_AU1(AU1_(0x7ef19fff)-AU1_AF1(a));return b*(-b*a+AF1_(2.0));} - AF1 APrxLoRsqF1(AF1 a){return AF1_AU1(AU1_(0x5f347d74)-(AU1_AF1(a)>>AU1_(1)));} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 APrxLoSqrtF2(AF2 a){return AF2_AU2((AU2_AF2(a)>>AU2_(1))+AU2_(0x1fbc4639));} - AF2 APrxLoRcpF2(AF2 a){return AF2_AU2(AU2_(0x7ef07ebb)-AU2_AF2(a));} - AF2 APrxMedRcpF2(AF2 a){AF2 b=AF2_AU2(AU2_(0x7ef19fff)-AU2_AF2(a));return b*(-b*a+AF2_(2.0));} - AF2 APrxLoRsqF2(AF2 a){return AF2_AU2(AU2_(0x5f347d74)-(AU2_AF2(a)>>AU2_(1)));} -//------------------------------------------------------------------------------------------------------------------------------ - AF3 APrxLoSqrtF3(AF3 a){return AF3_AU3((AU3_AF3(a)>>AU3_(1))+AU3_(0x1fbc4639));} - AF3 APrxLoRcpF3(AF3 a){return AF3_AU3(AU3_(0x7ef07ebb)-AU3_AF3(a));} - AF3 APrxMedRcpF3(AF3 a){AF3 b=AF3_AU3(AU3_(0x7ef19fff)-AU3_AF3(a));return b*(-b*a+AF3_(2.0));} - AF3 APrxLoRsqF3(AF3 a){return AF3_AU3(AU3_(0x5f347d74)-(AU3_AF3(a)>>AU3_(1)));} -//------------------------------------------------------------------------------------------------------------------------------ - AF4 APrxLoSqrtF4(AF4 a){return AF4_AU4((AU4_AF4(a)>>AU4_(1))+AU4_(0x1fbc4639));} - AF4 APrxLoRcpF4(AF4 a){return AF4_AU4(AU4_(0x7ef07ebb)-AU4_AF4(a));} - AF4 APrxMedRcpF4(AF4 a){AF4 b=AF4_AU4(AU4_(0x7ef19fff)-AU4_AF4(a));return b*(-b*a+AF4_(2.0));} - AF4 APrxLoRsqF4(AF4 a){return AF4_AU4(AU4_(0x5f347d74)-(AU4_AF4(a)>>AU4_(1)));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// PQ APPROXIMATIONS -//------------------------------------------------------------------------------------------------------------------------------ -// PQ is very close to x^(1/8). The functions below Use the fast float approximation method to do -// PQ<~>Gamma2 (4th power and fast 4th root) and PQ<~>Linear (8th power and fast 8th root). Maximum error is ~0.2%. -//============================================================================================================================== -// Helpers - AF1 Quart(AF1 a) { a = a * a; return a * a;} - AF1 Oct(AF1 a) { a = a * a; a = a * a; return a * a; } - AF2 Quart(AF2 a) { a = a * a; return a * a; } - AF2 Oct(AF2 a) { a = a * a; a = a * a; return a * a; } - AF3 Quart(AF3 a) { a = a * a; return a * a; } - AF3 Oct(AF3 a) { a = a * a; a = a * a; return a * a; } - AF4 Quart(AF4 a) { a = a * a; return a * a; } - AF4 Oct(AF4 a) { a = a * a; a = a * a; return a * a; } - //------------------------------------------------------------------------------------------------------------------------------ - AF1 APrxPQToGamma2(AF1 a) { return Quart(a); } - AF1 APrxPQToLinear(AF1 a) { return Oct(a); } - AF1 APrxLoGamma2ToPQ(AF1 a) { return AF1_AU1((AU1_AF1(a) >> AU1_(2)) + AU1_(0x2F9A4E46)); } - AF1 APrxMedGamma2ToPQ(AF1 a) { AF1 b = AF1_AU1((AU1_AF1(a) >> AU1_(2)) + AU1_(0x2F9A4E46)); AF1 b4 = Quart(b); return b - b * (b4 - a) / (AF1_(4.0) * b4); } - AF1 APrxHighGamma2ToPQ(AF1 a) { return sqrt(sqrt(a)); } - AF1 APrxLoLinearToPQ(AF1 a) { return AF1_AU1((AU1_AF1(a) >> AU1_(3)) + AU1_(0x378D8723)); } - AF1 APrxMedLinearToPQ(AF1 a) { AF1 b = AF1_AU1((AU1_AF1(a) >> AU1_(3)) + AU1_(0x378D8723)); AF1 b8 = Oct(b); return b - b * (b8 - a) / (AF1_(8.0) * b8); } - AF1 APrxHighLinearToPQ(AF1 a) { return sqrt(sqrt(sqrt(a))); } - //------------------------------------------------------------------------------------------------------------------------------ - AF2 APrxPQToGamma2(AF2 a) { return Quart(a); } - AF2 APrxPQToLinear(AF2 a) { return Oct(a); } - AF2 APrxLoGamma2ToPQ(AF2 a) { return AF2_AU2((AU2_AF2(a) >> AU2_(2)) + AU2_(0x2F9A4E46)); } - AF2 APrxMedGamma2ToPQ(AF2 a) { AF2 b = AF2_AU2((AU2_AF2(a) >> AU2_(2)) + AU2_(0x2F9A4E46)); AF2 b4 = Quart(b); return b - b * (b4 - a) / (AF1_(4.0) * b4); } - AF2 APrxHighGamma2ToPQ(AF2 a) { return sqrt(sqrt(a)); } - AF2 APrxLoLinearToPQ(AF2 a) { return AF2_AU2((AU2_AF2(a) >> AU2_(3)) + AU2_(0x378D8723)); } - AF2 APrxMedLinearToPQ(AF2 a) { AF2 b = AF2_AU2((AU2_AF2(a) >> AU2_(3)) + AU2_(0x378D8723)); AF2 b8 = Oct(b); return b - b * (b8 - a) / (AF1_(8.0) * b8); } - AF2 APrxHighLinearToPQ(AF2 a) { return sqrt(sqrt(sqrt(a))); } - //------------------------------------------------------------------------------------------------------------------------------ - AF3 APrxPQToGamma2(AF3 a) { return Quart(a); } - AF3 APrxPQToLinear(AF3 a) { return Oct(a); } - AF3 APrxLoGamma2ToPQ(AF3 a) { return AF3_AU3((AU3_AF3(a) >> AU3_(2)) + AU3_(0x2F9A4E46)); } - AF3 APrxMedGamma2ToPQ(AF3 a) { AF3 b = AF3_AU3((AU3_AF3(a) >> AU3_(2)) + AU3_(0x2F9A4E46)); AF3 b4 = Quart(b); return b - b * (b4 - a) / (AF1_(4.0) * b4); } - AF3 APrxHighGamma2ToPQ(AF3 a) { return sqrt(sqrt(a)); } - AF3 APrxLoLinearToPQ(AF3 a) { return AF3_AU3((AU3_AF3(a) >> AU3_(3)) + AU3_(0x378D8723)); } - AF3 APrxMedLinearToPQ(AF3 a) { AF3 b = AF3_AU3((AU3_AF3(a) >> AU3_(3)) + AU3_(0x378D8723)); AF3 b8 = Oct(b); return b - b * (b8 - a) / (AF1_(8.0) * b8); } - AF3 APrxHighLinearToPQ(AF3 a) { return sqrt(sqrt(sqrt(a))); } - //------------------------------------------------------------------------------------------------------------------------------ - AF4 APrxPQToGamma2(AF4 a) { return Quart(a); } - AF4 APrxPQToLinear(AF4 a) { return Oct(a); } - AF4 APrxLoGamma2ToPQ(AF4 a) { return AF4_AU4((AU4_AF4(a) >> AU4_(2)) + AU4_(0x2F9A4E46)); } - AF4 APrxMedGamma2ToPQ(AF4 a) { AF4 b = AF4_AU4((AU4_AF4(a) >> AU4_(2)) + AU4_(0x2F9A4E46)); AF4 b4 = Quart(b); return b - b * (b4 - a) / (AF1_(4.0) * b4); } - AF4 APrxHighGamma2ToPQ(AF4 a) { return sqrt(sqrt(a)); } - AF4 APrxLoLinearToPQ(AF4 a) { return AF4_AU4((AU4_AF4(a) >> AU4_(3)) + AU4_(0x378D8723)); } - AF4 APrxMedLinearToPQ(AF4 a) { AF4 b = AF4_AU4((AU4_AF4(a) >> AU4_(3)) + AU4_(0x378D8723)); AF4 b8 = Oct(b); return b - b * (b8 - a) / (AF1_(8.0) * b8); } - AF4 APrxHighLinearToPQ(AF4 a) { return sqrt(sqrt(sqrt(a))); } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// PARABOLIC SIN & COS -//------------------------------------------------------------------------------------------------------------------------------ -// Approximate answers to transcendental questions. -//------------------------------------------------------------------------------------------------------------------------------ -//============================================================================================================================== - #if 1 - // Valid input range is {-1 to 1} representing {0 to 2 pi}. - // Output range is {-1/4 to 1/4} representing {-1 to 1}. - AF1 APSinF1(AF1 x){return x*abs(x)-x;} // MAD. - AF2 APSinF2(AF2 x){return x*abs(x)-x;} - AF1 APCosF1(AF1 x){x=AFractF1(x*AF1_(0.5)+AF1_(0.75));x=x*AF1_(2.0)-AF1_(1.0);return APSinF1(x);} // 3x MAD, FRACT - AF2 APCosF2(AF2 x){x=AFractF2(x*AF2_(0.5)+AF2_(0.75));x=x*AF2_(2.0)-AF2_(1.0);return APSinF2(x);} - AF2 APSinCosF1(AF1 x){AF1 y=AFractF1(x*AF1_(0.5)+AF1_(0.75));y=y*AF1_(2.0)-AF1_(1.0);return APSinF2(AF2(x,y));} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - // For a packed {sin,cos} pair, - // - Native takes 16 clocks and 4 issue slots (no packed transcendentals). - // - Parabolic takes 8 clocks and 8 issue slots (only fract is non-packed). - AH1 APSinH1(AH1 x){return x*abs(x)-x;} - AH2 APSinH2(AH2 x){return x*abs(x)-x;} // AND,FMA - AH1 APCosH1(AH1 x){x=AFractH1(x*AH1_(0.5)+AH1_(0.75));x=x*AH1_(2.0)-AH1_(1.0);return APSinH1(x);} - AH2 APCosH2(AH2 x){x=AFractH2(x*AH2_(0.5)+AH2_(0.75));x=x*AH2_(2.0)-AH2_(1.0);return APSinH2(x);} // 3x FMA, 2xFRACT, AND - AH2 APSinCosH1(AH1 x){AH1 y=AFractH1(x*AH1_(0.5)+AH1_(0.75));y=y*AH1_(2.0)-AH1_(1.0);return APSinH2(AH2(x,y));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [ZOL] ZERO ONE LOGIC -//------------------------------------------------------------------------------------------------------------------------------ -// Conditional free logic designed for easy 16-bit packing, and backwards porting to 32-bit. -//------------------------------------------------------------------------------------------------------------------------------ -// 0 := false -// 1 := true -//------------------------------------------------------------------------------------------------------------------------------ -// AndNot(x,y) -> !(x&y) .... One op. -// AndOr(x,y,z) -> (x&y)|z ... One op. -// GtZero(x) -> x>0.0 ..... One op. -// Sel(x,y,z) -> x?y:z ..... Two ops, has no precision loss. -// Signed(x) -> x<0.0 ..... One op. -// ZeroPass(x,y) -> x?0:y ..... Two ops, 'y' is a pass through safe for aliasing as integer. -//------------------------------------------------------------------------------------------------------------------------------ -// OPTIMIZATION NOTES -// ================== -// - On Vega to use 2 constants in a packed op, pass in as one AW2 or one AH2 'k.xy' and use as 'k.xx' and 'k.yy'. -// For example 'a.xy*k.xx+k.yy'. -//============================================================================================================================== - #if 1 - AU1 AZolAndU1(AU1 x,AU1 y){return min(x,y);} - AU2 AZolAndU2(AU2 x,AU2 y){return min(x,y);} - AU3 AZolAndU3(AU3 x,AU3 y){return min(x,y);} - AU4 AZolAndU4(AU4 x,AU4 y){return min(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AZolNotU1(AU1 x){return x^AU1_(1);} - AU2 AZolNotU2(AU2 x){return x^AU2_(1);} - AU3 AZolNotU3(AU3 x){return x^AU3_(1);} - AU4 AZolNotU4(AU4 x){return x^AU4_(1);} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AZolOrU1(AU1 x,AU1 y){return max(x,y);} - AU2 AZolOrU2(AU2 x,AU2 y){return max(x,y);} - AU3 AZolOrU3(AU3 x,AU3 y){return max(x,y);} - AU4 AZolOrU4(AU4 x,AU4 y){return max(x,y);} -//============================================================================================================================== - AU1 AZolF1ToU1(AF1 x){return AU1(x);} - AU2 AZolF2ToU2(AF2 x){return AU2(x);} - AU3 AZolF3ToU3(AF3 x){return AU3(x);} - AU4 AZolF4ToU4(AF4 x){return AU4(x);} -//------------------------------------------------------------------------------------------------------------------------------ - // 2 ops, denormals don't work in 32-bit on PC (and if they are enabled, OMOD is disabled). - AU1 AZolNotF1ToU1(AF1 x){return AU1(AF1_(1.0)-x);} - AU2 AZolNotF2ToU2(AF2 x){return AU2(AF2_(1.0)-x);} - AU3 AZolNotF3ToU3(AF3 x){return AU3(AF3_(1.0)-x);} - AU4 AZolNotF4ToU4(AF4 x){return AU4(AF4_(1.0)-x);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolU1ToF1(AU1 x){return AF1(x);} - AF2 AZolU2ToF2(AU2 x){return AF2(x);} - AF3 AZolU3ToF3(AU3 x){return AF3(x);} - AF4 AZolU4ToF4(AU4 x){return AF4(x);} -//============================================================================================================================== - AF1 AZolAndF1(AF1 x,AF1 y){return min(x,y);} - AF2 AZolAndF2(AF2 x,AF2 y){return min(x,y);} - AF3 AZolAndF3(AF3 x,AF3 y){return min(x,y);} - AF4 AZolAndF4(AF4 x,AF4 y){return min(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 ASolAndNotF1(AF1 x,AF1 y){return (-x)*y+AF1_(1.0);} - AF2 ASolAndNotF2(AF2 x,AF2 y){return (-x)*y+AF2_(1.0);} - AF3 ASolAndNotF3(AF3 x,AF3 y){return (-x)*y+AF3_(1.0);} - AF4 ASolAndNotF4(AF4 x,AF4 y){return (-x)*y+AF4_(1.0);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolAndOrF1(AF1 x,AF1 y,AF1 z){return ASatF1(x*y+z);} - AF2 AZolAndOrF2(AF2 x,AF2 y,AF2 z){return ASatF2(x*y+z);} - AF3 AZolAndOrF3(AF3 x,AF3 y,AF3 z){return ASatF3(x*y+z);} - AF4 AZolAndOrF4(AF4 x,AF4 y,AF4 z){return ASatF4(x*y+z);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolGtZeroF1(AF1 x){return ASatF1(x*AF1_(A_INFP_F));} - AF2 AZolGtZeroF2(AF2 x){return ASatF2(x*AF2_(A_INFP_F));} - AF3 AZolGtZeroF3(AF3 x){return ASatF3(x*AF3_(A_INFP_F));} - AF4 AZolGtZeroF4(AF4 x){return ASatF4(x*AF4_(A_INFP_F));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolNotF1(AF1 x){return AF1_(1.0)-x;} - AF2 AZolNotF2(AF2 x){return AF2_(1.0)-x;} - AF3 AZolNotF3(AF3 x){return AF3_(1.0)-x;} - AF4 AZolNotF4(AF4 x){return AF4_(1.0)-x;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolOrF1(AF1 x,AF1 y){return max(x,y);} - AF2 AZolOrF2(AF2 x,AF2 y){return max(x,y);} - AF3 AZolOrF3(AF3 x,AF3 y){return max(x,y);} - AF4 AZolOrF4(AF4 x,AF4 y){return max(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolSelF1(AF1 x,AF1 y,AF1 z){AF1 r=(-x)*z+z;return x*y+r;} - AF2 AZolSelF2(AF2 x,AF2 y,AF2 z){AF2 r=(-x)*z+z;return x*y+r;} - AF3 AZolSelF3(AF3 x,AF3 y,AF3 z){AF3 r=(-x)*z+z;return x*y+r;} - AF4 AZolSelF4(AF4 x,AF4 y,AF4 z){AF4 r=(-x)*z+z;return x*y+r;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolSignedF1(AF1 x){return ASatF1(x*AF1_(A_INFN_F));} - AF2 AZolSignedF2(AF2 x){return ASatF2(x*AF2_(A_INFN_F));} - AF3 AZolSignedF3(AF3 x){return ASatF3(x*AF3_(A_INFN_F));} - AF4 AZolSignedF4(AF4 x){return ASatF4(x*AF4_(A_INFN_F));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolZeroPassF1(AF1 x,AF1 y){return AF1_AU1((AU1_AF1(x)!=AU1_(0))?AU1_(0):AU1_AF1(y));} - AF2 AZolZeroPassF2(AF2 x,AF2 y){return AF2_AU2((AU2_AF2(x)!=AU2_(0))?AU2_(0):AU2_AF2(y));} - AF3 AZolZeroPassF3(AF3 x,AF3 y){return AF3_AU3((AU3_AF3(x)!=AU3_(0))?AU3_(0):AU3_AF3(y));} - AF4 AZolZeroPassF4(AF4 x,AF4 y){return AF4_AU4((AU4_AF4(x)!=AU4_(0))?AU4_(0):AU4_AF4(y));} - #endif -//============================================================================================================================== - #ifdef A_HALF - AW1 AZolAndW1(AW1 x,AW1 y){return min(x,y);} - AW2 AZolAndW2(AW2 x,AW2 y){return min(x,y);} - AW3 AZolAndW3(AW3 x,AW3 y){return min(x,y);} - AW4 AZolAndW4(AW4 x,AW4 y){return min(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AZolNotW1(AW1 x){return x^AW1_(1);} - AW2 AZolNotW2(AW2 x){return x^AW2_(1);} - AW3 AZolNotW3(AW3 x){return x^AW3_(1);} - AW4 AZolNotW4(AW4 x){return x^AW4_(1);} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AZolOrW1(AW1 x,AW1 y){return max(x,y);} - AW2 AZolOrW2(AW2 x,AW2 y){return max(x,y);} - AW3 AZolOrW3(AW3 x,AW3 y){return max(x,y);} - AW4 AZolOrW4(AW4 x,AW4 y){return max(x,y);} -//============================================================================================================================== - // Uses denormal trick. - AW1 AZolH1ToW1(AH1 x){return AW1_AH1(x*AH1_AW1(AW1_(1)));} - AW2 AZolH2ToW2(AH2 x){return AW2_AH2(x*AH2_AW2(AW2_(1)));} - AW3 AZolH3ToW3(AH3 x){return AW3_AH3(x*AH3_AW3(AW3_(1)));} - AW4 AZolH4ToW4(AH4 x){return AW4_AH4(x*AH4_AW4(AW4_(1)));} -//------------------------------------------------------------------------------------------------------------------------------ - // AMD arch lacks a packed conversion opcode. - AH1 AZolW1ToH1(AW1 x){return AH1_AW1(x*AW1_AH1(AH1_(1.0)));} - AH2 AZolW2ToH2(AW2 x){return AH2_AW2(x*AW2_AH2(AH2_(1.0)));} - AH3 AZolW1ToH3(AW3 x){return AH3_AW3(x*AW3_AH3(AH3_(1.0)));} - AH4 AZolW2ToH4(AW4 x){return AH4_AW4(x*AW4_AH4(AH4_(1.0)));} -//============================================================================================================================== - AH1 AZolAndH1(AH1 x,AH1 y){return min(x,y);} - AH2 AZolAndH2(AH2 x,AH2 y){return min(x,y);} - AH3 AZolAndH3(AH3 x,AH3 y){return min(x,y);} - AH4 AZolAndH4(AH4 x,AH4 y){return min(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASolAndNotH1(AH1 x,AH1 y){return (-x)*y+AH1_(1.0);} - AH2 ASolAndNotH2(AH2 x,AH2 y){return (-x)*y+AH2_(1.0);} - AH3 ASolAndNotH3(AH3 x,AH3 y){return (-x)*y+AH3_(1.0);} - AH4 ASolAndNotH4(AH4 x,AH4 y){return (-x)*y+AH4_(1.0);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolAndOrH1(AH1 x,AH1 y,AH1 z){return ASatH1(x*y+z);} - AH2 AZolAndOrH2(AH2 x,AH2 y,AH2 z){return ASatH2(x*y+z);} - AH3 AZolAndOrH3(AH3 x,AH3 y,AH3 z){return ASatH3(x*y+z);} - AH4 AZolAndOrH4(AH4 x,AH4 y,AH4 z){return ASatH4(x*y+z);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolGtZeroH1(AH1 x){return ASatH1(x*AH1_(A_INFP_H));} - AH2 AZolGtZeroH2(AH2 x){return ASatH2(x*AH2_(A_INFP_H));} - AH3 AZolGtZeroH3(AH3 x){return ASatH3(x*AH3_(A_INFP_H));} - AH4 AZolGtZeroH4(AH4 x){return ASatH4(x*AH4_(A_INFP_H));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolNotH1(AH1 x){return AH1_(1.0)-x;} - AH2 AZolNotH2(AH2 x){return AH2_(1.0)-x;} - AH3 AZolNotH3(AH3 x){return AH3_(1.0)-x;} - AH4 AZolNotH4(AH4 x){return AH4_(1.0)-x;} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolOrH1(AH1 x,AH1 y){return max(x,y);} - AH2 AZolOrH2(AH2 x,AH2 y){return max(x,y);} - AH3 AZolOrH3(AH3 x,AH3 y){return max(x,y);} - AH4 AZolOrH4(AH4 x,AH4 y){return max(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolSelH1(AH1 x,AH1 y,AH1 z){AH1 r=(-x)*z+z;return x*y+r;} - AH2 AZolSelH2(AH2 x,AH2 y,AH2 z){AH2 r=(-x)*z+z;return x*y+r;} - AH3 AZolSelH3(AH3 x,AH3 y,AH3 z){AH3 r=(-x)*z+z;return x*y+r;} - AH4 AZolSelH4(AH4 x,AH4 y,AH4 z){AH4 r=(-x)*z+z;return x*y+r;} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolSignedH1(AH1 x){return ASatH1(x*AH1_(A_INFN_H));} - AH2 AZolSignedH2(AH2 x){return ASatH2(x*AH2_(A_INFN_H));} - AH3 AZolSignedH3(AH3 x){return ASatH3(x*AH3_(A_INFN_H));} - AH4 AZolSignedH4(AH4 x){return ASatH4(x*AH4_(A_INFN_H));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// COLOR CONVERSIONS -//------------------------------------------------------------------------------------------------------------------------------ -// These are all linear to/from some other space (where 'linear' has been shortened out of the function name). -// So 'ToGamma' is 'LinearToGamma', and 'FromGamma' is 'LinearFromGamma'. -// These are branch free implementations. -// The AToSrgbF1() function is useful for stores for compute shaders for GPUs without hardware linear->sRGB store conversion. -//------------------------------------------------------------------------------------------------------------------------------ -// TRANSFER FUNCTIONS -// ================== -// 709 ..... Rec709 used for some HDTVs -// Gamma ... Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native -// Pq ...... PQ native for HDR10 -// Srgb .... The sRGB output, typical of PC displays, useful for 10-bit output, or storing to 8-bit UNORM without SRGB type -// Two ..... Gamma 2.0, fastest conversion (useful for intermediate pass approximations) -// Three ... Gamma 3.0, less fast, but good for HDR. -//------------------------------------------------------------------------------------------------------------------------------ -// KEEPING TO SPEC -// =============== -// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. -// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). -// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). -// Also there is a slight step in the transition regions. -// Precision of the coefficients in the spec being the likely cause. -// Main usage case of the sRGB code is to do the linear->sRGB converstion in a compute shader before store. -// This is to work around lack of hardware (typically only ROP does the conversion for free). -// To "correct" the linear segment, would be to introduce error, because hardware decode of sRGB->linear is fixed (and free). -// So this header keeps with the spec. -// For linear->sRGB transforms, the linear segment in some respects reduces error, because rounding in that region is linear. -// Rounding in the curved region in hardware (and fast software code) introduces error due to rounding in non-linear. -//------------------------------------------------------------------------------------------------------------------------------ -// FOR PQ -// ====== -// Both input and output is {0.0-1.0}, and where output 1.0 represents 10000.0 cd/m^2. -// All constants are only specified to FP32 precision. -// External PQ source reference, -// - https://github.com/ampas/aces-dev/blob/master/transforms/ctl/utilities/ACESlib.Utilities_Color.a1.0.1.ctl -//------------------------------------------------------------------------------------------------------------------------------ -// PACKED VERSIONS -// =============== -// These are the A*H2() functions. -// There is no PQ functions as FP16 seemed to not have enough precision for the conversion. -// The remaining functions are "good enough" for 8-bit, and maybe 10-bit if not concerned about a few 1-bit errors. -// Precision is lowest in the 709 conversion, higher in sRGB, higher still in Two and Gamma (when using 2.2 at least). -//------------------------------------------------------------------------------------------------------------------------------ -// NOTES -// ===== -// Could be faster for PQ conversions to be in ALU or a texture lookup depending on usage case. -//============================================================================================================================== - #if 1 - AF1 ATo709F1(AF1 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099); - return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} - AF2 ATo709F2(AF2 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099); - return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} - AF3 ATo709F3(AF3 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099); - return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} -//------------------------------------------------------------------------------------------------------------------------------ - // Note 'rcpX' is '1/x', where the 'x' is what would be used in AFromGamma(). - AF1 AToGammaF1(AF1 c,AF1 rcpX){return pow(c,AF1_(rcpX));} - AF2 AToGammaF2(AF2 c,AF1 rcpX){return pow(c,AF2_(rcpX));} - AF3 AToGammaF3(AF3 c,AF1 rcpX){return pow(c,AF3_(rcpX));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToPqF1(AF1 x){AF1 p=pow(x,AF1_(0.159302)); - return pow((AF1_(0.835938)+AF1_(18.8516)*p)/(AF1_(1.0)+AF1_(18.6875)*p),AF1_(78.8438));} - AF2 AToPqF1(AF2 x){AF2 p=pow(x,AF2_(0.159302)); - return pow((AF2_(0.835938)+AF2_(18.8516)*p)/(AF2_(1.0)+AF2_(18.6875)*p),AF2_(78.8438));} - AF3 AToPqF1(AF3 x){AF3 p=pow(x,AF3_(0.159302)); - return pow((AF3_(0.835938)+AF3_(18.8516)*p)/(AF3_(1.0)+AF3_(18.6875)*p),AF3_(78.8438));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToSrgbF1(AF1 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); - return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} - AF2 AToSrgbF2(AF2 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); - return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} - AF3 AToSrgbF3(AF3 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); - return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToTwoF1(AF1 c){return sqrt(c);} - AF2 AToTwoF2(AF2 c){return sqrt(c);} - AF3 AToTwoF3(AF3 c){return sqrt(c);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToThreeF1(AF1 c){return pow(c,AF1_(1.0/3.0));} - AF2 AToThreeF2(AF2 c){return pow(c,AF2_(1.0/3.0));} - AF3 AToThreeF3(AF3 c){return pow(c,AF3_(1.0/3.0));} - #endif -//============================================================================================================================== - #if 1 - // Unfortunately median won't work here. - AF1 AFrom709F1(AF1 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); - return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} - AF2 AFrom709F2(AF2 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); - return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} - AF3 AFrom709F3(AF3 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); - return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromGammaF1(AF1 c,AF1 x){return pow(c,AF1_(x));} - AF2 AFromGammaF2(AF2 c,AF1 x){return pow(c,AF2_(x));} - AF3 AFromGammaF3(AF3 c,AF1 x){return pow(c,AF3_(x));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromPqF1(AF1 x){AF1 p=pow(x,AF1_(0.0126833)); - return pow(ASatF1(p-AF1_(0.835938))/(AF1_(18.8516)-AF1_(18.6875)*p),AF1_(6.27739));} - AF2 AFromPqF1(AF2 x){AF2 p=pow(x,AF2_(0.0126833)); - return pow(ASatF2(p-AF2_(0.835938))/(AF2_(18.8516)-AF2_(18.6875)*p),AF2_(6.27739));} - AF3 AFromPqF1(AF3 x){AF3 p=pow(x,AF3_(0.0126833)); - return pow(ASatF3(p-AF3_(0.835938))/(AF3_(18.8516)-AF3_(18.6875)*p),AF3_(6.27739));} -//------------------------------------------------------------------------------------------------------------------------------ - // Unfortunately median won't work here. - AF1 AFromSrgbF1(AF1 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); - return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} - AF2 AFromSrgbF2(AF2 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); - return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} - AF3 AFromSrgbF3(AF3 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); - return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromTwoF1(AF1 c){return c*c;} - AF2 AFromTwoF2(AF2 c){return c*c;} - AF3 AFromTwoF3(AF3 c){return c*c;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromThreeF1(AF1 c){return c*c*c;} - AF2 AFromThreeF2(AF2 c){return c*c*c;} - AF3 AFromThreeF3(AF3 c){return c*c*c;} - #endif -//============================================================================================================================== - #ifdef A_HALF - AH1 ATo709H1(AH1 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); - return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} - AH2 ATo709H2(AH2 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); - return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} - AH3 ATo709H3(AH3 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); - return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AToGammaH1(AH1 c,AH1 rcpX){return pow(c,AH1_(rcpX));} - AH2 AToGammaH2(AH2 c,AH1 rcpX){return pow(c,AH2_(rcpX));} - AH3 AToGammaH3(AH3 c,AH1 rcpX){return pow(c,AH3_(rcpX));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AToSrgbH1(AH1 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055); - return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} - AH2 AToSrgbH2(AH2 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055); - return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} - AH3 AToSrgbH3(AH3 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055); - return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AToTwoH1(AH1 c){return sqrt(c);} - AH2 AToTwoH2(AH2 c){return sqrt(c);} - AH3 AToTwoH3(AH3 c){return sqrt(c);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AToThreeF1(AH1 c){return pow(c,AH1_(1.0/3.0));} - AH2 AToThreeF2(AH2 c){return pow(c,AH2_(1.0/3.0));} - AH3 AToThreeF3(AH3 c){return pow(c,AH3_(1.0/3.0));} - #endif -//============================================================================================================================== - #ifdef A_HALF - AH1 AFrom709H1(AH1 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); - return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} - AH2 AFrom709H2(AH2 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); - return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} - AH3 AFrom709H3(AH3 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); - return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFromGammaH1(AH1 c,AH1 x){return pow(c,AH1_(x));} - AH2 AFromGammaH2(AH2 c,AH1 x){return pow(c,AH2_(x));} - AH3 AFromGammaH3(AH3 c,AH1 x){return pow(c,AH3_(x));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AHromSrgbF1(AH1 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); - return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} - AH2 AHromSrgbF2(AH2 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); - return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} - AH3 AHromSrgbF3(AH3 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); - return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFromTwoH1(AH1 c){return c*c;} - AH2 AFromTwoH2(AH2 c){return c*c;} - AH3 AFromTwoH3(AH3 c){return c*c;} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFromThreeH1(AH1 c){return c*c*c;} - AH2 AFromThreeH2(AH2 c){return c*c*c;} - AH3 AFromThreeH3(AH3 c){return c*c*c;} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// CS REMAP -//============================================================================================================================== - // Simple remap 64x1 to 8x8 with rotated 2x2 pixel quads in quad linear. - // 543210 - // ====== - // ..xxx. - // yy...y - AU2 ARmp8x8(AU1 a){return AU2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));} -//============================================================================================================================== - // More complex remap 64x1 to 8x8 which is necessary for 2D wave reductions. - // 543210 - // ====== - // .xx..x - // y..yy. - // Details, - // LANE TO 8x8 MAPPING - // =================== - // 00 01 08 09 10 11 18 19 - // 02 03 0a 0b 12 13 1a 1b - // 04 05 0c 0d 14 15 1c 1d - // 06 07 0e 0f 16 17 1e 1f - // 20 21 28 29 30 31 38 39 - // 22 23 2a 2b 32 33 3a 3b - // 24 25 2c 2d 34 35 3c 3d - // 26 27 2e 2f 36 37 3e 3f - AU2 ARmpRed8x8(AU1 a){return AU2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));} -//============================================================================================================================== - #ifdef A_HALF - AW2 ARmp8x8H(AU1 a){return AW2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));} - AW2 ARmpRed8x8H(AU1 a){return AW2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));} - #endif -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// REFERENCE -// -//------------------------------------------------------------------------------------------------------------------------------ -// IEEE FLOAT RULES -// ================ -// - saturate(NaN)=0, saturate(-INF)=0, saturate(+INF)=1 -// - {+/-}0 * {+/-}INF = NaN -// - -INF + (+INF) = NaN -// - {+/-}0 / {+/-}0 = NaN -// - {+/-}INF / {+/-}INF = NaN -// - a<(-0) := sqrt(a) = NaN (a=-0.0 won't NaN) -// - 0 == -0 -// - 4/0 = +INF -// - 4/-0 = -INF -// - 4+INF = +INF -// - 4-INF = -INF -// - 4*(+INF) = +INF -// - 4*(-INF) = -INF -// - -4*(+INF) = -INF -// - sqrt(+INF) = +INF -//------------------------------------------------------------------------------------------------------------------------------ -// FP16 ENCODING -// ============= -// fedcba9876543210 -// ---------------- -// ......mmmmmmmmmm 10-bit mantissa (encodes 11-bit 0.5 to 1.0 except for denormals) -// .eeeee.......... 5-bit exponent -// .00000.......... denormals -// .00001.......... -14 exponent -// .11110.......... 15 exponent -// .111110000000000 infinity -// .11111nnnnnnnnnn NaN with n!=0 -// s............... sign -//------------------------------------------------------------------------------------------------------------------------------ -// FP16/INT16 ALIASING DENORMAL -// ============================ -// 11-bit unsigned integers alias with half float denormal/normal values, -// 1 = 2^(-24) = 1/16777216 ....................... first denormal value -// 2 = 2^(-23) -// ... -// 1023 = 2^(-14)*(1-2^(-10)) = 2^(-14)*(1-1/1024) ... last denormal value -// 1024 = 2^(-14) = 1/16384 .......................... first normal value that still maps to integers -// 2047 .............................................. last normal value that still maps to integers -// Scaling limits, -// 2^15 = 32768 ...................................... largest power of 2 scaling -// Largest pow2 conversion mapping is at *32768, -// 1 : 2^(-9) = 1/512 -// 2 : 1/256 -// 4 : 1/128 -// 8 : 1/64 -// 16 : 1/32 -// 32 : 1/16 -// 64 : 1/8 -// 128 : 1/4 -// 256 : 1/2 -// 512 : 1 -// 1024 : 2 -// 2047 : a little less than 4 -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// GPU/CPU PORTABILITY -// -// -//------------------------------------------------------------------------------------------------------------------------------ -// This is the GPU implementation. -// See the CPU implementation for docs. -//============================================================================================================================== -#ifdef A_GPU - #define A_TRUE true - #define A_FALSE false - #define A_STATIC -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY -//============================================================================================================================== - #define retAD2 AD2 - #define retAD3 AD3 - #define retAD4 AD4 - #define retAF2 AF2 - #define retAF3 AF3 - #define retAF4 AF4 - #define retAL2 AL2 - #define retAL3 AL3 - #define retAL4 AL4 - #define retAU2 AU2 - #define retAU3 AU3 - #define retAU4 AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define inAD2 in AD2 - #define inAD3 in AD3 - #define inAD4 in AD4 - #define inAF2 in AF2 - #define inAF3 in AF3 - #define inAF4 in AF4 - #define inAL2 in AL2 - #define inAL3 in AL3 - #define inAL4 in AL4 - #define inAU2 in AU2 - #define inAU3 in AU3 - #define inAU4 in AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define inoutAD2 inout AD2 - #define inoutAD3 inout AD3 - #define inoutAD4 inout AD4 - #define inoutAF2 inout AF2 - #define inoutAF3 inout AF3 - #define inoutAF4 inout AF4 - #define inoutAL2 inout AL2 - #define inoutAL3 inout AL3 - #define inoutAL4 inout AL4 - #define inoutAU2 inout AU2 - #define inoutAU3 inout AU3 - #define inoutAU4 inout AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define outAD2 out AD2 - #define outAD3 out AD3 - #define outAD4 out AD4 - #define outAF2 out AF2 - #define outAF3 out AF3 - #define outAF4 out AF4 - #define outAL2 out AL2 - #define outAL3 out AL3 - #define outAL4 out AL4 - #define outAU2 out AU2 - #define outAU3 out AU3 - #define outAU4 out AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define varAD2(x) AD2 x - #define varAD3(x) AD3 x - #define varAD4(x) AD4 x - #define varAF2(x) AF2 x - #define varAF3(x) AF3 x - #define varAF4(x) AF4 x - #define varAL2(x) AL2 x - #define varAL3(x) AL3 x - #define varAL4(x) AL4 x - #define varAU2(x) AU2 x - #define varAU3(x) AU3 x - #define varAU4(x) AU4 x -//------------------------------------------------------------------------------------------------------------------------------ - #define initAD2(x,y) AD2(x,y) - #define initAD3(x,y,z) AD3(x,y,z) - #define initAD4(x,y,z,w) AD4(x,y,z,w) - #define initAF2(x,y) AF2(x,y) - #define initAF3(x,y,z) AF3(x,y,z) - #define initAF4(x,y,z,w) AF4(x,y,z,w) - #define initAL2(x,y) AL2(x,y) - #define initAL3(x,y,z) AL3(x,y,z) - #define initAL4(x,y,z,w) AL4(x,y,z,w) - #define initAU2(x,y) AU2(x,y) - #define initAU3(x,y,z) AU3(x,y,z) - #define initAU4(x,y,z,w) AU4(x,y,z,w) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS -//============================================================================================================================== - #define AAbsD1(a) abs(AD1(a)) - #define AAbsF1(a) abs(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ACosD1(a) cos(AD1(a)) - #define ACosF1(a) cos(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ADotD2(a,b) dot(AD2(a),AD2(b)) - #define ADotD3(a,b) dot(AD3(a),AD3(b)) - #define ADotD4(a,b) dot(AD4(a),AD4(b)) - #define ADotF2(a,b) dot(AF2(a),AF2(b)) - #define ADotF3(a,b) dot(AF3(a),AF3(b)) - #define ADotF4(a,b) dot(AF4(a),AF4(b)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AExp2D1(a) exp2(AD1(a)) - #define AExp2F1(a) exp2(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AFloorD1(a) floor(AD1(a)) - #define AFloorF1(a) floor(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ALog2D1(a) log2(AD1(a)) - #define ALog2F1(a) log2(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AMaxD1(a,b) max(a,b) - #define AMaxF1(a,b) max(a,b) - #define AMaxL1(a,b) max(a,b) - #define AMaxU1(a,b) max(a,b) -//------------------------------------------------------------------------------------------------------------------------------ - #define AMinD1(a,b) min(a,b) - #define AMinF1(a,b) min(a,b) - #define AMinL1(a,b) min(a,b) - #define AMinU1(a,b) min(a,b) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASinD1(a) sin(AD1(a)) - #define ASinF1(a) sin(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASqrtD1(a) sqrt(AD1(a)) - #define ASqrtF1(a) sqrt(AF1(a)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS - DEPENDENT -//============================================================================================================================== - #define APowD1(a,b) pow(AD1(a),AF1(b)) - #define APowF1(a,b) pow(AF1(a),AF1(b)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR OPS -//------------------------------------------------------------------------------------------------------------------------------ -// These are added as needed for production or prototyping, so not necessarily a complete set. -// They follow a convention of taking in a destination and also returning the destination value to increase utility. -//============================================================================================================================== - #ifdef A_DUBL - AD2 opAAbsD2(outAD2 d,inAD2 a){d=abs(a);return d;} - AD3 opAAbsD3(outAD3 d,inAD3 a){d=abs(a);return d;} - AD4 opAAbsD4(outAD4 d,inAD4 a){d=abs(a);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d=a+b;return d;} - AD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d=a+b;return d;} - AD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d=a+b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d=a+AD2_(b);return d;} - AD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d=a+AD3_(b);return d;} - AD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d=a+AD4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opACpyD2(outAD2 d,inAD2 a){d=a;return d;} - AD3 opACpyD3(outAD3 d,inAD3 a){d=a;return d;} - AD4 opACpyD4(outAD4 d,inAD4 a){d=a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d=ALerpD2(a,b,c);return d;} - AD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d=ALerpD3(a,b,c);return d;} - AD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d=ALerpD4(a,b,c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d=ALerpD2(a,b,AD2_(c));return d;} - AD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d=ALerpD3(a,b,AD3_(c));return d;} - AD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d=ALerpD4(a,b,AD4_(c));return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d=max(a,b);return d;} - AD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d=max(a,b);return d;} - AD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d=max(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d=min(a,b);return d;} - AD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d=min(a,b);return d;} - AD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d=min(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d=a*b;return d;} - AD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d=a*b;return d;} - AD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d=a*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d=a*AD2_(b);return d;} - AD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d=a*AD3_(b);return d;} - AD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d=a*AD4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opANegD2(outAD2 d,inAD2 a){d=-a;return d;} - AD3 opANegD3(outAD3 d,inAD3 a){d=-a;return d;} - AD4 opANegD4(outAD4 d,inAD4 a){d=-a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opARcpD2(outAD2 d,inAD2 a){d=ARcpD2(a);return d;} - AD3 opARcpD3(outAD3 d,inAD3 a){d=ARcpD3(a);return d;} - AD4 opARcpD4(outAD4 d,inAD4 a){d=ARcpD4(a);return d;} - #endif -//============================================================================================================================== - AF2 opAAbsF2(outAF2 d,inAF2 a){d=abs(a);return d;} - AF3 opAAbsF3(outAF3 d,inAF3 a){d=abs(a);return d;} - AF4 opAAbsF4(outAF4 d,inAF4 a){d=abs(a);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d=a+b;return d;} - AF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d=a+b;return d;} - AF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d=a+b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d=a+AF2_(b);return d;} - AF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d=a+AF3_(b);return d;} - AF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d=a+AF4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opACpyF2(outAF2 d,inAF2 a){d=a;return d;} - AF3 opACpyF3(outAF3 d,inAF3 a){d=a;return d;} - AF4 opACpyF4(outAF4 d,inAF4 a){d=a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d=ALerpF2(a,b,c);return d;} - AF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d=ALerpF3(a,b,c);return d;} - AF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d=ALerpF4(a,b,c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d=ALerpF2(a,b,AF2_(c));return d;} - AF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d=ALerpF3(a,b,AF3_(c));return d;} - AF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d=ALerpF4(a,b,AF4_(c));return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d=max(a,b);return d;} - AF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d=max(a,b);return d;} - AF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d=max(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d=min(a,b);return d;} - AF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d=min(a,b);return d;} - AF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d=min(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d=a*b;return d;} - AF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d=a*b;return d;} - AF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d=a*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d=a*AF2_(b);return d;} - AF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d=a*AF3_(b);return d;} - AF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d=a*AF4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opANegF2(outAF2 d,inAF2 a){d=-a;return d;} - AF3 opANegF3(outAF3 d,inAF3 a){d=-a;return d;} - AF4 opANegF4(outAF4 d,inAF4 a){d=-a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opARcpF2(outAF2 d,inAF2 a){d=ARcpF2(a);return d;} - AF3 opARcpF3(outAF3 d,inAF3 a){d=ARcpF3(a);return d;} - AF4 opARcpF4(outAF4 d,inAF4 a){d=ARcpF4(a);return d;} -#endif diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_a.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_a.hlsl.meta deleted file mode 100644 index fbc7a34..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_a.hlsl.meta +++ /dev/null @@ -1,9 +0,0 @@ -fileFormatVersion: 2 -guid: a1f876d966449794bbb092962d2d7fc9 -ShaderImporter: - externalObjects: {} - defaultTextures: [] - nonModifiableTextures: [] - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_fsr1.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_fsr1.hlsl deleted file mode 100644 index ab92d2d..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_fsr1.hlsl +++ /dev/null @@ -1,1199 +0,0 @@ -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// AMD FidelityFX SUPER RESOLUTION [FSR 1] ::: SPATIAL SCALING & EXTRAS - v1.20210629 -// -// -//------------------------------------------------------------------------------------------------------------------------------ -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//------------------------------------------------------------------------------------------------------------------------------ -// FidelityFX Super Resolution Sample -// -// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -//------------------------------------------------------------------------------------------------------------------------------ -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//------------------------------------------------------------------------------------------------------------------------------ -// ABOUT -// ===== -// FSR is a collection of algorithms relating to generating a higher resolution image. -// This specific header focuses on single-image non-temporal image scaling, and related tools. -// -// The core functions are EASU and RCAS: -// [EASU] Edge Adaptive Spatial Upsampling ....... 1x to 4x area range spatial scaling, clamped adaptive elliptical filter. -// [RCAS] Robust Contrast Adaptive Sharpening .... A non-scaling variation on CAS. -// RCAS needs to be applied after EASU as a separate pass. -// -// Optional utility functions are: -// [LFGA] Linear Film Grain Applicator ........... Tool to apply film grain after scaling. -// [SRTM] Simple Reversible Tone-Mapper .......... Linear HDR {0 to FP16_MAX} to {0 to 1} and back. -// [TEPD] Temporal Energy Preserving Dither ...... Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. -// See each individual sub-section for inline documentation. -//------------------------------------------------------------------------------------------------------------------------------ -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//------------------------------------------------------------------------------------------------------------------------------ -// FUNCTION PERMUTATIONS -// ===================== -// *F() ..... Single item computation with 32-bit. -// *H() ..... Single item computation with 16-bit, with packing (aka two 16-bit ops in parallel) when possible. -// *Hx2() ... Processing two items in parallel with 16-bit, easier packing. -// Not all interfaces in this file have a *Hx2() form. -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [EASU] EDGE ADAPTIVE SPATIAL UPSAMPLING -// -//------------------------------------------------------------------------------------------------------------------------------ -// EASU provides a high quality spatial-only scaling at relatively low cost. -// Meaning EASU is appropiate for laptops and other low-end GPUs. -// Quality from 1x to 4x area scaling is good. -//------------------------------------------------------------------------------------------------------------------------------ -// The scalar uses a modified fast approximation to the standard lanczos(size=2) kernel. -// EASU runs in a single pass, so it applies a directionally and anisotropically adaptive radial lanczos. -// This is also kept as simple as possible to have minimum runtime. -//------------------------------------------------------------------------------------------------------------------------------ -// The lanzcos filter has negative lobes, so by itself it will introduce ringing. -// To remove all ringing, the algorithm uses the nearest 2x2 input texels as a neighborhood, -// and limits output to the minimum and maximum of that neighborhood. -//------------------------------------------------------------------------------------------------------------------------------ -// Input image requirements: -// -// Color needs to be encoded as 3 channel[red, green, blue](e.g.XYZ not supported) -// Each channel needs to be in the range[0, 1] -// Any color primaries are supported -// Display / tonemapping curve needs to be as if presenting to sRGB display or similar(e.g.Gamma 2.0) -// There should be no banding in the input -// There should be no high amplitude noise in the input -// There should be no noise in the input that is not at input pixel granularity -// For performance purposes, use 32bpp formats -//------------------------------------------------------------------------------------------------------------------------------ -// Best to apply EASU at the end of the frame after tonemapping -// but before film grain or composite of the UI. -//------------------------------------------------------------------------------------------------------------------------------ -// Example of including this header for D3D HLSL : -// -// #define A_GPU 1 -// #define A_HLSL 1 -// #define A_HALF 1 -// #include "ffx_a.h" -// #define FSR_EASU_H 1 -// #define FSR_RCAS_H 1 -// //declare input callbacks -// #include "ffx_fsr1.h" -// -// Example of including this header for Vulkan GLSL : -// -// #define A_GPU 1 -// #define A_GLSL 1 -// #define A_HALF 1 -// #include "ffx_a.h" -// #define FSR_EASU_H 1 -// #define FSR_RCAS_H 1 -// //declare input callbacks -// #include "ffx_fsr1.h" -// -// Example of including this header for Vulkan HLSL : -// -// #define A_GPU 1 -// #define A_HLSL 1 -// #define A_HLSL_6_2 1 -// #define A_NO_16_BIT_CAST 1 -// #define A_HALF 1 -// #include "ffx_a.h" -// #define FSR_EASU_H 1 -// #define FSR_RCAS_H 1 -// //declare input callbacks -// #include "ffx_fsr1.h" -// -// Example of declaring the required input callbacks for GLSL : -// The callbacks need to gather4 for each color channel using the specified texture coordinate 'p'. -// EASU uses gather4 to reduce position computation logic and for free Arrays of Structures to Structures of Arrays conversion. -// -// AH4 FsrEasuRH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,0));} -// AH4 FsrEasuGH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,1));} -// AH4 FsrEasuBH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,2));} -// ... -// The FsrEasuCon function needs to be called from the CPU or GPU to set up constants. -// The difference in viewport and input image size is there to support Dynamic Resolution Scaling. -// To use FsrEasuCon() on the CPU, define A_CPU before including ffx_a and ffx_fsr1. -// Including a GPU example here, the 'con0' through 'con3' values would be stored out to a constant buffer. -// AU4 con0,con1,con2,con3; -// FsrEasuCon(con0,con1,con2,con3, -// 1920.0,1080.0, // Viewport size (top left aligned) in the input image which is to be scaled. -// 3840.0,2160.0, // The size of the input image. -// 2560.0,1440.0); // The output resolution. -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// CONSTANT SETUP -//============================================================================================================================== -// Call to setup required constant values (works on CPU or GPU). -A_STATIC void FsrEasuCon( -outAU4 con0, -outAU4 con1, -outAU4 con2, -outAU4 con3, -// This the rendered image resolution being upscaled -AF1 inputViewportInPixelsX, -AF1 inputViewportInPixelsY, -// This is the resolution of the resource containing the input image (useful for dynamic resolution) -AF1 inputSizeInPixelsX, -AF1 inputSizeInPixelsY, -// This is the display resolution which the input image gets upscaled to -AF1 outputSizeInPixelsX, -AF1 outputSizeInPixelsY){ - // Output integer position to a pixel position in viewport. - con0[0]=AU1_AF1(inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)); - con0[1]=AU1_AF1(inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)); - con0[2]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)-AF1_(0.5)); - con0[3]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)-AF1_(0.5)); - // Viewport pixel position to normalized image space. - // This is used to get upper-left of 'F' tap. - con1[0]=AU1_AF1(ARcpF1(inputSizeInPixelsX)); - con1[1]=AU1_AF1(ARcpF1(inputSizeInPixelsY)); - // Centers of gather4, first offset from upper-left of 'F'. - // +---+---+ - // | | | - // +--(0)--+ - // | b | c | - // +---F---+---+---+ - // | e | f | g | h | - // +--(1)--+--(2)--+ - // | i | j | k | l | - // +---+---+---+---+ - // | n | o | - // +--(3)--+ - // | | | - // +---+---+ - con1[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX)); - con1[3]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsY)); - // These are from (0) instead of 'F'. - con2[0]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsX)); - con2[1]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY)); - con2[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX)); - con2[3]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY)); - con3[0]=AU1_AF1(AF1_( 0.0)*ARcpF1(inputSizeInPixelsX)); - con3[1]=AU1_AF1(AF1_( 4.0)*ARcpF1(inputSizeInPixelsY)); - con3[2]=con3[3]=0;} - -//If the an offset into the input image resource -A_STATIC void FsrEasuConOffset( - outAU4 con0, - outAU4 con1, - outAU4 con2, - outAU4 con3, - // This the rendered image resolution being upscaled - AF1 inputViewportInPixelsX, - AF1 inputViewportInPixelsY, - // This is the resolution of the resource containing the input image (useful for dynamic resolution) - AF1 inputSizeInPixelsX, - AF1 inputSizeInPixelsY, - // This is the display resolution which the input image gets upscaled to - AF1 outputSizeInPixelsX, - AF1 outputSizeInPixelsY, - // This is the input image offset into the resource containing it (useful for dynamic resolution) - AF1 inputOffsetInPixelsX, - AF1 inputOffsetInPixelsY) { - FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); - con0[2] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsX * ARcpF1(outputSizeInPixelsX) - AF1_(0.5) + inputOffsetInPixelsX); - con0[3] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsY * ARcpF1(outputSizeInPixelsY) - AF1_(0.5) + inputOffsetInPixelsY); -} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// NON-PACKED 32-BIT VERSION -//============================================================================================================================== -#if defined(A_GPU)&&defined(FSR_EASU_F) - // Input callback prototypes, need to be implemented by calling shader - AF4 FsrEasuRF(AF2 p); - AF4 FsrEasuGF(AF2 p); - AF4 FsrEasuBF(AF2 p); -//------------------------------------------------------------------------------------------------------------------------------ - // Filtering for a given tap for the scalar. - void FsrEasuTapF( - inout AF3 aC, // Accumulated color, with negative lobe. - inout AF1 aW, // Accumulated weight. - AF2 off, // Pixel offset from resolve position to tap. - AF2 dir, // Gradient direction. - AF2 len, // Length. - AF1 lob, // Negative lobe strength. - AF1 clp, // Clipping point. - AF3 c){ // Tap color. - // Rotate offset by direction. - AF2 v; - v.x=(off.x*( dir.x))+(off.y*dir.y); - v.y=(off.x*(-dir.y))+(off.y*dir.x); - // Anisotropy. - v*=len; - // Compute distance^2. - AF1 d2=v.x*v.x+v.y*v.y; - // Limit to the window as at corner, 2 taps can easily be outside. - d2=min(d2,clp); - // Approximation of lancos2 without sin() or rcp(), or sqrt() to get x. - // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 - // |_______________________________________| |_______________| - // base window - // The general form of the 'base' is, - // (a*(b*x^2-1)^2-(a-1)) - // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. - AF1 wB=AF1_(2.0/5.0)*d2+AF1_(-1.0); - AF1 wA=lob*d2+AF1_(-1.0); - wB*=wB; - wA*=wA; - wB=AF1_(25.0/16.0)*wB+AF1_(-(25.0/16.0-1.0)); - AF1 w=wB*wA; - // Do weighted average. - aC+=c*w;aW+=w;} -//------------------------------------------------------------------------------------------------------------------------------ - // Accumulate direction and length. - void FsrEasuSetF( - inout AF2 dir, - inout AF1 len, - AF2 pp, - AP1 biS,AP1 biT,AP1 biU,AP1 biV, - AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE){ - // Compute bilinear weight, branches factor out as predicates are compiler time immediates. - // s t - // u v - AF1 w = AF1_(0.0); - if(biS)w=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y); - if(biT)w= pp.x *(AF1_(1.0)-pp.y); - if(biU)w=(AF1_(1.0)-pp.x)* pp.y ; - if(biV)w= pp.x * pp.y ; - // Direction is the '+' diff. - // a - // b c d - // e - // Then takes magnitude from abs average of both sides of 'c'. - // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. - AF1 dc=lD-lC; - AF1 cb=lC-lB; - AF1 lenX=max(abs(dc),abs(cb)); - lenX=APrxLoRcpF1(lenX); - AF1 dirX=lD-lB; - dir.x+=dirX*w; - lenX=ASatF1(abs(dirX)*lenX); - lenX*=lenX; - len+=lenX*w; - // Repeat for the y axis. - AF1 ec=lE-lC; - AF1 ca=lC-lA; - AF1 lenY=max(abs(ec),abs(ca)); - lenY=APrxLoRcpF1(lenY); - AF1 dirY=lE-lA; - dir.y+=dirY*w; - lenY=ASatF1(abs(dirY)*lenY); - lenY*=lenY; - len+=lenY*w;} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrEasuF( - out AF3 pix, - AU2 ip, // Integer pixel position in output. - AU4 con0, // Constants generated by FsrEasuCon(). - AU4 con1, - AU4 con2, - AU4 con3){ -//------------------------------------------------------------------------------------------------------------------------------ - // Get position of 'f'. - AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw); - AF2 fp=floor(pp); - pp-=fp; -//------------------------------------------------------------------------------------------------------------------------------ - // 12-tap kernel. - // b c - // e f g h - // i j k l - // n o - // Gather 4 ordering. - // a b - // r g - // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, - // a b <- unused (z) - // r g - // a b a b - // r g r g - // a b - // r g <- unused (z) - // Allowing dead-code removal to remove the 'z's. - AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw); - // These are from p0 to avoid pulling two constants on pre-Navi hardware. - AF2 p1=p0+AF2_AU2(con2.xy); - AF2 p2=p0+AF2_AU2(con2.zw); - AF2 p3=p0+AF2_AU2(con3.xy); - AF4 bczzR=FsrEasuRF(p0); - AF4 bczzG=FsrEasuGF(p0); - AF4 bczzB=FsrEasuBF(p0); - AF4 ijfeR=FsrEasuRF(p1); - AF4 ijfeG=FsrEasuGF(p1); - AF4 ijfeB=FsrEasuBF(p1); - AF4 klhgR=FsrEasuRF(p2); - AF4 klhgG=FsrEasuGF(p2); - AF4 klhgB=FsrEasuBF(p2); - AF4 zzonR=FsrEasuRF(p3); - AF4 zzonG=FsrEasuGF(p3); - AF4 zzonB=FsrEasuBF(p3); -//------------------------------------------------------------------------------------------------------------------------------ - // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD). - AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG); - AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG); - AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG); - AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG); - // Rename. - AF1 bL=bczzL.x; - AF1 cL=bczzL.y; - AF1 iL=ijfeL.x; - AF1 jL=ijfeL.y; - AF1 fL=ijfeL.z; - AF1 eL=ijfeL.w; - AF1 kL=klhgL.x; - AF1 lL=klhgL.y; - AF1 hL=klhgL.z; - AF1 gL=klhgL.w; - AF1 oL=zzonL.z; - AF1 nL=zzonL.w; - // Accumulate for bilinear interpolation. - AF2 dir=AF2_(0.0); - AF1 len=AF1_(0.0); - FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL); - FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL); - FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL); - FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL); -//------------------------------------------------------------------------------------------------------------------------------ - // Normalize with approximation, and cleanup close to zero. - AF2 dir2=dir*dir; - AF1 dirR=dir2.x+dir2.y; - AP1 zro=dirR w = -m/(n+e+w+s) -// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) -// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. -// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. -// So RCAS uses 4x the maximum and 4x the minimum (depending on equation)in place of the individual taps. -// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. -// This stabilizes RCAS. -// RCAS does a simple highpass which is normalized against the local contrast then shaped, -// 0.25 -// 0.25 -1 0.25 -// 0.25 -// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. -// -// GLSL example for the required callbacks : -// -// AH4 FsrRcasLoadH(ASW2 p){return AH4(imageLoad(imgSrc,ASU2(p)));} -// void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b) -// { -// //do any simple input color conversions here or leave empty if none needed -// } -// -// FsrRcasCon need to be called from the CPU or GPU to set up constants. -// Including a GPU example here, the 'con' value would be stored out to a constant buffer. -// -// AU4 con; -// FsrRcasCon(con, -// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. -// --------------- -// RCAS sharpening supports a CAS-like pass-through alpha via, -// #define FSR_RCAS_PASSTHROUGH_ALPHA 1 -// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise. -// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define, -// #define FSR_RCAS_DENOISE 1 -//============================================================================================================================== -// This is set at the limit of providing unnatural results for sharpening. -#define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// CONSTANT SETUP -//============================================================================================================================== -// Call to setup required constant values (works on CPU or GPU). -A_STATIC void FsrRcasCon( -outAU4 con, -// The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. -AF1 sharpness){ - // Transform from stops to linear value. - sharpness=AExp2F1(-sharpness); - varAF2(hSharp)=initAF2(sharpness,sharpness); - con[0]=AU1_AF1(sharpness); - con[1]=AU1_AH2_AF2(hSharp); - con[2]=0; - con[3]=0;} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// NON-PACKED 32-BIT VERSION -//============================================================================================================================== -#if defined(A_GPU)&&defined(FSR_RCAS_F) - // Input callback prototypes that need to be implemented by calling shader - AF4 FsrRcasLoadF(ASU2 p); - void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b); -//------------------------------------------------------------------------------------------------------------------------------ - void FsrRcasF( - out AF1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. - out AF1 pixG, - out AF1 pixB, - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out AF1 pixA, - #endif - AU2 ip, // Integer pixel position in output. - AU4 con){ // Constant generated by RcasSetup(). - // Algorithm uses minimal 3x3 pixel neighborhood. - // b - // d e f - // h - ASU2 sp=ASU2(ip); - AF3 b=FsrRcasLoadF(sp+ASU2( 0,-1)).rgb; - AF3 d=FsrRcasLoadF(sp+ASU2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - AF4 ee=FsrRcasLoadF(sp); - AF3 e=ee.rgb;pixA=ee.a; - #else - AF3 e=FsrRcasLoadF(sp).rgb; - #endif - AF3 f=FsrRcasLoadF(sp+ASU2( 1, 0)).rgb; - AF3 h=FsrRcasLoadF(sp+ASU2( 0, 1)).rgb; - // Rename (32-bit) or regroup (16-bit). - AF1 bR=b.r; - AF1 bG=b.g; - AF1 bB=b.b; - AF1 dR=d.r; - AF1 dG=d.g; - AF1 dB=d.b; - AF1 eR=e.r; - AF1 eG=e.g; - AF1 eB=e.b; - AF1 fR=f.r; - AF1 fG=f.g; - AF1 fB=f.b; - AF1 hR=h.r; - AF1 hG=h.g; - AF1 hB=h.b; - // Run optional input transform. - FsrRcasInputF(bR,bG,bB); - FsrRcasInputF(dR,dG,dB); - FsrRcasInputF(eR,eG,eB); - FsrRcasInputF(fR,fG,fB); - FsrRcasInputF(hR,hG,hB); - // Luma times 2. - AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG); - AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG); - AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG); - AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG); - AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG); - // Noise detection. - AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL; - nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL))); - nz=AF1_(-0.5)*nz+AF1_(1.0); - // Min and max of ring. - AF1 mn4R=min(AMin3F1(bR,dR,fR),hR); - AF1 mn4G=min(AMin3F1(bG,dG,fG),hG); - AF1 mn4B=min(AMin3F1(bB,dB,fB),hB); - AF1 mx4R=max(AMax3F1(bR,dR,fR),hR); - AF1 mx4G=max(AMax3F1(bG,dG,fG),hG); - AF1 mx4B=max(AMax3F1(bB,dB,fB),hB); - // Immediate constants for peak range. - AF2 peakC=AF2(1.0,-1.0*4.0); - // Limiters, these need to be high precision RCPs. - AF1 hitMinR=min(mn4R,eR)*ARcpF1(AF1_(4.0)*mx4R); - AF1 hitMinG=min(mn4G,eG)*ARcpF1(AF1_(4.0)*mx4G); - AF1 hitMinB=min(mn4B,eB)*ARcpF1(AF1_(4.0)*mx4B); - AF1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpF1(AF1_(4.0)*mn4R+peakC.y); - AF1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpF1(AF1_(4.0)*mn4G+peakC.y); - AF1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpF1(AF1_(4.0)*mn4B+peakC.y); - AF1 lobeR=max(-hitMinR,hitMaxR); - AF1 lobeG=max(-hitMinG,hitMaxG); - AF1 lobeB=max(-hitMinB,hitMaxB); - AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con.x); - // Apply noise removal. - #ifdef FSR_RCAS_DENOISE - lobe*=nz; - #endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0)); - pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; - pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; - pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL; - return;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// NON-PACKED 16-BIT VERSION -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_H) - // Input callback prototypes that need to be implemented by calling shader - AH4 FsrRcasLoadH(ASW2 p); - void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b); -//------------------------------------------------------------------------------------------------------------------------------ - void FsrRcasH( - out AH1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. - out AH1 pixG, - out AH1 pixB, - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out AH1 pixA, - #endif - AU2 ip, // Integer pixel position in output. - AU4 con){ // Constant generated by RcasSetup(). - // Sharpening algorithm uses minimal 3x3 pixel neighborhood. - // b - // d e f - // h - ASW2 sp=ASW2(ip); - AH3 b=FsrRcasLoadH(sp+ASW2( 0,-1)).rgb; - AH3 d=FsrRcasLoadH(sp+ASW2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - AH4 ee=FsrRcasLoadH(sp); - AH3 e=ee.rgb;pixA=ee.a; - #else - AH3 e=FsrRcasLoadH(sp).rgb; - #endif - AH3 f=FsrRcasLoadH(sp+ASW2( 1, 0)).rgb; - AH3 h=FsrRcasLoadH(sp+ASW2( 0, 1)).rgb; - // Rename (32-bit) or regroup (16-bit). - AH1 bR=b.r; - AH1 bG=b.g; - AH1 bB=b.b; - AH1 dR=d.r; - AH1 dG=d.g; - AH1 dB=d.b; - AH1 eR=e.r; - AH1 eG=e.g; - AH1 eB=e.b; - AH1 fR=f.r; - AH1 fG=f.g; - AH1 fB=f.b; - AH1 hR=h.r; - AH1 hG=h.g; - AH1 hB=h.b; - // Run optional input transform. - FsrRcasInputH(bR,bG,bB); - FsrRcasInputH(dR,dG,dB); - FsrRcasInputH(eR,eG,eB); - FsrRcasInputH(fR,fG,fB); - FsrRcasInputH(hR,hG,hB); - // Luma times 2. - AH1 bL=bB*AH1_(0.5)+(bR*AH1_(0.5)+bG); - AH1 dL=dB*AH1_(0.5)+(dR*AH1_(0.5)+dG); - AH1 eL=eB*AH1_(0.5)+(eR*AH1_(0.5)+eG); - AH1 fL=fB*AH1_(0.5)+(fR*AH1_(0.5)+fG); - AH1 hL=hB*AH1_(0.5)+(hR*AH1_(0.5)+hG); - // Noise detection. - AH1 nz=AH1_(0.25)*bL+AH1_(0.25)*dL+AH1_(0.25)*fL+AH1_(0.25)*hL-eL; - nz=ASatH1(abs(nz)*APrxMedRcpH1(AMax3H1(AMax3H1(bL,dL,eL),fL,hL)-AMin3H1(AMin3H1(bL,dL,eL),fL,hL))); - nz=AH1_(-0.5)*nz+AH1_(1.0); - // Min and max of ring. - AH1 mn4R=min(AMin3H1(bR,dR,fR),hR); - AH1 mn4G=min(AMin3H1(bG,dG,fG),hG); - AH1 mn4B=min(AMin3H1(bB,dB,fB),hB); - AH1 mx4R=max(AMax3H1(bR,dR,fR),hR); - AH1 mx4G=max(AMax3H1(bG,dG,fG),hG); - AH1 mx4B=max(AMax3H1(bB,dB,fB),hB); - // Immediate constants for peak range. - AH2 peakC=AH2(1.0,-1.0*4.0); - // Limiters, these need to be high precision RCPs. - AH1 hitMinR=min(mn4R,eR)*ARcpH1(AH1_(4.0)*mx4R); - AH1 hitMinG=min(mn4G,eG)*ARcpH1(AH1_(4.0)*mx4G); - AH1 hitMinB=min(mn4B,eB)*ARcpH1(AH1_(4.0)*mx4B); - AH1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH1(AH1_(4.0)*mn4R+peakC.y); - AH1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH1(AH1_(4.0)*mn4G+peakC.y); - AH1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH1(AH1_(4.0)*mn4B+peakC.y); - AH1 lobeR=max(-hitMinR,hitMaxR); - AH1 lobeG=max(-hitMinG,hitMaxG); - AH1 lobeB=max(-hitMinB,hitMaxB); - AH1 lobe=max(AH1_(-FSR_RCAS_LIMIT),min(AMax3H1(lobeR,lobeG,lobeB),AH1_(0.0)))*AH2_AU1(con.y).x; - // Apply noise removal. - #ifdef FSR_RCAS_DENOISE - lobe*=nz; - #endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - AH1 rcpL=APrxMedRcpH1(AH1_(4.0)*lobe+AH1_(1.0)); - pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; - pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; - pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// PACKED 16-BIT VERSION -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_HX2) - // Input callback prototypes that need to be implemented by the calling shader - AH4 FsrRcasLoadHx2(ASW2 p); - void FsrRcasInputHx2(inout AH2 r,inout AH2 g,inout AH2 b); -//------------------------------------------------------------------------------------------------------------------------------ - // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store. - void FsrRcasDepackHx2(out AH4 pix0,out AH4 pix1,AH2 pixR,AH2 pixG,AH2 pixB){ - #ifdef A_HLSL - // Invoke a slower path for DX only, since it won't allow uninitialized values. - pix0.a=pix1.a=0.0; - #endif - pix0.rgb=AH3(pixR.x,pixG.x,pixB.x); - pix1.rgb=AH3(pixR.y,pixG.y,pixB.y);} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrRcasHx2( - // Output values are for 2 8x8 tiles in a 16x8 region. - // pix.x = left 8x8 tile - // pix.y = right 8x8 tile - // This enables later processing to easily be packed as well. - out AH2 pixR, - out AH2 pixG, - out AH2 pixB, - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out AH2 pixA, - #endif - AU2 ip, // Integer pixel position in output. - AU4 con){ // Constant generated by RcasSetup(). - // No scaling algorithm uses minimal 3x3 pixel neighborhood. - ASW2 sp0=ASW2(ip); - AH3 b0=FsrRcasLoadHx2(sp0+ASW2( 0,-1)).rgb; - AH3 d0=FsrRcasLoadHx2(sp0+ASW2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - AH4 ee0=FsrRcasLoadHx2(sp0); - AH3 e0=ee0.rgb;pixA.r=ee0.a; - #else - AH3 e0=FsrRcasLoadHx2(sp0).rgb; - #endif - AH3 f0=FsrRcasLoadHx2(sp0+ASW2( 1, 0)).rgb; - AH3 h0=FsrRcasLoadHx2(sp0+ASW2( 0, 1)).rgb; - ASW2 sp1=sp0+ASW2(8,0); - AH3 b1=FsrRcasLoadHx2(sp1+ASW2( 0,-1)).rgb; - AH3 d1=FsrRcasLoadHx2(sp1+ASW2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - AH4 ee1=FsrRcasLoadHx2(sp1); - AH3 e1=ee1.rgb;pixA.g=ee1.a; - #else - AH3 e1=FsrRcasLoadHx2(sp1).rgb; - #endif - AH3 f1=FsrRcasLoadHx2(sp1+ASW2( 1, 0)).rgb; - AH3 h1=FsrRcasLoadHx2(sp1+ASW2( 0, 1)).rgb; - // Arrays of Structures to Structures of Arrays conversion. - AH2 bR=AH2(b0.r,b1.r); - AH2 bG=AH2(b0.g,b1.g); - AH2 bB=AH2(b0.b,b1.b); - AH2 dR=AH2(d0.r,d1.r); - AH2 dG=AH2(d0.g,d1.g); - AH2 dB=AH2(d0.b,d1.b); - AH2 eR=AH2(e0.r,e1.r); - AH2 eG=AH2(e0.g,e1.g); - AH2 eB=AH2(e0.b,e1.b); - AH2 fR=AH2(f0.r,f1.r); - AH2 fG=AH2(f0.g,f1.g); - AH2 fB=AH2(f0.b,f1.b); - AH2 hR=AH2(h0.r,h1.r); - AH2 hG=AH2(h0.g,h1.g); - AH2 hB=AH2(h0.b,h1.b); - // Run optional input transform. - FsrRcasInputHx2(bR,bG,bB); - FsrRcasInputHx2(dR,dG,dB); - FsrRcasInputHx2(eR,eG,eB); - FsrRcasInputHx2(fR,fG,fB); - FsrRcasInputHx2(hR,hG,hB); - // Luma times 2. - AH2 bL=bB*AH2_(0.5)+(bR*AH2_(0.5)+bG); - AH2 dL=dB*AH2_(0.5)+(dR*AH2_(0.5)+dG); - AH2 eL=eB*AH2_(0.5)+(eR*AH2_(0.5)+eG); - AH2 fL=fB*AH2_(0.5)+(fR*AH2_(0.5)+fG); - AH2 hL=hB*AH2_(0.5)+(hR*AH2_(0.5)+hG); - // Noise detection. - AH2 nz=AH2_(0.25)*bL+AH2_(0.25)*dL+AH2_(0.25)*fL+AH2_(0.25)*hL-eL; - nz=ASatH2(abs(nz)*APrxMedRcpH2(AMax3H2(AMax3H2(bL,dL,eL),fL,hL)-AMin3H2(AMin3H2(bL,dL,eL),fL,hL))); - nz=AH2_(-0.5)*nz+AH2_(1.0); - // Min and max of ring. - AH2 mn4R=min(AMin3H2(bR,dR,fR),hR); - AH2 mn4G=min(AMin3H2(bG,dG,fG),hG); - AH2 mn4B=min(AMin3H2(bB,dB,fB),hB); - AH2 mx4R=max(AMax3H2(bR,dR,fR),hR); - AH2 mx4G=max(AMax3H2(bG,dG,fG),hG); - AH2 mx4B=max(AMax3H2(bB,dB,fB),hB); - // Immediate constants for peak range. - AH2 peakC=AH2(1.0,-1.0*4.0); - // Limiters, these need to be high precision RCPs. - AH2 hitMinR=min(mn4R,eR)*ARcpH2(AH2_(4.0)*mx4R); - AH2 hitMinG=min(mn4G,eG)*ARcpH2(AH2_(4.0)*mx4G); - AH2 hitMinB=min(mn4B,eB)*ARcpH2(AH2_(4.0)*mx4B); - AH2 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH2(AH2_(4.0)*mn4R+peakC.y); - AH2 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH2(AH2_(4.0)*mn4G+peakC.y); - AH2 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH2(AH2_(4.0)*mn4B+peakC.y); - AH2 lobeR=max(-hitMinR,hitMaxR); - AH2 lobeG=max(-hitMinG,hitMaxG); - AH2 lobeB=max(-hitMinB,hitMaxB); - AH2 lobe=max(AH2_(-FSR_RCAS_LIMIT),min(AMax3H2(lobeR,lobeG,lobeB),AH2_(0.0)))*AH2_(AH2_AU1(con.y).x); - // Apply noise removal. - #ifdef FSR_RCAS_DENOISE - lobe*=nz; - #endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - AH2 rcpL=APrxMedRcpH2(AH2_(4.0)*lobe+AH2_(1.0)); - pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; - pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; - pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR -// -//------------------------------------------------------------------------------------------------------------------------------ -// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts. -// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel. -// The 'Lfga*()' functions provide a convenient way to introduce grain. -// These functions limit grain based on distance to signal limits. -// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality. -// Grain application should be done in a linear colorspace. -// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased). -//------------------------------------------------------------------------------------------------------------------------------ -// Usage, -// FsrLfga*( -// color, // In/out linear colorspace color {0 to 1} ranged. -// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain. -// amount); // Amount of grain (0 to 1} ranged. -//------------------------------------------------------------------------------------------------------------------------------ -// Example if grain texture is monochrome: 'FsrLfgaF(color,AF3_(grain),amount)' -//============================================================================================================================== -#if defined(A_GPU) - // Maximum grain is the minimum distance to the signal limit. - void FsrLfgaF(inout AF3 c,AF3 t,AF1 a){c+=(t*AF3_(a))*min(AF3_(1.0)-c,c);} -#endif -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF) - // Half precision version (slower). - void FsrLfgaH(inout AH3 c,AH3 t,AH1 a){c+=(t*AH3_(a))*min(AH3_(1.0)-c,c);} -//------------------------------------------------------------------------------------------------------------------------------ - // Packed half precision version (faster). - void FsrLfgaHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 tR,AH2 tG,AH2 tB,AH1 a){ - cR+=(tR*AH2_(a))*min(AH2_(1.0)-cR,cR);cG+=(tG*AH2_(a))*min(AH2_(1.0)-cG,cG);cB+=(tB*AH2_(a))*min(AH2_(1.0)-cB,cB);} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER -// -//------------------------------------------------------------------------------------------------------------------------------ -// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear. -// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering. -//------------------------------------------------------------------------------------------------------------------------------ -// Reversible tonemapper usage, -// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}. -// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}. -//============================================================================================================================== -#if defined(A_GPU) - void FsrSrtmF(inout AF3 c){c*=AF3_(ARcpF1(AMax3F1(c.r,c.g,c.b)+AF1_(1.0)));} - // The extra max solves the c=1.0 case (which is a /0). - void FsrSrtmInvF(inout AF3 c){c*=AF3_(ARcpF1(max(AF1_(1.0/32768.0),AF1_(1.0)-AMax3F1(c.r,c.g,c.b))));} -#endif -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF) - void FsrSrtmH(inout AH3 c){c*=AH3_(ARcpH1(AMax3H1(c.r,c.g,c.b)+AH1_(1.0)));} - void FsrSrtmInvH(inout AH3 c){c*=AH3_(ARcpH1(max(AH1_(1.0/32768.0),AH1_(1.0)-AMax3H1(c.r,c.g,c.b))));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrSrtmHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ - AH2 rcp=ARcpH2(AMax3H2(cR,cG,cB)+AH2_(1.0));cR*=rcp;cG*=rcp;cB*=rcp;} - void FsrSrtmInvHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ - AH2 rcp=ARcpH2(max(AH2_(1.0/32768.0),AH2_(1.0)-AMax3H2(cR,cG,cB)));cR*=rcp;cG*=rcp;cB*=rcp;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER -// -//------------------------------------------------------------------------------------------------------------------------------ -// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. -// Gamma 2.0 is used so that the conversion back to linear is just to square the color. -// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. -// Given good non-biased temporal blue noise as dither input, -// the output dither will temporally conserve energy. -// This is done by choosing the linear nearest step point instead of perceptual nearest. -// See code below for details. -//------------------------------------------------------------------------------------------------------------------------------ -// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION -// =============================================== -// - Output is 'uint(floor(saturate(n)*255.0+0.5))'. -// - Thus rounding is to nearest. -// - NaN gets converted to zero. -// - INF is clamped to {0.0 to 1.0}. -//============================================================================================================================== -#if defined(A_GPU) - // Hand tuned integer position to dither value, with more values than simple checkerboard. - // Only 32-bit has enough precision for this compddation. - // Output is {0 to <1}. - AF1 FsrTepdDitF(AU2 p,AU1 f){ - AF1 x=AF1_(p.x+f); - AF1 y=AF1_(p.y); - // The 1.61803 golden ratio. - AF1 a=AF1_((1.0+sqrt(5.0))/2.0); - // Number designed to provide a good visual pattern. - AF1 b=AF1_(1.0/3.69); - x=x*a+(y*b); - return AFractF1(x);} -//------------------------------------------------------------------------------------------------------------------------------ - // This version is 8-bit gamma 2.0. - // The 'c' input is {0 to 1}. - // Output is {0 to 1} ready for image store. - void FsrTepdC8F(inout AF3 c,AF1 dit){ - AF3 n=sqrt(c); - n=floor(n*AF3_(255.0))*AF3_(1.0/255.0); - AF3 a=n*n; - AF3 b=n+AF3_(1.0/255.0);b=b*b; - // Ratio of 'a' to 'b' required to produce 'c'. - // APrxLoRcpF1() won't work here (at least for very high dynamic ranges). - // APrxMedRcpF1() is an IADD,FMA,MUL. - AF3 r=(c-b)*APrxMedRcpF3(a-b); - // Use the ratio as a cutoff to choose 'a' or 'b'. - // AGtZeroF1() is a MUL. - c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/255.0));} -//------------------------------------------------------------------------------------------------------------------------------ - // This version is 10-bit gamma 2.0. - // The 'c' input is {0 to 1}. - // Output is {0 to 1} ready for image store. - void FsrTepdC10F(inout AF3 c,AF1 dit){ - AF3 n=sqrt(c); - n=floor(n*AF3_(1023.0))*AF3_(1.0/1023.0); - AF3 a=n*n; - AF3 b=n+AF3_(1.0/1023.0);b=b*b; - AF3 r=(c-b)*APrxMedRcpF3(a-b); - c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/1023.0));} -#endif -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF) - AH1 FsrTepdDitH(AU2 p,AU1 f){ - AF1 x=AF1_(p.x+f); - AF1 y=AF1_(p.y); - AF1 a=AF1_((1.0+sqrt(5.0))/2.0); - AF1 b=AF1_(1.0/3.69); - x=x*a+(y*b); - return AH1(AFractF1(x));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC8H(inout AH3 c,AH1 dit){ - AH3 n=sqrt(c); - n=floor(n*AH3_(255.0))*AH3_(1.0/255.0); - AH3 a=n*n; - AH3 b=n+AH3_(1.0/255.0);b=b*b; - AH3 r=(c-b)*APrxMedRcpH3(a-b); - c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/255.0));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC10H(inout AH3 c,AH1 dit){ - AH3 n=sqrt(c); - n=floor(n*AH3_(1023.0))*AH3_(1.0/1023.0); - AH3 a=n*n; - AH3 b=n+AH3_(1.0/1023.0);b=b*b; - AH3 r=(c-b)*APrxMedRcpH3(a-b); - c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/1023.0));} -//============================================================================================================================== - // This computes dither for positions 'p' and 'p+{8,0}'. - AH2 FsrTepdDitHx2(AU2 p,AU1 f){ - AF2 x; - x.x=AF1_(p.x+f); - x.y=x.x+AF1_(8.0); - AF1 y=AF1_(p.y); - AF1 a=AF1_((1.0+sqrt(5.0))/2.0); - AF1 b=AF1_(1.0/3.69); - x=x*AF2_(a)+AF2_(y*b); - return AH2(AFractF2(x));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC8Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ - AH2 nR=sqrt(cR); - AH2 nG=sqrt(cG); - AH2 nB=sqrt(cB); - nR=floor(nR*AH2_(255.0))*AH2_(1.0/255.0); - nG=floor(nG*AH2_(255.0))*AH2_(1.0/255.0); - nB=floor(nB*AH2_(255.0))*AH2_(1.0/255.0); - AH2 aR=nR*nR; - AH2 aG=nG*nG; - AH2 aB=nB*nB; - AH2 bR=nR+AH2_(1.0/255.0);bR=bR*bR; - AH2 bG=nG+AH2_(1.0/255.0);bG=bG*bG; - AH2 bB=nB+AH2_(1.0/255.0);bB=bB*bB; - AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); - AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG); - AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB); - cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/255.0)); - cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/255.0)); - cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/255.0));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC10Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ - AH2 nR=sqrt(cR); - AH2 nG=sqrt(cG); - AH2 nB=sqrt(cB); - nR=floor(nR*AH2_(1023.0))*AH2_(1.0/1023.0); - nG=floor(nG*AH2_(1023.0))*AH2_(1.0/1023.0); - nB=floor(nB*AH2_(1023.0))*AH2_(1.0/1023.0); - AH2 aR=nR*nR; - AH2 aG=nG*nG; - AH2 aB=nB*nB; - AH2 bR=nR+AH2_(1.0/1023.0);bR=bR*bR; - AH2 bG=nG+AH2_(1.0/1023.0);bG=bG*bG; - AH2 bB=nB+AH2_(1.0/1023.0);bB=bB*bB; - AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); - AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG); - AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB); - cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/1023.0)); - cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/1023.0)); - cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/1023.0));} -#endif diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_fsr1.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_fsr1.hlsl.meta deleted file mode 100644 index 5c25ec3..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR1/ffx_fsr1.hlsl.meta +++ /dev/null @@ -1,9 +0,0 @@ -fileFormatVersion: 2 -guid: 68ca247feb098b247b34478c0d6e1712 -ShaderImporter: - externalObjects: {} - defaultTextures: [] - nonModifiableTextures: [] - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3.meta deleted file mode 100644 index 14f23a0..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 3915f68f4f2254b4b80980d89504c01d -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_accumulate_pass.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_accumulate_pass.compute deleted file mode 100644 index 5cbfb80..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_accumulate_pass.compute +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma kernel CS - -#pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING - -#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP - -#include "ffx_fsr3upscaler_unity_common.cginc" - -// Ensure the correct value is defined for this keyword, as it is used to select one of multiple sampler functions -#ifdef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE -#undef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE -#define FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE 1 -#endif - -#include "shaders/ffx_fsr3upscaler_accumulate_pass.hlsl" diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_accumulate_pass.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_accumulate_pass.compute.meta deleted file mode 100644 index dbe5282..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_accumulate_pass.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: c9b45f0ae7673694ba57a4aadfe212e9 -ComputeShaderImporter: - externalObjects: {} - preprocessorOverride: 0 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute deleted file mode 100644 index e13c001..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma kernel CS - -#pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - -#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP - -#include "ffx_fsr3upscaler_unity_common.cginc" - -#include "shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl" diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta deleted file mode 100644 index 1df041b..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 5716b91fdaa4e9e439df6b96a796fe6e -ComputeShaderImporter: - externalObjects: {} - preprocessorOverride: 0 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute deleted file mode 100644 index d5903c0..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma kernel CS - -#pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - -#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP - -#include "ffx_fsr3upscaler_unity_common.cginc" - -// Wave operations require shader model 6.0; this can only be enabled when using DXC on D3D12 -// These pragmas are commented out by default as Unity will sometimes ignore the #if's and try to enable these features anyway. -// Uncomment the below lines if you intend to try wave operations on DX12 with the DXC compiler. -//#if defined(UNITY_COMPILER_DXC) && defined(SHADER_API_D3D12) -//#pragma require WaveBasic // Required for WaveGetLaneIndex -//#pragma require WaveBallot // Required for WaveReadLaneAt -//#else -#define FFX_SPD_NO_WAVE_OPERATIONS -//#endif - -#include "shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl" diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute.meta deleted file mode 100644 index 9e002c0..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: d253be05abcdc80428503d3e4cce3a36 -ComputeShaderImporter: - externalObjects: {} - preprocessorOverride: 0 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute deleted file mode 100644 index 0ccd388..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma kernel CS - -#pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - -#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP - -#include "ffx_fsr3upscaler_unity_common.cginc" - -#include "shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl" diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute.meta deleted file mode 100644 index d695f48..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 20e44016ed34b0d4b8de499d1b566c69 -ComputeShaderImporter: - externalObjects: {} - preprocessorOverride: 0 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_lock_pass.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_lock_pass.compute deleted file mode 100644 index e38ad99..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_lock_pass.compute +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma kernel CS - -#pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - -#include "ffx_fsr3upscaler_unity_common.cginc" - -#include "shaders/ffx_fsr3upscaler_lock_pass.hlsl" diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_lock_pass.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_lock_pass.compute.meta deleted file mode 100644 index c01e009..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_lock_pass.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: a135306e6d1857e43a86ef20db2a47fe -ComputeShaderImporter: - externalObjects: {} - preprocessorOverride: 0 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_rcas_pass.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_rcas_pass.compute deleted file mode 100644 index be7bbb5..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_rcas_pass.compute +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma kernel CS - -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - -#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP - -#include "ffx_fsr3upscaler_unity_common.cginc" - -#include "shaders/ffx_fsr3upscaler_rcas_pass.hlsl" diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_rcas_pass.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_rcas_pass.compute.meta deleted file mode 100644 index cd12641..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_rcas_pass.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 7aaf5cfff022de2499e9b0412f947f6c -ComputeShaderImporter: - externalObjects: {} - preprocessorOverride: 0 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute deleted file mode 100644 index ee2f276..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma kernel CS - -#pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - -#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP - -#include "ffx_fsr3upscaler_unity_common.cginc" - -#include "shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl" diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute.meta deleted file mode 100644 index 1053c34..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 4f59e5b9179d74844ae06a30ae1e0629 -ComputeShaderImporter: - externalObjects: {} - preprocessorOverride: 0 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute deleted file mode 100644 index 6338918..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma kernel CS - -#pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - -#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP - -#include "ffx_fsr3upscaler_unity_common.cginc" - -#include "shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl" diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta deleted file mode 100644 index ad42fbb..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 75cdc6ef23f08ed498d4da511923fcea -ComputeShaderImporter: - externalObjects: {} - preprocessorOverride: 0 - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_unity_common.cginc b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_unity_common.cginc deleted file mode 100644 index 758bb0c..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_unity_common.cginc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -// Suppress a few warnings produced by FFX's HLSL code -#pragma warning(disable: 3078) // Loop control variable conflicts -#pragma warning(disable: 3203) // Signed/unsigned mismatch - -#define FFX_GPU // Compiling for GPU -#define FFX_HLSL // Compile for plain HLSL - -// Use the DXC shader compiler on modern graphics APIs to enable a few advanced features -// The DXC-related pragmas are disabled by default, as DXC doesn't support all platforms yet and will break on some platforms when enabled. -// Consider this to be an experimental feature. If you want to benefit from 16-bit floating point and wave operations, and don't care about supporting older graphics APIs, then it's worth a try. -//#if defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL) -//#pragma use_dxc // Using DXC will currently break DX11 support since DX11 and DX12 share the same shader bytecode in Unity. -//#endif - -// Enable half precision data types on platforms that support it -//#if defined(UNITY_COMPILER_DXC) && defined(FFX_HALF) -//#pragma require Native16Bit -//#endif - -// Hack to work around the lack of texture atomics on Metal -#if defined(SHADER_API_METAL) -#define InterlockedAdd(dest, val, orig) { (orig) = (dest); (dest) += (val); } -#define InterlockedMin(dest, val) { (dest) = min((dest), (val)); } -#define InterlockedMax(dest, val) { (dest) = max((dest), (val)); } -#endif - -// Workaround for HDRP using texture arrays for its camera buffers on some platforms -// The below defines are copied from: Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/TextureXR.hlsl -#if defined(UNITY_FSR3UPSCALER_HDRP) - // Must be in sync with C# with property useTexArray in TextureXR.cs - #if ((defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12)) && !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_GAMECORE)) || defined(SHADER_API_PSSL) || defined(SHADER_API_VULKAN) - #define UNITY_TEXTURE2D_X_ARRAY_SUPPORTED - #endif - - // Control if TEXTURE2D_X macros will expand to texture arrays - #if defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED) && !defined(DISABLE_TEXTURE2D_X_ARRAY) - #define USE_TEXTURE2D_X_AS_ARRAY - #endif - - // Early defines for single-pass instancing - #if defined(STEREO_INSTANCING_ON) && defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED) - #define UNITY_STEREO_INSTANCING_ENABLED - #endif - - // Helper macros to handle XR single-pass with Texture2DArray - #if defined(USE_TEXTURE2D_X_AS_ARRAY) - - // Only single-pass stereo instancing used array indexing - #if defined(UNITY_STEREO_INSTANCING_ENABLED) - #define SLICE_ARRAY_INDEX unity_StereoEyeIndex - #else - #define SLICE_ARRAY_INDEX 0 - #endif - - // Declare and sample camera buffers as texture arrays - #define UNITY_FSR3_TEX2D(type) Texture2DArray - #define UNITY_FSR3_RWTEX2D(type) RWTexture2DArray - #define UNITY_FSR3_POS(pxPos) FfxUInt32x3(pxPos, SLICE_ARRAY_INDEX) - #define UNITY_FSR3_UV(uv) FfxFloat32x3(uv, SLICE_ARRAY_INDEX) - - #endif -#endif diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_unity_common.cginc.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_unity_common.cginc.meta deleted file mode 100644 index 5a68b6c..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/ffx_fsr3upscaler_unity_common.cginc.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 3ce00ba677bb7e14bb91772fd68bfe6b -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders.meta deleted file mode 100644 index 8a4ff2b..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 8364d4f86c613ec4d999d062f5f773b8 -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl deleted file mode 100644 index d2f1b32..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl +++ /dev/null @@ -1,79 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 -#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 1 -#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 -#else -#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 2 -#endif -#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED 3 -#define FSR3UPSCALER_BIND_SRV_LOCK_STATUS 4 -#define FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR 5 -#define FSR3UPSCALER_BIND_SRV_LANCZOS_LUT 6 -#define FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 7 -#define FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS 8 -#define FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE 9 -#define FSR3UPSCALER_BIND_SRV_LUMA_HISTORY 10 - -#define FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED 0 -#define FSR3UPSCALER_BIND_UAV_LOCK_STATUS 1 -#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 2 -#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 3 -#define FSR3UPSCALER_BIND_UAV_LUMA_HISTORY 4 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 - -#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" -#include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_sample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_upsample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h" -#include "fsr3upscaler/ffx_fsr3upscaler_reproject.h" -#include "fsr3upscaler/ffx_fsr3upscaler_accumulate.h" - -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 -#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#ifndef FFX_FSR3UPSCALER_NUM_THREADS -#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] -#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS - -FFX_PREFER_WAVE64 -FFX_FSR3UPSCALER_NUM_THREADS -FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) -{ - const uint GroupRows = (uint(DisplaySize().y) + FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT - 1) / FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT; - uGroupId.y = GroupRows - uGroupId.y - 1; - - uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId; - - Accumulate(uDispatchThreadId); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta deleted file mode 100644 index 80f209e..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 507ab779c38eddb429cdcedf9c108d1b -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl deleted file mode 100644 index 0d6e2eb..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl +++ /dev/null @@ -1,77 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY 0 -#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 1 - -#define FSR3UPSCALER_BIND_UAV_AUTOREACTIVE 0 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 -#define FSR3UPSCALER_BIND_CB_REACTIVE 1 - -#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" -#include "fsr3upscaler/ffx_fsr3upscaler_common.h" - -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 -#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#ifndef FFX_FSR3UPSCALER_NUM_THREADS -#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] -#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS - -FFX_FSR3UPSCALER_NUM_THREADS -FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) -{ - uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId; - - float3 ColorPreAlpha = LoadOpaqueOnly( FFX_MIN16_I2(uDispatchThreadId) ).rgb; - float3 ColorPostAlpha = LoadInputColor(uDispatchThreadId).rgb; - - if (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP) - { - ColorPreAlpha = Tonemap(ColorPreAlpha); - ColorPostAlpha = Tonemap(ColorPostAlpha); - } - - if (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) - { - ColorPreAlpha = InverseTonemap(ColorPreAlpha); - ColorPostAlpha = InverseTonemap(ColorPostAlpha); - } - - float out_reactive_value = 0.f; - float3 delta = abs(ColorPostAlpha - ColorPreAlpha); - - out_reactive_value = (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) ? max(delta.x, max(delta.y, delta.z)) : length(delta); - out_reactive_value *= GenReactiveScale(); - - out_reactive_value = (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) ? (out_reactive_value < GenReactiveThreshold() ? 0 : GenReactiveBinaryValue()) : out_reactive_value; - - rw_output_autoreactive[uDispatchThreadId] = out_reactive_value; -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta deleted file mode 100644 index c55f004..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 52cdb7a7c30cb614984908593ed19082 -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl deleted file mode 100644 index 93b7332..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl +++ /dev/null @@ -1,55 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 0 - -#define FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC 0 -#define FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 1 -#define FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 2 -#define FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE 3 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 -#define FSR3UPSCALER_BIND_CB_SPD 1 - -#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" -#include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h" - -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 256 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#ifndef FFX_FSR3UPSCALER_NUM_THREADS -#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] -#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS - -FFX_FSR3UPSCALER_NUM_THREADS -FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT -void CS(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) -{ - ComputeAutoExposure(WorkGroupId, LocalThreadIndex); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta deleted file mode 100644 index 508b43e..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 2d149b52ba0f5bb468a94a71dbbcb66f -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl deleted file mode 100644 index 70cc7ba..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl +++ /dev/null @@ -1,67 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 -#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 1 -#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH 2 -#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK 3 -#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 -#define FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 5 -#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 6 -#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 7 -#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH 8 -#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 9 - -#define FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS 0 -#define FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR 1 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 - -#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" -#include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_sample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_depth_clip.h" - -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#ifndef FFX_FSR3UPSCALER_NUM_THREADS -#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] -#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS - -FFX_PREFER_WAVE64 -FFX_FSR3UPSCALER_NUM_THREADS -FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS( - int2 iGroupId : SV_GroupID, - int2 iDispatchThreadId : SV_DispatchThreadID, - int2 iGroupThreadId : SV_GroupThreadID, - int iGroupIndex : SV_GroupIndex) -{ - DepthClip(iDispatchThreadId); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta deleted file mode 100644 index cde3a5e..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: da435b71cf57e2247b80ae0f0f86d1f8 -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl deleted file mode 100644 index 26b28f0..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl +++ /dev/null @@ -1,56 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA 0 - -#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 0 -#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 1 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 - -#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" -#include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_sample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_lock.h" - -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#ifndef FFX_FSR3UPSCALER_NUM_THREADS -#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] -#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS - -FFX_PREFER_WAVE64 -FFX_FSR3UPSCALER_NUM_THREADS -FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) -{ - uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId; - - ComputeLock(uDispatchThreadId); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta deleted file mode 100644 index 45c99dc..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 98d2cbbda5e90dd4ebd1d70abbb63a09 -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl deleted file mode 100644 index bebdeb3..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl +++ /dev/null @@ -1,53 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 -#define FSR3UPSCALER_BIND_SRV_RCAS_INPUT 1 - -#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 0 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 -#define FSR3UPSCALER_BIND_CB_RCAS 1 - -#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" -#include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_rcas.h" - -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 64 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#ifndef FFX_FSR3UPSCALER_NUM_THREADS -#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] -#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS - -FFX_FSR3UPSCALER_NUM_THREADS -FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT -void CS(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID) -{ - RCAS(LocalThreadId, WorkGroupId, Dtid); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta deleted file mode 100644 index fb9bfe2..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 9a15fc73170a9bc478801c8fa4d8d574 -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl deleted file mode 100644 index f277fd1..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl +++ /dev/null @@ -1,64 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 0 -#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH 1 -#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 2 -#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 3 - -#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 -#define FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS 1 -#define FSR3UPSCALER_BIND_UAV_DILATED_DEPTH 2 -#define FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA 3 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 - -#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" -#include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_sample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h" - -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#ifndef FFX_FSR3UPSCALER_NUM_THREADS -#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] -#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS - -FFX_PREFER_WAVE64 -FFX_FSR3UPSCALER_NUM_THREADS -FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS( - int2 iGroupId : SV_GroupID, - int2 iDispatchThreadId : SV_DispatchThreadID, - int2 iGroupThreadId : SV_GroupThreadID, - int iGroupIndex : SV_GroupIndex -) -{ - ReconstructAndDilate(iDispatchThreadId); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta deleted file mode 100644 index 6489d6d..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: bafb3726a76b97a49bb343d8a4323754 -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl deleted file mode 100644 index 6180885..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl +++ /dev/null @@ -1,90 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#define FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY 0 -#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 1 -#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 2 -#define FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR 3 -#define FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR 4 -#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK 4 -#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 5 - -#define FSR3UPSCALER_BIND_UAV_AUTOREACTIVE 0 -#define FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION 1 -#define FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR 2 -#define FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR 3 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 -#define FSR3UPSCALER_BIND_CB_AUTOREACTIVE 1 - -#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" -#include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h" - -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 -#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH -#ifndef FFX_FSR3UPSCALER_NUM_THREADS -#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] -#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS - -FFX_FSR3UPSCALER_NUM_THREADS -FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) -{ - FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId); - - // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV - // fetch pre- and post-alpha color values - FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() ); - FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) ); - FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f); - - FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) ); - FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) ); - - FFX_MIN16_F2 outReactiveMask = 0; - - outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx); - - if (outReactiveMask.y > 0.5f) - { - outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx); - outReactiveMask.x *= FFX_MIN16_F(fReactiveScale); - outReactiveMask.x = outReactiveMask.x < fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( fReactiveMax ); - } - - outReactiveMask.y *= FFX_MIN16_F(fTcScale ); - - outReactiveMask.x = max( outReactiveMask.x, FFX_MIN16_F( LoadReactiveMask(uDispatchThreadId) ) ); - outReactiveMask.y = max( outReactiveMask.y, FFX_MIN16_F( LoadTransparencyAndCompositionMask(uDispatchThreadId) ) ); - - StoreAutoReactive(uDispatchThreadId, outReactiveMask); - - StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha); - StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta deleted file mode 100644 index 02c5f46..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 712d171118b59fc4fb28d0d487060d42 -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler.meta deleted file mode 100644 index 2626a2e..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: ae9c6d015ae76544f9e8117e79ea862b -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_common_types.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_common_types.h deleted file mode 100644 index f0b62ab..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_common_types.h +++ /dev/null @@ -1,616 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_COMMON_TYPES_H -#define FFX_COMMON_TYPES_H - -#if defined(FFX_CPU) -#define FFX_PARAMETER_IN -#define FFX_PARAMETER_OUT -#define FFX_PARAMETER_INOUT -#define FFX_PARAMETER_UNIFORM -#elif defined(FFX_HLSL) -#define FFX_PARAMETER_IN in -#define FFX_PARAMETER_OUT out -#define FFX_PARAMETER_INOUT inout -#define FFX_PARAMETER_UNIFORM uniform -#elif defined(FFX_GLSL) -#define FFX_PARAMETER_IN in -#define FFX_PARAMETER_OUT out -#define FFX_PARAMETER_INOUT inout -#define FFX_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found! -#endif // #if defined(FFX_CPU) - -#if defined(FFX_CPU) -/// A typedef for a boolean value. -/// -/// @ingroup CPUTypes -typedef bool FfxBoolean; - -/// A typedef for a unsigned 8bit integer. -/// -/// @ingroup CPUTypes -typedef uint8_t FfxUInt8; - -/// A typedef for a unsigned 16bit integer. -/// -/// @ingroup CPUTypes -typedef uint16_t FfxUInt16; - -/// A typedef for a unsigned 32bit integer. -/// -/// @ingroup CPUTypes -typedef uint32_t FfxUInt32; - -/// A typedef for a unsigned 64bit integer. -/// -/// @ingroup CPUTypes -typedef uint64_t FfxUInt64; - -/// A typedef for a signed 8bit integer. -/// -/// @ingroup CPUTypes -typedef int8_t FfxInt8; - -/// A typedef for a signed 16bit integer. -/// -/// @ingroup CPUTypes -typedef int16_t FfxInt16; - -/// A typedef for a signed 32bit integer. -/// -/// @ingroup CPUTypes -typedef int32_t FfxInt32; - -/// A typedef for a signed 64bit integer. -/// -/// @ingroup CPUTypes -typedef int64_t FfxInt64; - -/// A typedef for a floating point value. -/// -/// @ingroup CPUTypes -typedef float FfxFloat32; - -/// A typedef for a 2-dimensional floating point value. -/// -/// @ingroup CPUTypes -typedef float FfxFloat32x2[2]; - -/// A typedef for a 3-dimensional floating point value. -/// -/// @ingroup CPUTypes -typedef float FfxFloat32x3[3]; - -/// A typedef for a 4-dimensional floating point value. -/// -/// @ingroup CPUTypes -typedef float FfxFloat32x4[4]; - -/// A typedef for a 2-dimensional 32bit unsigned integer. -/// -/// @ingroup CPUTypes -typedef uint32_t FfxUInt32x2[2]; - -/// A typedef for a 3-dimensional 32bit unsigned integer. -/// -/// @ingroup CPUTypes -typedef uint32_t FfxUInt32x3[3]; - -/// A typedef for a 4-dimensional 32bit unsigned integer. -/// -/// @ingroup CPUTypes -typedef uint32_t FfxUInt32x4[4]; -#endif // #if defined(FFX_CPU) - -#if defined(FFX_HLSL) - -#define FfxFloat32Mat4 matrix -#define FfxFloat32Mat3 matrix - -/// A typedef for a boolean value. -/// -/// @ingroup HLSLTypes -typedef bool FfxBoolean; - -#if FFX_HLSL_SM>=62 - -/// @defgroup HLSL62Types HLSL 6.2 And Above Types -/// HLSL 6.2 and above type defines for all commonly used variables -/// -/// @ingroup HLSLTypes - -/// A typedef for a floating point value. -/// -/// @ingroup HLSL62Types -typedef float32_t FfxFloat32; - -/// A typedef for a 2-dimensional floating point value. -/// -/// @ingroup HLSL62Types -typedef float32_t2 FfxFloat32x2; - -/// A typedef for a 3-dimensional floating point value. -/// -/// @ingroup HLSL62Types -typedef float32_t3 FfxFloat32x3; - -/// A typedef for a 4-dimensional floating point value. -/// -/// @ingroup HLSL62Types -typedef float32_t4 FfxFloat32x4; - -/// A [cacao_placeholder] typedef for matrix type until confirmed. -typedef float4x4 FfxFloat32x4x4; -typedef float3x3 FfxFloat32x3x3; -typedef float2x2 FfxFloat32x2x2; - -/// A typedef for a unsigned 32bit integer. -/// -/// @ingroup HLSL62Types -typedef uint32_t FfxUInt32; - -/// A typedef for a 2-dimensional 32bit unsigned integer. -/// -/// @ingroup HLSL62Types -typedef uint32_t2 FfxUInt32x2; - -/// A typedef for a 3-dimensional 32bit unsigned integer. -/// -/// @ingroup HLSL62Types -typedef uint32_t3 FfxUInt32x3; - -/// A typedef for a 4-dimensional 32bit unsigned integer. -/// -/// @ingroup HLSL62Types -typedef uint32_t4 FfxUInt32x4; - -/// A typedef for a signed 32bit integer. -/// -/// @ingroup HLSL62Types -typedef int32_t FfxInt32; - -/// A typedef for a 2-dimensional signed 32bit integer. -/// -/// @ingroup HLSL62Types -typedef int32_t2 FfxInt32x2; - -/// A typedef for a 3-dimensional signed 32bit integer. -/// -/// @ingroup HLSL62Types -typedef int32_t3 FfxInt32x3; - -/// A typedef for a 4-dimensional signed 32bit integer. -/// -/// @ingroup HLSL62Types -typedef int32_t4 FfxInt32x4; - -#else // #if FFX_HLSL_SM>=62 - -/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types -/// HLSL 6.1 and below type defines for all commonly used variables -/// -/// @ingroup HLSLTypes - -#define FfxFloat32 float -#define FfxFloat32x2 float2 -#define FfxFloat32x3 float3 -#define FfxFloat32x4 float4 - -/// A [cacao_placeholder] typedef for matrix type until confirmed. -#define FfxFloat32x4x4 float4x4 -#define FfxFloat32x3x3 float3x3 -#define FfxFloat32x2x2 float2x2 - -/// A typedef for a unsigned 32bit integer. -/// -/// @ingroup GPU -typedef uint FfxUInt32; -typedef uint2 FfxUInt32x2; -typedef uint3 FfxUInt32x3; -typedef uint4 FfxUInt32x4; - -typedef int FfxInt32; -typedef int2 FfxInt32x2; -typedef int3 FfxInt32x3; -typedef int4 FfxInt32x4; - -#endif // #if FFX_HLSL_SM>=62 - -#if FFX_HALF - -#if FFX_HLSL_SM >= 62 - -typedef float16_t FfxFloat16; -typedef float16_t2 FfxFloat16x2; -typedef float16_t3 FfxFloat16x3; -typedef float16_t4 FfxFloat16x4; - -/// A typedef for an unsigned 16bit integer. -/// -/// @ingroup HLSLTypes -typedef uint16_t FfxUInt16; -typedef uint16_t2 FfxUInt16x2; -typedef uint16_t3 FfxUInt16x3; -typedef uint16_t4 FfxUInt16x4; - -/// A typedef for a signed 16bit integer. -/// -/// @ingroup HLSLTypes -typedef int16_t FfxInt16; -typedef int16_t2 FfxInt16x2; -typedef int16_t3 FfxInt16x3; -typedef int16_t4 FfxInt16x4; -#elif SHADER_API_PSSL -#pragma argument(realtypes) // Enable true 16-bit types - -typedef half FfxFloat16; -typedef half2 FfxFloat16x2; -typedef half3 FfxFloat16x3; -typedef half4 FfxFloat16x4; - -/// A typedef for an unsigned 16bit integer. -/// -/// @ingroup GPU -typedef ushort FfxUInt16; -typedef ushort2 FfxUInt16x2; -typedef ushort3 FfxUInt16x3; -typedef ushort4 FfxUInt16x4; - -/// A typedef for a signed 16bit integer. -/// -/// @ingroup GPU -typedef short FfxInt16; -typedef short2 FfxInt16x2; -typedef short3 FfxInt16x3; -typedef short4 FfxInt16x4; -#else // #if FFX_HLSL_SM>=62 -typedef min16float FfxFloat16; -typedef min16float2 FfxFloat16x2; -typedef min16float3 FfxFloat16x3; -typedef min16float4 FfxFloat16x4; - -/// A typedef for an unsigned 16bit integer. -/// -/// @ingroup HLSLTypes -typedef min16uint FfxUInt16; -typedef min16uint2 FfxUInt16x2; -typedef min16uint3 FfxUInt16x3; -typedef min16uint4 FfxUInt16x4; - -/// A typedef for a signed 16bit integer. -/// -/// @ingroup HLSLTypes -typedef min16int FfxInt16; -typedef min16int2 FfxInt16x2; -typedef min16int3 FfxInt16x3; -typedef min16int4 FfxInt16x4; -#endif // #if FFX_HLSL_SM>=62 - -#endif // FFX_HALF - -#endif // #if defined(FFX_HLSL) - -#if defined(FFX_GLSL) - -#define FfxFloat32Mat4 mat4 -#define FfxFloat32Mat3 mat3 - -/// A typedef for a boolean value. -/// -/// @ingroup GLSLTypes -#define FfxBoolean bool -#define FfxFloat32 float -#define FfxFloat32x2 vec2 -#define FfxFloat32x3 vec3 -#define FfxFloat32x4 vec4 -#define FfxUInt32 uint -#define FfxUInt32x2 uvec2 -#define FfxUInt32x3 uvec3 -#define FfxUInt32x4 uvec4 -#define FfxInt32 int -#define FfxInt32x2 ivec2 -#define FfxInt32x3 ivec3 -#define FfxInt32x4 ivec4 - -/// A [cacao_placeholder] typedef for matrix type until confirmed. -#define FfxFloat32x4x4 mat4 -#define FfxFloat32x3x3 mat3 -#define FfxFloat32x2x2 mat2 - -#if FFX_HALF -#define FfxFloat16 float16_t -#define FfxFloat16x2 f16vec2 -#define FfxFloat16x3 f16vec3 -#define FfxFloat16x4 f16vec4 -#define FfxUInt16 uint16_t -#define FfxUInt16x2 u16vec2 -#define FfxUInt16x3 u16vec3 -#define FfxUInt16x4 u16vec4 -#define FfxInt16 int16_t -#define FfxInt16x2 i16vec2 -#define FfxInt16x3 i16vec3 -#define FfxInt16x4 i16vec4 -#endif // FFX_HALF -#endif // #if defined(FFX_GLSL) - -// Global toggles: -// #define FFX_HALF (1) -// #define FFX_HLSL_SM (62) - -#if FFX_HALF && !defined(SHADER_API_PSSL) - -#if FFX_HLSL_SM >= 62 - -#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; -#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; -#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; - -#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; -#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; -#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; - -#else //FFX_HLSL_SM>=62 - -#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName; -#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; -#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; - -#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) FFX_MIN16_SCALAR( TypeName, BaseComponentType ); -#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ); -#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ); - -#endif //FFX_HLSL_SM>=62 - -#else //FFX_HALF - -#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; -#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; -#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; - -#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; -#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; -#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; - -#endif //FFX_HALF - -#if defined(FFX_GPU) -// Common typedefs: -#if defined(FFX_HLSL) && !defined(SHADER_API_PSSL) -FFX_MIN16_SCALAR( FFX_MIN16_F , float ); -FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 ); -FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 ); -FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 ); - -FFX_MIN16_SCALAR( FFX_MIN16_I, int ); -FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 ); -FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 ); -FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 ); - -FFX_MIN16_SCALAR( FFX_MIN16_U, uint ); -FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 ); -FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 ); -FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 ); - -FFX_16BIT_SCALAR( FFX_F16_t , float ); -FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 ); -FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 ); -FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 ); - -FFX_16BIT_SCALAR( FFX_I16_t, int ); -FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 ); -FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 ); -FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 ); - -FFX_16BIT_SCALAR( FFX_U16_t, uint ); -FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 ); -FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 ); -FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 ); - -#define TYPEDEF_MIN16_TYPES(Prefix) \ -typedef FFX_MIN16_F Prefix##_F; \ -typedef FFX_MIN16_F2 Prefix##_F2; \ -typedef FFX_MIN16_F3 Prefix##_F3; \ -typedef FFX_MIN16_F4 Prefix##_F4; \ -typedef FFX_MIN16_I Prefix##_I; \ -typedef FFX_MIN16_I2 Prefix##_I2; \ -typedef FFX_MIN16_I3 Prefix##_I3; \ -typedef FFX_MIN16_I4 Prefix##_I4; \ -typedef FFX_MIN16_U Prefix##_U; \ -typedef FFX_MIN16_U2 Prefix##_U2; \ -typedef FFX_MIN16_U3 Prefix##_U3; \ -typedef FFX_MIN16_U4 Prefix##_U4; - -#define TYPEDEF_16BIT_TYPES(Prefix) \ -typedef FFX_16BIT_F Prefix##_F; \ -typedef FFX_16BIT_F2 Prefix##_F2; \ -typedef FFX_16BIT_F3 Prefix##_F3; \ -typedef FFX_16BIT_F4 Prefix##_F4; \ -typedef FFX_16BIT_I Prefix##_I; \ -typedef FFX_16BIT_I2 Prefix##_I2; \ -typedef FFX_16BIT_I3 Prefix##_I3; \ -typedef FFX_16BIT_I4 Prefix##_I4; \ -typedef FFX_16BIT_U Prefix##_U; \ -typedef FFX_16BIT_U2 Prefix##_U2; \ -typedef FFX_16BIT_U3 Prefix##_U3; \ -typedef FFX_16BIT_U4 Prefix##_U4; - -#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \ -typedef FfxFloat32 Prefix##_F; \ -typedef FfxFloat32x2 Prefix##_F2; \ -typedef FfxFloat32x3 Prefix##_F3; \ -typedef FfxFloat32x4 Prefix##_F4; \ -typedef FfxInt32 Prefix##_I; \ -typedef FfxInt32x2 Prefix##_I2; \ -typedef FfxInt32x3 Prefix##_I3; \ -typedef FfxInt32x4 Prefix##_I4; \ -typedef FfxUInt32 Prefix##_U; \ -typedef FfxUInt32x2 Prefix##_U2; \ -typedef FfxUInt32x3 Prefix##_U3; \ -typedef FfxUInt32x4 Prefix##_U4; -#endif // #if defined(FFX_HLSL) - -#if defined(SHADER_API_PSSL) - -#define unorm -#define globallycoherent - -#if FFX_HALF - -#define FFX_MIN16_F half -#define FFX_MIN16_F2 half2 -#define FFX_MIN16_F3 half3 -#define FFX_MIN16_F4 half4 - -#define FFX_MIN16_I short -#define FFX_MIN16_I2 short2 -#define FFX_MIN16_I3 short3 -#define FFX_MIN16_I4 short4 - -#define FFX_MIN16_U ushort -#define FFX_MIN16_U2 ushort2 -#define FFX_MIN16_U3 ushort3 -#define FFX_MIN16_U4 ushort4 - -#define FFX_16BIT_F half -#define FFX_16BIT_F2 half2 -#define FFX_16BIT_F3 half3 -#define FFX_16BIT_F4 half4 - -#define FFX_16BIT_I short -#define FFX_16BIT_I2 short2 -#define FFX_16BIT_I3 short3 -#define FFX_16BIT_I4 short4 - -#define FFX_16BIT_U ushort -#define FFX_16BIT_U2 ushort2 -#define FFX_16BIT_U3 ushort3 -#define FFX_16BIT_U4 ushort4 - -#else // FFX_HALF - -#define FFX_MIN16_F float -#define FFX_MIN16_F2 float2 -#define FFX_MIN16_F3 float3 -#define FFX_MIN16_F4 float4 - -#define FFX_MIN16_I int -#define FFX_MIN16_I2 int2 -#define FFX_MIN16_I3 int3 -#define FFX_MIN16_I4 int4 - -#define FFX_MIN16_U uint -#define FFX_MIN16_U2 uint2 -#define FFX_MIN16_U3 uint3 -#define FFX_MIN16_U4 uint4 - -#define FFX_16BIT_F float -#define FFX_16BIT_F2 float2 -#define FFX_16BIT_F3 float3 -#define FFX_16BIT_F4 float4 - -#define FFX_16BIT_I int -#define FFX_16BIT_I2 int2 -#define FFX_16BIT_I3 int3 -#define FFX_16BIT_I4 int4 - -#define FFX_16BIT_U uint -#define FFX_16BIT_U2 uint2 -#define FFX_16BIT_U3 uint3 -#define FFX_16BIT_U4 uint4 - -#endif // FFX_HALF - -#endif // #if defined(SHADER_API_PSSL) - -#if defined(FFX_GLSL) - -#if FFX_HALF - -#define FFX_MIN16_F float16_t -#define FFX_MIN16_F2 f16vec2 -#define FFX_MIN16_F3 f16vec3 -#define FFX_MIN16_F4 f16vec4 - -#define FFX_MIN16_I int16_t -#define FFX_MIN16_I2 i16vec2 -#define FFX_MIN16_I3 i16vec3 -#define FFX_MIN16_I4 i16vec4 - -#define FFX_MIN16_U uint16_t -#define FFX_MIN16_U2 u16vec2 -#define FFX_MIN16_U3 u16vec3 -#define FFX_MIN16_U4 u16vec4 - -#define FFX_16BIT_F float16_t -#define FFX_16BIT_F2 f16vec2 -#define FFX_16BIT_F3 f16vec3 -#define FFX_16BIT_F4 f16vec4 - -#define FFX_16BIT_I int16_t -#define FFX_16BIT_I2 i16vec2 -#define FFX_16BIT_I3 i16vec3 -#define FFX_16BIT_I4 i16vec4 - -#define FFX_16BIT_U uint16_t -#define FFX_16BIT_U2 u16vec2 -#define FFX_16BIT_U3 u16vec3 -#define FFX_16BIT_U4 u16vec4 - -#else // FFX_HALF - -#define FFX_MIN16_F float -#define FFX_MIN16_F2 vec2 -#define FFX_MIN16_F3 vec3 -#define FFX_MIN16_F4 vec4 - -#define FFX_MIN16_I int -#define FFX_MIN16_I2 ivec2 -#define FFX_MIN16_I3 ivec3 -#define FFX_MIN16_I4 ivec4 - -#define FFX_MIN16_U uint -#define FFX_MIN16_U2 uvec2 -#define FFX_MIN16_U3 uvec3 -#define FFX_MIN16_U4 uvec4 - -#define FFX_16BIT_F float -#define FFX_16BIT_F2 vec2 -#define FFX_16BIT_F3 vec3 -#define FFX_16BIT_F4 vec4 - -#define FFX_16BIT_I int -#define FFX_16BIT_I2 ivec2 -#define FFX_16BIT_I3 ivec3 -#define FFX_16BIT_I4 ivec4 - -#define FFX_16BIT_U uint -#define FFX_16BIT_U2 uvec2 -#define FFX_16BIT_U3 uvec3 -#define FFX_16BIT_U4 uvec4 - -#endif // FFX_HALF - -#endif // #if defined(FFX_GLSL) - -#endif // #if defined(FFX_GPU) -#endif // #ifndef FFX_COMMON_TYPES_H diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_common_types.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_common_types.h.meta deleted file mode 100644 index 8d9b15f..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_common_types.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 7974b728d5c1b6d4a8a8e3965d03f96d -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core.h deleted file mode 100644 index 02f6b3f..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core.h +++ /dev/null @@ -1,80 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -/// @defgroup FfxGPU GPU -/// The FidelityFX SDK GPU References -/// -/// @ingroup ffxSDK - -/// @defgroup FfxHLSL HLSL References -/// FidelityFX SDK HLSL GPU References -/// -/// @ingroup FfxGPU - -/// @defgroup FfxGLSL GLSL References -/// FidelityFX SDK GLSL GPU References -/// -/// @ingroup FfxGPU - -/// @defgroup FfxGPUEffects FidelityFX GPU References -/// FidelityFX Effect GPU Reference Documentation -/// -/// @ingroup FfxGPU - -/// @defgroup GPUCore GPU Core -/// GPU defines and functions -/// -/// @ingroup FfxGPU - -#if !defined(FFX_CORE_H) -#define FFX_CORE_H - -#ifdef __hlsl_dx_compiler -#pragma dxc diagnostic push -#pragma dxc diagnostic ignored "-Wambig-lit-shift" -#endif //__hlsl_dx_compiler - -#include "ffx_common_types.h" - -#if defined(FFX_CPU) - #include "ffx_core_cpu.h" -#endif // #if defined(FFX_CPU) - -#if defined(FFX_GLSL) && defined(FFX_GPU) - #include "ffx_core_glsl.h" -#endif // #if defined(FFX_GLSL) && defined(FFX_GPU) - -#if defined(FFX_HLSL) && defined(FFX_GPU) - #include "ffx_core_hlsl.h" -#endif // #if defined(FFX_HLSL) && defined(FFX_GPU) - -#if defined(FFX_GPU) - #include "ffx_core_gpu_common.h" - #include "ffx_core_gpu_common_half.h" - #include "ffx_core_portability.h" -#endif // #if defined(FFX_GPU) - -#ifdef __hlsl_dx_compiler -#pragma dxc diagnostic pop -#endif //__hlsl_dx_compiler - -#endif // #if !defined(FFX_CORE_H) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core.h.meta deleted file mode 100644 index 562741a..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: b91c5f52b89ff554dacb51045a802ed8 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h deleted file mode 100644 index 865258d..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h +++ /dev/null @@ -1,338 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -/// A define for a true value in a boolean expression. -/// -/// @ingroup CPUTypes -#define FFX_TRUE (1) - -/// A define for a false value in a boolean expression. -/// -/// @ingroup CPUTypes -#define FFX_FALSE (0) - -#if !defined(FFX_STATIC) -/// A define to abstract declaration of static variables and functions. -/// -/// @ingroup CPUTypes -#define FFX_STATIC static -#endif // #if !defined(FFX_STATIC) - -/// @defgroup CPUCore CPU Core -/// Core CPU-side defines and functions -/// -/// @ingroup ffxHost - -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wunused-variable" -#endif - -/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer. -/// -/// @param [in] x A 32bit floating value. -/// -/// @returns -/// An unsigned 32bit integer value containing the bit pattern of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x) -{ - union - { - FfxFloat32 f; - FfxUInt32 u; - } bits; - - bits.f = x; - return bits.u; -} - -FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) -{ - return a[0] * b[0] + a[1] * b[1]; -} - -FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) -{ - return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; -} - -FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) -{ - return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the GLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) -{ - return y * t + (-x * t + x); -} - -/// Compute the reciprocal of a value. -/// -/// @param [in] x The value to compute the reciprocal for. -/// -/// @returns -/// The reciprocal value of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 x) -{ - return 1.0f / x; -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x) -{ - return sqrt(x); -} - -FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) -{ - return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); -} - -/// Compute the factional part of a decimal value. -/// -/// This function calculates x - floor(x). -/// -/// @param [in] x The value to compute the fractional part from. -/// -/// @returns -/// The fractional part of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 x) -{ - return x - floor(x); -} - -/// Compute the reciprocal square root of a value. -/// -/// @param [in] x The value to compute the reciprocal for. -/// -/// @returns -/// The reciprocal square root value of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 x) -{ - return ffxReciprocal(ffxSqrt(x)); -} - -FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) -{ - return x < y ? x : y; -} - -FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) -{ - return x < y ? x : y; -} - -FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) -{ - return x > y ? x : y; -} - -FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) -{ - return x > y ? x : y; -} - -/// Clamp a value to a [0..1] range. -/// -/// @param [in] x The value to clamp to [0..1] range. -/// -/// @returns -/// The clamped version of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 x) -{ - return ffxMin(1.0f, ffxMax(0.0f, x)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) -{ - d[0] = a[0] + b; - d[1] = a[1] + b; - d[2] = a[2] + b; - return; -} - -FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) -{ - d[0] = a[0]; - d[1] = a[1]; - d[2] = a[2]; - return; -} - -FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) -{ - d[0] = a[0] * b[0]; - d[1] = a[1] * b[1]; - d[2] = a[2] * b[2]; - return; -} - -FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) -{ - d[0] = a[0] * b; - d[1] = a[1] * b; - d[2] = a[2] * b; - return; -} - -FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) -{ - d[0] = ffxReciprocal(a[0]); - d[1] = ffxReciprocal(a[1]); - d[2] = ffxReciprocal(a[2]); - return; -} - -/// Convert FfxFloat32 to half (in lower 16-bits of output). -/// -/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf -/// -/// The function supports denormals. -/// -/// Some conversion rules are to make computations possibly "safer" on the GPU, -/// -INF & -NaN -> -65504 -/// +INF & +NaN -> +65504 -/// -/// @param [in] f The 32bit floating point value to convert. -/// -/// @returns -/// The closest 16bit floating point value to f. -/// -/// @ingroup CPUCore -FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f) -{ - static FfxUInt16 base[512] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, - 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, - 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, - 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, - 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff - }; - - static FfxUInt8 shift[512] = { - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18 - }; - - union - { - FfxFloat32 f; - FfxUInt32 u; - } bits; - - bits.f = f; - FfxUInt32 u = bits.u; - FfxUInt32 i = u >> 23; - return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]); -} - -/// Pack 2x32-bit floating point values in a single 32bit value. -/// -/// This function first converts each component of value into their nearest 16-bit floating -/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the -/// 32bit unsigned integer respectively. -/// -/// @param [in] x A 2-dimensional floating point value to convert and pack. -/// -/// @returns -/// A packed 32bit value containing 2 16bit floating point values. -/// -/// @ingroup CPUCore -FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 x) -{ - return f32tof16(x[0]) + (f32tof16(x[1]) << 16); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta deleted file mode 100644 index 9aed874..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 4c88c0b7a4dec1e479272449c19ca981 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h deleted file mode 100644 index 2f687df..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h +++ /dev/null @@ -1,2784 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -/// A define for a true value in a boolean expression. -/// -/// @ingroup GPUCore -#define FFX_TRUE (true) - -/// A define for a false value in a boolean expression. -/// -/// @ingroup GPUCore -#define FFX_FALSE (false) - -/// A define value for positive infinity. -/// -/// @ingroup GPUCore -#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u) - -/// A define value for negative infinity. -/// -/// @ingroup GPUCore -#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u) - -/// A define value for PI. -/// -/// @ingroup GPUCore -#define FFX_PI (3.14159) - -FFX_STATIC const FfxFloat32 FFX_FP16_MIN = 6.10e-05f; -FFX_STATIC const FfxFloat32 FFX_FP16_MAX = 65504.0f; -FFX_STATIC const FfxFloat32 FFX_TONEMAP_EPSILON = 1.0f / FFX_FP16_MAX; - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPUCore -FfxFloat32 ffxReciprocal(FfxFloat32 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value) -{ - return rcp(value); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y) -{ - return min(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y) -{ - return max(x, y); -} - -/// Compute the value of the first parameter raised to the power of the second. -/// -/// @param [in] x The value to raise to the power y. -/// @param [in] y The power to which to raise x. -/// -/// @returns -/// The value of the first parameter raised to the power of the second. -/// -/// @ingroup GPUCore -FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y) -{ - return pow(x, y); -} - -/// Compute the value of the first parameter raised to the power of the second. -/// -/// @param [in] x The value to raise to the power y. -/// @param [in] y The power to which to raise x. -/// -/// @returns -/// The value of the first parameter raised to the power of the second. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y) -{ - return pow(x, y); -} - -/// Compute the value of the first parameter raised to the power of the second. -/// -/// @param [in] x The value to raise to the power y. -/// @param [in] y The power to which to raise x. -/// -/// @returns -/// The value of the first parameter raised to the power of the second. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y) -{ - return pow(x, y); -} - -/// Compute the value of the first parameter raised to the power of the second. -/// -/// @param [in] x The value to raise to the power y. -/// @param [in] y The power to which to raise x. -/// -/// @returns -/// The value of the first parameter raised to the power of the second. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y) -{ - return pow(x, y); -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup GPUCore -FfxFloat32 ffxSqrt(FfxFloat32 x) -{ - return sqrt(x); -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxSqrt(FfxFloat32x2 x) -{ - return sqrt(x); -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxSqrt(FfxFloat32x3 x) -{ - return sqrt(x); -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxSqrt(FfxFloat32x4 x) -{ - return sqrt(x); -} - -/// Copy the sign bit from 's' to positive 'd'. -/// -/// @param [in] d The value to copy the sign bit into. -/// @param [in] s The value to copy the sign bit from. -/// -/// @returns -/// The value of d with the sign bit from s. -/// -/// @ingroup GPUCore -FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s) -{ - return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u))); -} - -/// Copy the sign bit from 's' to positive 'd'. -/// -/// @param [in] d The value to copy the sign bit into. -/// @param [in] s The value to copy the sign bit from. -/// -/// @returns -/// The value of d with the sign bit from s. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s) -{ - return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u))); -} - -/// Copy the sign bit from 's' to positive 'd'. -/// -/// @param [in] d The value to copy the sign bit into. -/// @param [in] s The value to copy the sign bit from. -/// -/// @returns -/// The value of d with the sign bit from s. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s) -{ - return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u))); -} - -/// Copy the sign bit from 's' to positive 'd'. -/// -/// @param [in] d The value to copy the sign bit into. -/// @param [in] s The value to copy the sign bit from. -/// -/// @returns -/// The value of d with the sign bit from s. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s) -{ - return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u))); -} - -/// A single operation to return the following: -/// m = NaN := 0 -/// m >= 0 := 0 -/// m < 0 := 1 -/// -/// Uses the following useful floating point logic, -/// saturate(+a*(-INF)==-INF) := 0 -/// saturate( 0*(-INF)== NaN) := 0 -/// saturate(-a*(-INF)==+INF) := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against 0. -/// -/// @returns -/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. -/// -/// @ingroup GPUCore -FfxFloat32 ffxIsSigned(FfxFloat32 m) -{ - return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); -} - -/// A single operation to return the following: -/// m = NaN := 0 -/// m >= 0 := 0 -/// m < 0 := 1 -/// -/// Uses the following useful floating point logic, -/// saturate(+a*(-INF)==-INF) := 0 -/// saturate( 0*(-INF)== NaN) := 0 -/// saturate(-a*(-INF)==+INF) := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against 0. -/// -/// @returns -/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m) -{ - return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); -} - -/// A single operation to return the following: -/// m = NaN := 0 -/// m >= 0 := 0 -/// m < 0 := 1 -/// -/// Uses the following useful floating point logic, -/// saturate(+a*(-INF)==-INF) := 0 -/// saturate( 0*(-INF)== NaN) := 0 -/// saturate(-a*(-INF)==+INF) := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against 0. -/// -/// @returns -/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m) -{ - return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); -} - -/// A single operation to return the following: -/// m = NaN := 0 -/// m >= 0 := 0 -/// m < 0 := 1 -/// -/// Uses the following useful floating point logic, -/// saturate(+a*(-INF)==-INF) := 0 -/// saturate( 0*(-INF)== NaN) := 0 -/// saturate(-a*(-INF)==+INF) := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against for have the sign set. -/// -/// @returns -/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m) -{ - return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); -} - -/// A single operation to return the following: -/// m = NaN := 1 -/// m > 0 := 0 -/// m <= 0 := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against zero. -/// -/// @returns -/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. -/// -/// @ingroup GPUCore -FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m) -{ - return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); -} - -/// A single operation to return the following: -/// m = NaN := 1 -/// m > 0 := 0 -/// m <= 0 := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against zero. -/// -/// @returns -/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m) -{ - return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); -} - -/// A single operation to return the following: -/// m = NaN := 1 -/// m > 0 := 0 -/// m <= 0 := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against zero. -/// -/// @returns -/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m) -{ - return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); -} - -/// A single operation to return the following: -/// m = NaN := 1 -/// m > 0 := 0 -/// m <= 0 := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against zero. -/// -/// @returns -/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) -{ - return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); -} - -/// Convert a 32bit floating point value to sortable integer. -/// -/// - If sign bit=0, flip the sign bit (positives). -/// - If sign bit=1, flip all bits (negatives). -/// -/// The function has the side effects that: -/// - Larger integers are more positive values. -/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). -/// -/// @param [in] value The floating point value to make sortable. -/// -/// @returns -/// The sortable integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) -{ - return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); -} - -/// Convert a sortable integer to a 32bit floating point value. -/// -/// The function has the side effects that: -/// - If sign bit=1, flip the sign bit (positives). -/// - If sign bit=0, flip all bits (negatives). -/// -/// @param [in] value The floating point value to make sortable. -/// -/// @returns -/// The sortable integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) -{ - return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); -} - -/// Calculate a low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateSqrt(FfxFloat32 value) -{ - return ffxAsFloat((ffxAsUInt32(value) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639)); -} - -/// Calculate a low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value) -{ - return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(value)); -} - -/// Calculate a medium-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to medium quality. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value) -{ - FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value)); - return b * (-b * value + FfxFloat32(2.0)); -} - -/// Calculate a low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal square root for. -/// -/// @returns -/// An approximation of the reciprocal square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value) -{ - return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(value) >> FfxUInt32(1))); -} - -/// Calculate a low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value) -{ - return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u)); -} - -/// Calculate a low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value) -{ - return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value)); -} - -/// Calculate a medium-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to medium quality. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value) -{ - FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value)); - return b * (-b * value + ffxBroadcast2(2.0f)); -} - -/// Calculate a low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value) -{ - return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u))); -} - -/// Calculate a low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value) -{ - return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u)); -} - -/// Calculate a low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value) -{ - return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value)); -} - -/// Calculate a medium-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to medium quality. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value) -{ - FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value)); - return b * (-b * value + ffxBroadcast3(2.0f)); -} - -/// Calculate a low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value) -{ - return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u))); -} - -/// Calculate a low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value) -{ - return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u)); -} - -/// Calculate a low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value) -{ - return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(value)); -} - -/// Calculate a medium-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to medium quality. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value) -{ - FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value)); - return b * (-b * value + ffxBroadcast4(2.0f)); -} - -/// Calculate a low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] value The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value) -{ - return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast4(1u))); -} - -/// Calculate dot product of 'a' and 'b'. -/// -/// @param [in] a First vector input. -/// @param [in] b Second vector input. -/// -/// @returns -/// The value of a dot b. -/// -/// @ingroup GPUCore -FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) -{ - return dot(a, b); -} - -/// Calculate dot product of 'a' and 'b'. -/// -/// @param [in] a First vector input. -/// @param [in] b Second vector input. -/// -/// @returns -/// The value of a dot b. -/// -/// @ingroup GPUCore -FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) -{ - return dot(a, b); -} - -/// Calculate dot product of 'a' and 'b'. -/// -/// @param [in] a First vector input. -/// @param [in] b Second vector input. -/// -/// @returns -/// The value of a dot b. -/// -/// @ingroup GPUCore -FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) -{ - return dot(a, b); -} - - -/// Compute an approximate conversion from PQ to Gamma2 space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between PQ and Gamma2. -/// -/// @returns -/// The value a converted into Gamma2. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a) -{ - return a * a * a * a; -} - -/// Compute an approximate conversion from PQ to linear space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between PQ and linear. -/// -/// @returns -/// The value a converted into linear. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a) -{ - return a * a * a * a * a * a * a * a; -} - -/// Compute an approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a) -{ - return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); -} - -/// Compute a more accurate approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a) -{ - FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); - FfxFloat32 b4 = b * b * b * b; - return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); -} - -/// Compute a high accuracy approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a) -{ - return ffxSqrt(ffxSqrt(a)); -} - -/// Compute an approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a) -{ - return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); -} - -/// Compute a more accurate approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a) -{ - FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); - FfxFloat32 b8 = b * b * b * b * b * b * b * b; - return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); -} - -/// Compute a very accurate approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a) -{ - return ffxSqrt(ffxSqrt(ffxSqrt(a))); -} - -/// Compute an approximate conversion from PQ to Gamma2 space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between PQ and Gamma2. -/// -/// @returns -/// The value a converted into Gamma2. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a) -{ - return a * a * a * a; -} - -/// Compute an approximate conversion from PQ to linear space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between PQ and linear. -/// -/// @returns -/// The value a converted into linear. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a) -{ - return a * a * a * a * a * a * a * a; -} - -/// Compute an approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a) -{ - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); -} - -/// Compute a more accurate approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a) -{ - FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); - FfxFloat32x2 b4 = b * b * b * b; - return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); -} - -/// Compute a high accuracy approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a) -{ - return ffxSqrt(ffxSqrt(a)); -} - -/// Compute an approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a) -{ - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); -} - -/// Compute a more accurate approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a) -{ - FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); - FfxFloat32x2 b8 = b * b * b * b * b * b * b * b; - return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); -} - -/// Compute a very accurate approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a) -{ - return ffxSqrt(ffxSqrt(ffxSqrt(a))); -} - -/// Compute an approximate conversion from PQ to Gamma2 space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between PQ and Gamma2. -/// -/// @returns -/// The value a converted into Gamma2. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a) -{ - return a * a * a * a; -} - -/// Compute an approximate conversion from PQ to linear space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between PQ and linear. -/// -/// @returns -/// The value a converted into linear. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a) -{ - return a * a * a * a * a * a * a * a; -} - -/// Compute an approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a) -{ - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); -} - -/// Compute a more accurate approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a) -{ - FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); - FfxFloat32x3 b4 = b * b * b * b; - return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); -} - -/// Compute a high accuracy approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a) -{ - return ffxSqrt(ffxSqrt(a)); -} - -/// Compute an approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a) -{ - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); -} - -/// Compute a more accurate approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a) -{ - FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); - FfxFloat32x3 b8 = b * b * b * b * b * b * b * b; - return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); -} - -/// Compute a very accurate approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a) -{ - return ffxSqrt(ffxSqrt(ffxSqrt(a))); -} - -/// Compute an approximate conversion from PQ to Gamma2 space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between PQ and Gamma2. -/// -/// @returns -/// The value a converted into Gamma2. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a) -{ - return a * a * a * a; -} - -/// Compute an approximate conversion from PQ to linear space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between PQ and linear. -/// -/// @returns -/// The value a converted into linear. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a) -{ - return a * a * a * a * a * a * a * a; -} - -/// Compute an approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a) -{ - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); -} - -/// Compute a more accurate approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a) -{ - FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); - FfxFloat32x4 b4 = b * b * b * b * b * b * b * b; - return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); -} - -/// Compute a high accuracy approximate conversion from gamma2 to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between gamma2 and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a) -{ - return ffxSqrt(ffxSqrt(a)); -} - -/// Compute an approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a) -{ - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); -} - -/// Compute a more accurate approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a) -{ - FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); - FfxFloat32x4 b8 = b * b * b * b * b * b * b * b; - return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); -} - -/// Compute a very accurate approximate conversion from linear to PQ space. -/// -/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do -/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear -/// (8th power and fast 8th root). The maximum error is approximately 0.2%. -/// -/// @param a The value to convert between linear and PQ. -/// -/// @returns -/// The value a converted into PQ. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a) -{ - return ffxSqrt(ffxSqrt(ffxSqrt(a))); -} - -// An approximation of sine. -// -// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -// is {-1/4 to 1/4} representing {-1 to 1}. -// -// @param [in] value The value to calculate approximate sine for. -// -// @returns -// The approximate sine of value. -FfxFloat32 ffxParabolicSin(FfxFloat32 value) -{ - return value * abs(value) - value; -} - -// An approximation of sine. -// -// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -// is {-1/4 to 1/4} representing {-1 to 1}. -// -// @param [in] value The value to calculate approximate sine for. -// -// @returns -// The approximate sine of value. -FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x) -{ - return x * abs(x) - x; -} - -// An approximation of cosine. -// -// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -// is {-1/4 to 1/4} representing {-1 to 1}. -// -// @param [in] value The value to calculate approximate cosine for. -// -// @returns -// The approximate cosine of value. -FfxFloat32 ffxParabolicCos(FfxFloat32 x) -{ - x = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); - x = x * FfxFloat32(2.0) - FfxFloat32(1.0); - return ffxParabolicSin(x); -} - -// An approximation of cosine. -// -// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -// is {-1/4 to 1/4} representing {-1 to 1}. -// -// @param [in] value The value to calculate approximate cosine for. -// -// @returns -// The approximate cosine of value. -FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x) -{ - x = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f)); - x = x * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f); - return ffxParabolicSin(x); -} - -// An approximation of both sine and cosine. -// -// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -// is {-1/4 to 1/4} representing {-1 to 1}. -// -// @param [in] value The value to calculate approximate cosine for. -// -// @returns -// A FfxFloat32x2 containing approximations of both sine and cosine of value. -FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x) -{ - FfxFloat32 y = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); - y = y * FfxFloat32(2.0) - FfxFloat32(1.0); - return ffxParabolicSin(FfxFloat32x2(x, y)); -} - -/// Conditional free logic AND operation using values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y) -{ - return min(x, y); -} - -/// Conditional free logic NOT operation using two values. -/// -/// @param [in] x The first value to be fed into the NOT operator. -/// -/// @returns -/// Result of the NOT operation. -/// -/// @ingroup GPUCore -FfxUInt32 ffxZeroOneAnd(FfxUInt32 x) -{ - return x ^ FfxUInt32(1); -} - -/// Conditional free logic NOT operation using two values. -/// -/// @param [in] x The first value to be fed into the NOT operator. -/// -/// @returns -/// Result of the NOT operation. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x) -{ - return x ^ ffxBroadcast2(1u); -} - -/// Conditional free logic NOT operation using two values. -/// -/// @param [in] x The first value to be fed into the NOT operator. -/// -/// @returns -/// Result of the NOT operation. -/// -/// @ingroup GPUCore -FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x) -{ - return x ^ ffxBroadcast3(1u); -} - -/// Conditional free logic NOT operation using two values. -/// -/// @param [in] x The first value to be fed into the NOT operator. -/// -/// @returns -/// Result of the NOT operation. -/// -/// @ingroup GPUCore -FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x) -{ - return x ^ ffxBroadcast4(1u); -} - -/// Conditional free logic OR operation using two values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y) -{ - return max(x, y); -} - -/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x) -{ - return FfxUInt32(FfxFloat32(1.0) - x); -} - -/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x) -{ - return FfxUInt32x2(ffxBroadcast2(1.0) - x); -} - -/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x) -{ - return FfxUInt32x3(ffxBroadcast3(1.0) - x); -} - -/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x) -{ - return FfxUInt32x4(ffxBroadcast4(1.0) - x); -} - -/// Conditional free logic AND operation using two values followed by a NOT operation -/// using the resulting value and a third value. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// @param [in] z The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) -{ - return ffxSaturate(x * y + z); -} - -/// Conditional free logic AND operation using two values followed by a NOT operation -/// using the resulting value and a third value. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// @param [in] z The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) -{ - return ffxSaturate(x * y + z); -} - -/// Conditional free logic AND operation using two values followed by a NOT operation -/// using the resulting value and a third value. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// @param [in] z The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) -{ - return ffxSaturate(x * y + z); -} - -/// Conditional free logic AND operation using two values followed by a NOT operation -/// using the resulting value and a third value. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// @param [in] z The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) -{ - return ffxSaturate(x * y + z); -} - -/// Given a value, returns 1.0 if greater than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the greater than zero comparison. -/// -/// @ingroup GPUCore -FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x) -{ - return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); -} - -/// Given a value, returns 1.0 if greater than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the greater than zero comparison. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x) -{ - return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); -} - -/// Given a value, returns 1.0 if greater than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the greater than zero comparison. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x) -{ - return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); -} - -/// Given a value, returns 1.0 if greater than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the greater than zero comparison. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x) -{ - return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); -} - -/// Conditional free logic signed NOT operation using two FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat32 ffxZeroOneAnd(FfxFloat32 x) -{ - return FfxFloat32(1.0) - x; -} - -/// Conditional free logic signed NOT operation using two FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x) -{ - return ffxBroadcast2(1.0) - x; -} - -/// Conditional free logic signed NOT operation using two FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x) -{ - return ffxBroadcast3(1.0) - x; -} - -/// Conditional free logic signed NOT operation using two FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x) -{ - return ffxBroadcast4(1.0) - x; -} - -/// Conditional free logic OR operation using two FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y) -{ - return max(x, y); -} - -/// Choose between two FfxFloat32 values if the first paramter is greater than zero. -/// -/// @param [in] x The value to compare against zero. -/// @param [in] y The value to return if the comparision is greater than zero. -/// @param [in] z The value to return if the comparision is less than or equal to zero. -/// -/// @returns -/// The selected value. -/// -/// @ingroup GPUCore -FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) -{ - FfxFloat32 r = (-x) * z + z; - return x * y + r; -} - -/// Choose between two FfxFloat32 values if the first paramter is greater than zero. -/// -/// @param [in] x The value to compare against zero. -/// @param [in] y The value to return if the comparision is greater than zero. -/// @param [in] z The value to return if the comparision is less than or equal to zero. -/// -/// @returns -/// The selected value. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) -{ - FfxFloat32x2 r = (-x) * z + z; - return x * y + r; -} - -/// Choose between two FfxFloat32 values if the first paramter is greater than zero. -/// -/// @param [in] x The value to compare against zero. -/// @param [in] y The value to return if the comparision is greater than zero. -/// @param [in] z The value to return if the comparision is less than or equal to zero. -/// -/// @returns -/// The selected value. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) -{ - FfxFloat32x3 r = (-x) * z + z; - return x * y + r; -} - -/// Choose between two FfxFloat32 values if the first paramter is greater than zero. -/// -/// @param [in] x The value to compare against zero. -/// @param [in] y The value to return if the comparision is greater than zero. -/// @param [in] z The value to return if the comparision is less than or equal to zero. -/// -/// @returns -/// The selected value. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) -{ - FfxFloat32x4 r = (-x) * z + z; - return x * y + r; -} - -/// Given a value, returns 1.0 if less than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the sign value. -/// -/// @ingroup GPUCore -FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x) -{ - return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); -} - -/// Given a value, returns 1.0 if less than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the sign value. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x) -{ - return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); -} - -/// Given a value, returns 1.0 if less than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the sign value. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x) -{ - return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); -} - -/// Given a value, returns 1.0 if less than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the sign value. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x) -{ - return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); -} - -/// Compute a Rec.709 color space. -/// -/// Rec.709 is used for some HDTVs. -/// -/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. -/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). -/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). -/// -/// @param [in] color The color to convert to Rec. 709. -/// -/// @returns -/// The color in linear space. -/// -/// @ingroup GPUCore -FfxFloat32 ffxRec709FromLinear(FfxFloat32 color) -{ - FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); - return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y); -} - -/// Compute a Rec.709 color space. -/// -/// Rec.709 is used for some HDTVs. -/// -/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. -/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). -/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). -/// -/// @param [in] color The color to convert to Rec. 709. -/// -/// @returns -/// The color in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color) -{ - FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); - return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy); -} - -/// Compute a Rec.709 color space. -/// -/// Rec.709 is used for some HDTVs. -/// -/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. -/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). -/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). -/// -/// @param [in] color The color to convert to Rec. 709. -/// -/// @returns -/// The color in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) -{ - FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); - return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); -} - -/// Compute a gamma value from a linear value. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. -/// -/// @param [in] value The value to convert to gamma space from linear. -/// @param [in] power The reciprocal of power value used for the gamma curve. -/// -/// @returns -/// A value in gamma space. -/// -/// @ingroup GPUCore -FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power) -{ - return pow(value, FfxFloat32(power)); -} - -/// Compute a gamma value from a linear value. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. -/// -/// @param [in] value The value to convert to gamma space from linear. -/// @param [in] power The reciprocal of power value used for the gamma curve. -/// -/// @returns -/// A value in gamma space. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power) -{ - return pow(value, ffxBroadcast2(power)); -} - -/// Compute a gamma value from a linear value. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. -/// -/// @param [in] value The value to convert to gamma space from linear. -/// @param [in] power The reciprocal of power value used for the gamma curve. -/// -/// @returns -/// A value in gamma space. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power) -{ - return pow(value, ffxBroadcast3(power)); -} - -/// Compute a PQ value from a linear value. -/// -/// @param [in] value The value to convert to PQ from linear. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32 ffxPQToLinear(FfxFloat32 value) -{ - FfxFloat32 p = pow(value, FfxFloat32(0.159302)); - return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); -} - -/// Compute a PQ value from a linear value. -/// -/// @param [in] value The value to convert to PQ from linear. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 value) -{ - FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302)); - return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); -} - -/// Compute a PQ value from a linear value. -/// -/// @param [in] value The value to convert to PQ from linear. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 value) -{ - FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302)); - return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); -} - -/// Compute a linear value from a SRGB value. -/// -/// @param [in] value The value to convert to linear from SRGB. -/// -/// @returns -/// A value in SRGB space. -/// -/// @ingroup GPUCore -FfxFloat32 ffxSrgbToLinear(FfxFloat32 value) -{ - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); -} - -/// Compute a linear value from a SRGB value. -/// -/// @param [in] value The value to convert to linear from SRGB. -/// -/// @returns -/// A value in SRGB space. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 value) -{ - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy); -} - -/// Compute a linear value from a SRGB value. -/// -/// @param [in] value The value to convert to linear from SRGB. -/// -/// @returns -/// A value in SRGB space. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 value) -{ - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy); -} - -/// Compute a linear value from a REC.709 value. -/// -/// @param [in] color The value to convert to linear from REC.709. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) -{ - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); -} - -/// Compute a linear value from a REC.709 value. -/// -/// @param [in] color The value to convert to linear from REC.709. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) -{ - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); -} - -/// Compute a linear value from a REC.709 value. -/// -/// @param [in] color The value to convert to linear from REC.709. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) -{ - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); -} - -/// Compute a linear value from a value in a gamma space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] color The value to convert to linear in gamma space. -/// @param [in] power The power value used for the gamma curve. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) -{ - return pow(color, FfxFloat32(power)); -} - -/// Compute a linear value from a value in a gamma space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] color The value to convert to linear in gamma space. -/// @param [in] power The power value used for the gamma curve. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) -{ - return pow(color, ffxBroadcast2(power)); -} - -/// Compute a linear value from a value in a gamma space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] color The value to convert to linear in gamma space. -/// @param [in] power The power value used for the gamma curve. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) -{ - return pow(color, ffxBroadcast3(power)); -} - -/// Compute a linear value from a value in a PQ space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in PQ space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32 ffxLinearFromPQ(FfxFloat32 value) -{ - FfxFloat32 p = pow(value, FfxFloat32(0.0126833)); - return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); -} - -/// Compute a linear value from a value in a PQ space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in PQ space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value) -{ - FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833)); - return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); -} - -/// Compute a linear value from a value in a PQ space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in PQ space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value) -{ - FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833)); - return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); -} - -/// Compute a linear value from a value in a SRGB space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in SRGB space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value) -{ - FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z)); -} - -/// Compute a linear value from a value in a SRGB space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in SRGB space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value) -{ - FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz)); -} - -/// Compute a linear value from a value in a SRGB space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in SRGB space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value) -{ - FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz)); -} - -/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. -/// -/// 543210 -/// ====== -/// ..xxx. -/// yy...y -/// -/// @param [in] a The input 1D coordinates to remap. -/// -/// @returns -/// The remapped 2D coordinates. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) -{ - return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); -} - -/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. -/// -/// The 64-wide lane indices to 8x8 remapping is performed as follows: -/// -/// 00 01 08 09 10 11 18 19 -/// 02 03 0a 0b 12 13 1a 1b -/// 04 05 0c 0d 14 15 1c 1d -/// 06 07 0e 0f 16 17 1e 1f -/// 20 21 28 29 30 31 38 39 -/// 22 23 2a 2b 32 33 3a 3b -/// 24 25 2c 2d 34 35 3c 3d -/// 26 27 2e 2f 36 37 3e 3f -/// -/// @param [in] a The input 1D coordinate to remap. -/// -/// @returns -/// The remapped 2D coordinates. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a) -{ - return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h.meta deleted file mode 100644 index ec380d2..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 05b921699d1374a429e32afca13137e2 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h deleted file mode 100644 index 4c73daf..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h +++ /dev/null @@ -1,2979 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#if FFX_HALF -#if FFX_HLSL_SM >= 62 -/// A define value for 16bit positive infinity. -/// -/// @ingroup GPUCore -#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u) - -/// A define value for 16bit negative infinity. -/// -/// @ingroup GPUCore -#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u) -#else -/// A define value for 16bit positive infinity. -/// -/// @ingroup GPUCore -#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u) - -/// A define value for 16bit negative infinity. -/// -/// @ingroup GPUCore -#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u) -#endif // #if FFX_HLSL_SM>=62 - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y) -{ - return min(x, y); -} - -/// Compute the min of two values. -/// -/// @param [in] x The first value to compute the min of. -/// @param [in] y The second value to compute the min of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y) -{ - return min(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y) -{ - return max(x, y); -} - -/// Compute the max of two values. -/// -/// @param [in] x The first value to compute the max of. -/// @param [in] y The second value to compute the max of. -/// -/// @returns -/// The the lowest of two values. -/// -/// @ingroup GPUCore -FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y) -{ - return max(x, y); -} - -/// Compute the value of the first parameter raised to the power of the second. -/// -/// @param [in] x The value to raise to the power y. -/// @param [in] y The power to which to raise x. -/// -/// @returns -/// The value of the first parameter raised to the power of the second. -/// -/// @ingroup GPUCore -FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y) -{ - return pow(x, y); -} - -/// Compute the value of the first parameter raised to the power of the second. -/// -/// @param [in] x The value to raise to the power y. -/// @param [in] y The power to which to raise x. -/// -/// @returns -/// The value of the first parameter raised to the power of the second. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y) -{ - return pow(x, y); -} - -/// Compute the value of the first parameter raised to the power of the second. -/// -/// @param [in] x The value to raise to the power y. -/// @param [in] y The power to which to raise x. -/// -/// @returns -/// The value of the first parameter raised to the power of the second. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y) -{ - return pow(x, y); -} - -/// Compute the value of the first parameter raised to the power of the second. -/// -/// @param [in] x The value to raise to the power y. -/// @param [in] y The power to which to raise x. -/// -/// @returns -/// The value of the first parameter raised to the power of the second. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y) -{ - return pow(x, y); -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup GPUCore -FfxFloat16 ffxSqrt(FfxFloat16 x) -{ - return sqrt(x); -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxSqrt(FfxFloat16x2 x) -{ - return sqrt(x); -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxSqrt(FfxFloat16x3 x) -{ - return sqrt(x); -} - -/// Compute the square root of a value. -/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxSqrt(FfxFloat16x4 x) -{ - return sqrt(x); -} - -/// Copy the sign bit from 's' to positive 'd'. -/// -/// @param [in] d The value to copy the sign bit into. -/// @param [in] s The value to copy the sign bit from. -/// -/// @returns -/// The value of d with the sign bit from s. -/// -/// @ingroup GPUCore -FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s) -{ - return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u))); -} - -/// Copy the sign bit from 's' to positive 'd'. -/// -/// @param [in] d The value to copy the sign bit into. -/// @param [in] s The value to copy the sign bit from. -/// -/// @returns -/// The value of d with the sign bit from s. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s) -{ - return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u))); -} - -/// Copy the sign bit from 's' to positive 'd'. -/// -/// @param [in] d The value to copy the sign bit into. -/// @param [in] s The value to copy the sign bit from. -/// -/// @returns -/// The value of d with the sign bit from s. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s) -{ - return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u))); -} - -/// Copy the sign bit from 's' to positive 'd'. -/// -/// @param [in] d The value to copy the sign bit into. -/// @param [in] s The value to copy the sign bit from. -/// -/// @returns -/// The value of d with the sign bit from s. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s) -{ - return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u))); -} - -/// A single operation to return the following: -/// m = NaN := 0 -/// m >= 0 := 0 -/// m < 0 := 1 -/// -/// Uses the following useful floating point logic, -/// saturate(+a*(-INF)==-INF) := 0 -/// saturate( 0*(-INF)== NaN) := 0 -/// saturate(-a*(-INF)==+INF) := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against 0. -/// -/// @returns -/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. -/// -/// @ingroup GPUCore -FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) -{ - return FfxFloat16(ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF))); -} - -/// A single operation to return the following: -/// m = NaN := 0 -/// m >= 0 := 0 -/// m < 0 := 1 -/// -/// Uses the following useful floating point logic, -/// saturate(+a*(-INF)==-INF) := 0 -/// saturate( 0*(-INF)== NaN) := 0 -/// saturate(-a*(-INF)==+INF) := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against 0. -/// -/// @returns -/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) -{ - return FfxFloat16x2(ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF))); -} - -/// A single operation to return the following: -/// m = NaN := 0 -/// m >= 0 := 0 -/// m < 0 := 1 -/// -/// Uses the following useful floating point logic, -/// saturate(+a*(-INF)==-INF) := 0 -/// saturate( 0*(-INF)== NaN) := 0 -/// saturate(-a*(-INF)==+INF) := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against 0. -/// -/// @returns -/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) -{ - return FfxFloat16x3(ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF))); -} - -/// A single operation to return the following: -/// m = NaN := 0 -/// m >= 0 := 0 -/// m < 0 := 1 -/// -/// Uses the following useful floating point logic, -/// saturate(+a*(-INF)==-INF) := 0 -/// saturate( 0*(-INF)== NaN) := 0 -/// saturate(-a*(-INF)==+INF) := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against 0. -/// -/// @returns -/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) -{ - return FfxFloat16x4(ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF))); -} - -/// A single operation to return the following: -/// m = NaN := 1 -/// m > 0 := 0 -/// m <= 0 := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against zero. -/// -/// @returns -/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. -/// -/// @ingroup GPUCore -FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) -{ - return FfxFloat16(ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF))); -} - -/// A single operation to return the following: -/// m = NaN := 1 -/// m > 0 := 0 -/// m <= 0 := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against zero. -/// -/// @returns -/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) -{ - return FfxFloat16x2(ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF))); -} - -/// A single operation to return the following: -/// m = NaN := 1 -/// m > 0 := 0 -/// m <= 0 := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against zero. -/// -/// @returns -/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) -{ - return FfxFloat16x3(ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF))); -} - -/// A single operation to return the following: -/// m = NaN := 1 -/// m > 0 := 0 -/// m <= 0 := 1 -/// -/// This function is useful when creating masks for branch-free logic. -/// -/// @param [in] m The value to test against zero. -/// -/// @returns -/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m) -{ - return FfxFloat16x4(ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF))); -} - -/// Convert a 16bit floating point value to sortable integer. -/// -/// - If sign bit=0, flip the sign bit (positives). -/// - If sign bit=1, flip all bits (negatives). -/// -/// The function has the side effects that: -/// - Larger integers are more positive values. -/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). -/// -/// @param [in] x The floating point value to make sortable. -/// -/// @returns -/// The sortable integer value. -/// -/// @ingroup GPUCore -FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x) -{ - return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); -} - -/// Convert a sortable integer to a 16bit floating point value. -/// -/// The function has the side effects that: -/// - If sign bit=1, flip the sign bit (positives). -/// - If sign bit=0, flip all bits (negatives). -/// -/// @param [in] x The sortable integer value to make floating point. -/// -/// @returns -/// The floating point value. -/// -/// @ingroup GPUCore -FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x) -{ - return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); -} - -/// Convert a pair of 16bit floating point values to a pair of sortable integers. -/// -/// - If sign bit=0, flip the sign bit (positives). -/// - If sign bit=1, flip all bits (negatives). -/// -/// The function has the side effects that: -/// - Larger integers are more positive values. -/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). -/// -/// @param [in] x The floating point values to make sortable. -/// -/// @returns -/// The sortable integer values. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x) -{ - return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); -} - -/// Convert a pair of sortable integers to a pair of 16bit floating point values. -/// -/// The function has the side effects that: -/// - If sign bit=1, flip the sign bit (positives). -/// - If sign bit=0, flip all bits (negatives). -/// -/// @param [in] x The sortable integer values to make floating point. -/// -/// @returns -/// The floating point values. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x) -{ - return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// [Zero] Y0 [Zero] X0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i) -{ - return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// [Zero] Y1 [Zero] X1 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i) -{ - return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// [Zero] Y2 [Zero] X2 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i) -{ - return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// [Zero] Y3 [Zero] X3 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i) -{ - return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// Y3 Y2 Y1 X0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i) -{ - return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// Y3 Y2 Y1 X2 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i) -{ - return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// Y3 Y2 X0 Y0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i) -{ - return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// Y3 Y2 X2 Y0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i) -{ - return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// Y3 X0 Y1 Y0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i) -{ - return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// Y3 X2 Y1 Y0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i) -{ - return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// X0 Y2 Y1 Y0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i) -{ - return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// X2 Y2 Y1 Y0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i) -{ - return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// Y2 X2 Y0 X0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i) -{ - return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u); -} - -/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. -/// -/// The resulting integer will contain bytes in the following order, from most to least significant: -/// Y2 Y0 X2 X0 -/// -/// @param [in] i The integer pair to pack. -/// -/// @returns -/// The packed integer value. -/// -/// @ingroup GPUCore -FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i) -{ - return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u)); -} - -/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. -/// -/// @param [in] x The first float16x2 value to pack. -/// @param [in] y The second float16x2 value to pack. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) -{ - x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); - y *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); - return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], -/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. -/// -/// r=ffxPermuteUByte0Float16x2ToUint2(d,i) -/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits -/// Where 'k1' is an SGPR with 0x???? -/// Where 'k2' is an SGPR with 0x???? -/// V_PK_FMA_F16 i,i,k0.x,0 -/// V_PERM_B32 r.x,i,i,k1 -/// V_PERM_B32 r.y,i,i,k2 -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); - return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], -/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. -/// -/// r=ffxPermuteUByte1Float16x2ToUint2(d,i) -/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits -/// Where 'k1' is an SGPR with 0x???? -/// Where 'k2' is an SGPR with 0x???? -/// V_PK_FMA_F16 i,i,k0.x,0 -/// V_PERM_B32 r.x,i,i,k1 -/// V_PERM_B32 r.y,i,i,k2 -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); - return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], -/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. -/// -/// r=ffxPermuteUByte2Float16x2ToUint2(d,i) -/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits -/// Where 'k1' is an SGPR with 0x???? -/// Where 'k2' is an SGPR with 0x???? -/// V_PK_FMA_F16 i,i,k0.x,0 -/// V_PERM_B32 r.x,i,i,k1 -/// V_PERM_B32 r.y,i,i,k2 -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); - return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], -/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. -/// -/// r=ffxPermuteUByte3Float16x2ToUint2(d,i) -/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits -/// Where 'k1' is an SGPR with 0x???? -/// Where 'k2' is an SGPR with 0x???? -/// V_PK_FMA_F16 i,i,k0.x,0 -/// V_PERM_B32 r.x,i,i,k1 -/// V_PERM_B32 r.y,i,i,k2 -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); - return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); -} - -/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. -/// -/// @param [in] x The first float16x2 value to pack. -/// @param [in] y The second float16x2 value to pack. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) -{ - x = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); - y = y * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); - return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], -/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); - return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], -/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); - return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], -/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); - return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], -/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); - return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], -/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. -/// -/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). -/// This is useful if there is a desire for cleared values to decode as zero. -/// -/// Handles signed byte values. -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; - return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], -/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. -/// -/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). -/// This is useful if there is a desire for cleared values to decode as zero. -/// -/// Handles signed byte values. -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; - return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], -/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. -/// -/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). -/// This is useful if there is a desire for cleared values to decode as zero. -/// -/// Handles signed byte values. -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; - return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], -/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. -/// -/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). -/// This is useful if there is a desire for cleared values to decode as zero. -/// -/// Handles signed byte values. -/// -/// @param [in] d The FfxUInt32x2 value to be packed. -/// @param [in] i The FfxFloat16x2 value to be packed. -/// -/// @returns -/// The packed FfxUInt32x2 value. -/// -/// @ingroup GPUCore -FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) -{ - FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; - return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); -} - -/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. -/// -/// Handles signed byte values. -/// -/// @param [in] i The FfxUInt32x2 value to be unpacked. -/// -/// @returns -/// The unpacked FfxFloat16x2. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i) -{ - return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); -} - -/// Calculate a half-precision low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a) -{ - return FFX_TO_FLOAT16((FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2)); -} - -/// Calculate a half-precision low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a) -{ - return FFX_TO_FLOAT16X2((FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2)); -} - -/// Calculate a half-precision low-quality approximation for the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the square root for. -/// -/// @returns -/// An approximation of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a) -{ - return FFX_TO_FLOAT16X3((FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2)); -} - -/// Calculate a half-precision low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a) -{ - return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a)); -} - -/// Calculate a half-precision low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a) -{ - return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a)); -} - -/// Calculate a half-precision low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a) -{ - return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a)); -} - -/// Calculate a half-precision low-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a) -{ - return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a)); -} - -/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to medium quality. -/// -/// @ingroup GPUCore -FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a) -{ - FfxFloat16 b = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a)); - return b * (-b * a + FFX_BROADCAST_FLOAT16(2.0)); -} - -/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to medium quality. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a) -{ - FfxFloat16x2 b = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a)); - return b * (-b * a + FFX_BROADCAST_FLOAT16X2(2.0)); -} - -/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to medium quality. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a) -{ - FfxFloat16x3 b = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a)); - return b * (-b * a + FFX_BROADCAST_FLOAT16X3(2.0)); -} - -/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal for. -/// -/// @returns -/// An approximation of the reciprocal, estimated to medium quality. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a) -{ - FfxFloat16x4 b = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a)); - return b * (-b * a + FFX_BROADCAST_FLOAT16X4(2.0)); -} - -/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. -/// -/// @returns -/// An approximation of the reciprocal of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a) -{ - return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x59a3) - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1))); -} - -/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. -/// -/// @returns -/// An approximation of the reciprocal of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a) -{ - return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x59a3) - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1))); -} - -/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. -/// -/// @returns -/// An approximation of the reciprocal of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a) -{ - return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x59a3) - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1))); -} - -/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. -/// -/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent -/// presentation materials: -/// -/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -/// -/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. -/// -/// @returns -/// An approximation of the reciprocal of the square root, estimated to low quality. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a) -{ - return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x59a3) - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1))); -} - -/// An approximation of sine. -/// -/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -/// is {-1/4 to 1/4} representing {-1 to 1}. -/// -/// @param [in] x The value to calculate approximate sine for. -/// -/// @returns -/// The approximate sine of value. -FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x) -{ - return x * abs(x) - x; -} - -/// An approximation of sine. -/// -/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -/// is {-1/4 to 1/4} representing {-1 to 1}. -/// -/// @param [in] x The value to calculate approximate sine for. -/// -/// @returns -/// The approximate sine of value. -FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x) -{ - return x * abs(x) - x; -} - -/// An approximation of cosine. -/// -/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -/// is {-1/4 to 1/4} representing {-1 to 1}. -/// -/// @param [in] x The value to calculate approximate cosine for. -/// -/// @returns -/// The approximate cosine of value. -FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x) -{ - x = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); - x = x * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); - return ffxParabolicSinHalf(x); -} - -/// An approximation of cosine. -/// -/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -/// is {-1/4 to 1/4} representing {-1 to 1}. -/// -/// @param [in] x The value to calculate approximate cosine for. -/// -/// @returns -/// The approximate cosine of value. -FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x) -{ - x = ffxFract(x * FFX_BROADCAST_FLOAT16X2(0.5) + FFX_BROADCAST_FLOAT16X2(0.75)); - x = x * FFX_BROADCAST_FLOAT16X2(2.0) - FFX_BROADCAST_FLOAT16X2(1.0); - return ffxParabolicSinHalf(x); -} - -/// An approximation of both sine and cosine. -/// -/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range -/// is {-1/4 to 1/4} representing {-1 to 1}. -/// -/// @param [in] x The value to calculate approximate cosine for. -/// -/// @returns -/// A FfxFloat32x2 containing approximations of both sine and cosine of value. -FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x) -{ - FfxFloat16 y = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); - y = y * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); - return ffxParabolicSinHalf(FfxFloat16x2(x, y)); -} - -/// Conditional free logic AND operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y) -{ - return min(x, y); -} - -/// Conditional free logic NOT operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the NOT operator. -/// @param [in] y The second value to be fed into the NOT operator. -/// -/// @returns -/// Result of the NOT operation. -/// -/// @ingroup GPUCore -FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x) -{ - return x ^ FFX_BROADCAST_UINT16(1); -} - -/// Conditional free logic NOT operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the NOT operator. -/// @param [in] y The second value to be fed into the NOT operator. -/// -/// @returns -/// Result of the NOT operation. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x) -{ - return x ^ FFX_BROADCAST_UINT16X2(1); -} - -/// Conditional free logic NOT operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the NOT operator. -/// @param [in] y The second value to be fed into the NOT operator. -/// -/// @returns -/// Result of the NOT operation. -/// -/// @ingroup GPUCore -FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x) -{ - return x ^ FFX_BROADCAST_UINT16X3(1); -} - -/// Conditional free logic NOT operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the NOT operator. -/// @param [in] y The second value to be fed into the NOT operator. -/// -/// @returns -/// Result of the NOT operation. -/// -/// @ingroup GPUCore -FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x) -{ - return x ^ FFX_BROADCAST_UINT16X4(1); -} - -/// Conditional free logic OR operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y) -{ - return max(x, y); -} - -/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. -/// -/// @param [in] x The value to converted to a Uint. -/// -/// @returns -/// The converted Uint value. -/// -/// @ingroup GPUCore -FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x) -{ - return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1))); -} - -/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. -/// -/// @param [in] x The value to converted to a Uint. -/// -/// @returns -/// The converted Uint value. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x) -{ - return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1))); -} - -/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. -/// -/// @param [in] x The value to converted to a Uint. -/// -/// @returns -/// The converted Uint value. -/// -/// @ingroup GPUCore -FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x) -{ - return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1))); -} - -/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. -/// -/// @param [in] x The value to converted to a Uint. -/// -/// @returns -/// The converted Uint value. -/// -/// @ingroup GPUCore -FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x) -{ - return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1))); -} - -/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. -/// -/// @param [in] x The value to converted to a half-precision FfxFloat32. -/// -/// @returns -/// The converted half-precision FfxFloat32 value. -/// -/// @ingroup GPUCore -FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x) -{ - return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0))); -} - -/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. -/// -/// @param [in] x The value to converted to a half-precision FfxFloat32. -/// -/// @returns -/// The converted half-precision FfxFloat32 value. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x) -{ - return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxUInt16x2(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); -} - -/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. -/// -/// @param [in] x The value to converted to a half-precision FfxFloat32. -/// -/// @returns -/// The converted half-precision FfxFloat32 value. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x) -{ - return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxUInt16x3(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); -} - -/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. -/// -/// @param [in] x The value to converted to a half-precision FfxFloat32. -/// -/// @returns -/// The converted half-precision FfxFloat32 value. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x) -{ - return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxUInt16x4(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); -} - -/// Conditional free logic AND operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y) -{ - return min(x, y); -} - -/// Conditional free logic AND operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// -/// @returns -/// Result of the AND operation. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y) -{ - return min(x, y); -} - -/// Conditional free logic AND NOT operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND NOT operator. -/// @param [in] y The second value to be fed into the AND NOT operator. -/// -/// @returns -/// Result of the AND NOT operation. -/// -/// @ingroup GPUCore -FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y) -{ - return (-x) * y + FFX_BROADCAST_FLOAT16(1.0); -} - -/// Conditional free logic AND NOT operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND NOT operator. -/// @param [in] y The second value to be fed into the AND NOT operator. -/// -/// @returns -/// Result of the AND NOT operation. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) -{ - return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0); -} - -/// Conditional free logic AND NOT operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND NOT operator. -/// @param [in] y The second value to be fed into the AND NOT operator. -/// -/// @returns -/// Result of the AND NOT operation. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) -{ - return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0); -} - -/// Conditional free logic AND NOT operation using two half-precision values. -/// -/// @param [in] x The first value to be fed into the AND NOT operator. -/// @param [in] y The second value to be fed into the AND NOT operator. -/// -/// @returns -/// Result of the AND NOT operation. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) -{ - return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0); -} - -/// Conditional free logic AND operation using two half-precision values followed by -/// a NOT operation using the resulting value and a third half-precision value. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// @param [in] z The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) -{ - return FfxFloat16(ffxSaturate(x * y + z)); -} - -/// Conditional free logic AND operation using two half-precision values followed by -/// a NOT operation using the resulting value and a third half-precision value. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// @param [in] z The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) -{ - return FfxFloat16x2(ffxSaturate(x * y + z)); -} - -/// Conditional free logic AND operation using two half-precision values followed by -/// a NOT operation using the resulting value and a third half-precision value. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// @param [in] z The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) -{ - return FfxFloat16x3(ffxSaturate(x * y + z)); -} - -/// Conditional free logic AND operation using two half-precision values followed by -/// a NOT operation using the resulting value and a third half-precision value. -/// -/// @param [in] x The first value to be fed into the AND operator. -/// @param [in] y The second value to be fed into the AND operator. -/// @param [in] z The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) -{ - return FfxFloat16x4(ffxSaturate(x * y + z)); -} - -/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the greater than zero comparison. -/// -/// @ingroup GPUCore -FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) -{ - return FfxFloat16(ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF))); -} - -/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the greater than zero comparison. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) -{ - return FfxFloat16x2(ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF))); -} - -/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the greater than zero comparison. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) -{ - return FfxFloat16x3(ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF))); -} - -/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the greater than zero comparison. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) -{ - return FfxFloat16x4(ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF))); -} - -/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x) -{ - return FFX_BROADCAST_FLOAT16(1.0) - x; -} - -/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x) -{ - return FFX_BROADCAST_FLOAT16X2(1.0) - x; -} - -/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x) -{ - return FFX_BROADCAST_FLOAT16X3(1.0) - x; -} - -/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the AND OR operator. -/// -/// @returns -/// Result of the AND OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x) -{ - return FFX_BROADCAST_FLOAT16X4(1.0) - x; -} - -/// Conditional free logic OR operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) -{ - return max(x, y); -} - -/// Conditional free logic OR operation using two half-precision FfxFloat32 values. -/// -/// @param [in] x The first value to be fed into the OR operator. -/// @param [in] y The second value to be fed into the OR operator. -/// -/// @returns -/// Result of the OR operation. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) -{ - return max(x, y); -} - -/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. -/// -/// @param [in] x The value to compare against zero. -/// @param [in] y The value to return if the comparision is greater than zero. -/// @param [in] z The value to return if the comparision is less than or equal to zero. -/// -/// @returns -/// The selected value. -/// -/// @ingroup GPUCore -FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) -{ - FfxFloat16 r = (-x) * z + z; - return x * y + r; -} - -/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. -/// -/// @param [in] x The value to compare against zero. -/// @param [in] y The value to return if the comparision is greater than zero. -/// @param [in] z The value to return if the comparision is less than or equal to zero. -/// -/// @returns -/// The selected value. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) -{ - FfxFloat16x2 r = (-x) * z + z; - return x * y + r; -} - -/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. -/// -/// @param [in] x The value to compare against zero. -/// @param [in] y The value to return if the comparision is greater than zero. -/// @param [in] z The value to return if the comparision is less than or equal to zero. -/// -/// @returns -/// The selected value. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) -{ - FfxFloat16x3 r = (-x) * z + z; - return x * y + r; -} - -/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. -/// -/// @param [in] x The value to compare against zero. -/// @param [in] y The value to return if the comparision is greater than zero. -/// @param [in] z The value to return if the comparision is less than or equal to zero. -/// -/// @returns -/// The selected value. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) -{ - FfxFloat16x4 r = (-x) * z + z; - return x * y + r; -} - -/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the sign value. -/// -/// @ingroup GPUCore -FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) -{ - return FfxFloat16(ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF))); -} - -/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the sign value. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) -{ - return FfxFloat16x2(ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF))); -} - -/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the sign value. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) -{ - return FfxFloat16x3(ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF))); -} - -/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. -/// -/// @param [in] x The value to be compared. -/// -/// @returns -/// Result of the sign value. -/// -/// @ingroup GPUCore -FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) -{ - return FfxFloat16x4(ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF))); -} - -/// Compute a Rec.709 color space. -/// -/// Rec.709 is used for some HDTVs. -/// -/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. -/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). -/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). -/// -/// @param [in] c The color to convert to Rec. 709. -/// -/// @returns -/// The color in Rec.709 space. -/// -/// @ingroup GPUCore -FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); - FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); - return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); -} - -/// Compute a Rec.709 color space. -/// -/// Rec.709 is used for some HDTVs. -/// -/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. -/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). -/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). -/// -/// @param [in] c The color to convert to Rec. 709. -/// -/// @returns -/// The color in Rec.709 space. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); - FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); - return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); -} - -/// Compute a Rec.709 color space. -/// -/// Rec.709 is used for some HDTVs. -/// -/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. -/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). -/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). -/// -/// @param [in] c The color to convert to Rec. 709. -/// -/// @returns -/// The color in Rec.709 space. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); - FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); - return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); -} - -/// Compute a gamma value from a linear value. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. -/// -/// @param [in] c The value to convert to gamma space from linear. -/// @param [in] rcpX The reciprocal of power value used for the gamma curve. -/// -/// @returns -/// A value in gamma space. -/// -/// @ingroup GPUCore -FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX) -{ - return pow(c, FFX_BROADCAST_FLOAT16(rcpX)); -} - -/// Compute a gamma value from a linear value. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. -/// -/// @param [in] c The value to convert to gamma space from linear. -/// @param [in] rcpX The reciprocal of power value used for the gamma curve. -/// -/// @returns -/// A value in gamma space. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX) -{ - return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX)); -} - -/// Compute a gamma value from a linear value. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. -/// -/// @param [in] c The value to convert to gamma space from linear. -/// @param [in] rcpX The reciprocal of power value used for the gamma curve. -/// -/// @returns -/// A value in gamma space. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX) -{ - return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX)); -} - -/// Compute an SRGB value from a linear value. -/// -/// @param [in] c The value to convert to SRGB from linear. -/// -/// @returns -/// A value in SRGB space. -/// -/// @ingroup GPUCore -FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); - return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); -} - -/// Compute an SRGB value from a linear value. -/// -/// @param [in] c The value to convert to SRGB from linear. -/// -/// @returns -/// A value in SRGB space. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); - return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); -} - -/// Compute an SRGB value from a linear value. -/// -/// @param [in] c The value to convert to SRGB from linear. -/// -/// @returns -/// A value in SRGB space. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); - return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); -} - -/// Compute the square root of a value. -/// -/// @param [in] c The value to compute the square root for. -/// -/// @returns -/// A square root of the input value. -/// -/// @ingroup GPUCore -FfxFloat16 ffxSquareRootHalf(FfxFloat16 c) -{ - return sqrt(c); -} - -/// Compute the square root of a value. -/// -/// @param [in] c The value to compute the square root for. -/// -/// @returns -/// A square root of the input value. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c) -{ - return sqrt(c); -} - -/// Compute the square root of a value. -/// -/// @param [in] c The value to compute the square root for. -/// -/// @returns -/// A square root of the input value. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c) -{ - return sqrt(c); -} - -/// Compute the cube root of a value. -/// -/// @param [in] c The value to compute the cube root for. -/// -/// @returns -/// A cube root of the input value. -/// -/// @ingroup GPUCore -FfxFloat16 ffxCubeRootHalf(FfxFloat16 c) -{ - return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0)); -} - -/// Compute the cube root of a value. -/// -/// @param [in] c The value to compute the cube root for. -/// -/// @returns -/// A cube root of the input value. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c) -{ - return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0)); -} - -/// Compute the cube root of a value. -/// -/// @param [in] c The value to compute the cube root for. -/// -/// @returns -/// A cube root of the input value. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c) -{ - return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0)); -} - -/// Compute a linear value from a REC.709 value. -/// -/// @param [in] c The value to convert to linear from REC.709. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); -} - -/// Compute a linear value from a REC.709 value. -/// -/// @param [in] c The value to convert to linear from REC.709. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); -} - -/// Compute a linear value from a REC.709 value. -/// -/// @param [in] c The value to convert to linear from REC.709. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); -} - -/// Compute a linear value from a value in a gamma space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] c The value to convert to linear in gamma space. -/// @param [in] x The power value used for the gamma curve. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x) -{ - return pow(c, FFX_BROADCAST_FLOAT16(x)); -} - -/// Compute a linear value from a value in a gamma space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] c The value to convert to linear in gamma space. -/// @param [in] x The power value used for the gamma curve. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x) -{ - return pow(c, FFX_BROADCAST_FLOAT16X2(x)); -} - -/// Compute a linear value from a value in a gamma space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] c The value to convert to linear in gamma space. -/// @param [in] x The power value used for the gamma curve. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x) -{ - return pow(c, FFX_BROADCAST_FLOAT16X3(x)); -} - -/// Compute a linear value from a value in a SRGB space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] c The value to convert to linear in SRGB space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); - FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); -} - -/// Compute a linear value from a value in a SRGB space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] c The value to convert to linear in SRGB space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); - FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); -} - -/// Compute a linear value from a value in a SRGB space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] c The value to convert to linear in SRGB space. -/// -/// @returns -/// A value in linear space. -/// -/// @ingroup GPUCore -FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) -{ - FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); - FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); -} - -/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. -/// -/// 543210 -/// ====== -/// ..xxx. -/// yy...y -/// -/// @param [in] a The input 1D coordinates to remap. -/// -/// @returns -/// The remapped 2D coordinates. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) -{ - return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); -} - -/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. -/// -/// The 64-wide lane indices to 8x8 remapping is performed as follows: -/// -/// 00 01 08 09 10 11 18 19 -/// 02 03 0a 0b 12 13 1a 1b -/// 04 05 0c 0d 14 15 1c 1d -/// 06 07 0e 0f 16 17 1e 1f -/// 20 21 28 29 30 31 38 39 -/// 22 23 2a 2b 32 33 3a 3b -/// 24 25 2c 2d 34 35 3c 3d -/// 26 27 2e 2f 36 37 3e 3f -/// -/// @param [in] a The input 1D coordinate to remap. -/// -/// @returns -/// The remapped 2D coordinates. -/// -/// @ingroup GPUCore -FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a) -{ - return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); -} - -#endif // FFX_HALF diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h.meta deleted file mode 100644 index e78eec4..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 1bdb323791a91a5438ee8e1e63187840 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h deleted file mode 100644 index 337eb06..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h +++ /dev/null @@ -1,1651 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -/// @defgroup HLSLCore HLSL Core -/// HLSL core defines and functions -/// -/// @ingroup FfxHLSL - -#define DECLARE_SRV_REGISTER(regIndex) t##regIndex -#define DECLARE_UAV_REGISTER(regIndex) u##regIndex -#define DECLARE_CB_REGISTER(regIndex) b##regIndex -#define FFX_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) -#define FFX_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) -#define FFX_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) - -/// A define for abstracting shared memory between shading languages. -/// -/// @ingroup HLSLCore -#define FFX_GROUPSHARED groupshared - -/// A define for abstracting compute memory barriers between shading languages. -/// -/// @ingroup HLSLCore -#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync - -/// A define for abstracting compute atomic additions between shading languages. -/// -/// @ingroup HLSLCore -#define FFX_ATOMIC_ADD(x, y) InterlockedAdd(x, y) - -/// A define added to accept static markup on functions to aid CPU/GPU portability of code. -/// -/// @ingroup HLSLCore -#define FFX_STATIC static - -/// A define for abstracting loop unrolling between shading languages. -/// -/// @ingroup HLSLCore -#define FFX_UNROLL [unroll] - -/// A define for abstracting a 'greater than' comparison operator between two types. -/// -/// @ingroup HLSLCore -#define FFX_GREATER_THAN(x, y) x > y - -/// A define for abstracting a 'greater than or equal' comparison operator between two types. -/// -/// @ingroup HLSLCore -#define FFX_GREATER_THAN_EQUAL(x, y) x >= y - -/// A define for abstracting a 'less than' comparison operator between two types. -/// -/// @ingroup HLSLCore -#define FFX_LESS_THAN(x, y) x < y - -/// A define for abstracting a 'less than or equal' comparison operator between two types. -/// -/// @ingroup HLSLCore -#define FFX_LESS_THAN_EQUAL(x, y) x <= y - -/// A define for abstracting an 'equal' comparison operator between two types. -/// -/// @ingroup HLSLCore -#define FFX_EQUAL(x, y) x == y - -/// A define for abstracting a 'not equal' comparison operator between two types. -/// -/// @ingroup HLSLCore -#define FFX_NOT_EQUAL(x, y) x != y - -/// A define for abstracting matrix multiply operations between shading languages. -/// -/// @ingroup HLSLCore -#define FFX_MATRIX_MULTIPLY(a, b) mul(a, b) - -/// A define for abstracting vector transformations between shading languages. -/// -/// @ingroup HLSLCore -#define FFX_TRANSFORM_VECTOR(a, b) mul(a, b) - -/// A define for abstracting modulo operations between shading languages. -/// -/// @ingroup HLSLCore -#define FFX_MODULO(a, b) (fmod(a, b)) - -/// Broadcast a scalar value to a 1-dimensional floating point vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) - -/// Broadcast a scalar value to a 2-dimensional floating point vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x) - -/// Broadcast a scalar value to a 3-dimensional floating point vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x) - -/// Broadcast a scalar value to a 4-dimensional floating point vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x) - -/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_UINT32(x) FfxUInt32(x) - -/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x) - -/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x) - -/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x) - -/// Broadcast a scalar value to a 1-dimensional signed integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_INT32(x) FfxInt32(x) - -/// Broadcast a scalar value to a 2-dimensional signed integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_INT32X2(x) FfxInt32(x) - -/// Broadcast a scalar value to a 3-dimensional signed integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_INT32X3(x) FfxInt32(x) - -/// Broadcast a scalar value to a 4-dimensional signed integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_INT32X4(x) FfxInt32(x) - -/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a) - -/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a) - -/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a) - -/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a) - -/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a) - -/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a) - -/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a) - -/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a) - -/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a) - -/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a) - -/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a) - -/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. -/// -/// @ingroup HLSLCore -#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a) - -/// Pack 2x32-bit floating point values in a single 32bit value. -/// -/// This function first converts each component of value into their nearest 16-bit floating -/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the -/// 32bit unsigned integer respectively. -/// -/// @param [in] value A 2-dimensional floating point value to convert and pack. -/// -/// @returns -/// A packed 32bit value containing 2 16bit floating point values. -/// -/// @ingroup HLSLCore -FfxUInt32 packHalf2x16(FfxFloat32x2 value) -{ - return f32tof16(value.x) | (f32tof16(value.y) << 16); -} - -/// Broadcast a scalar value to a 2-dimensional floating point vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 2-dimensional floating point vector with value in each component. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) -{ - return FfxFloat32x2(value, value); -} - -/// Broadcast a scalar value to a 3-dimensional floating point vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 3-dimensional floating point vector with value in each component. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) -{ - return FfxFloat32x3(value, value, value); -} - -/// Broadcast a scalar value to a 4-dimensional floating point vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 4-dimensional floating point vector with value in each component. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) -{ - return FfxFloat32x4(value, value, value, value); -} - -/// Broadcast a scalar value to a 2-dimensional signed integer vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 2-dimensional signed integer vector with value in each component. -/// -/// @ingroup HLSLCore -FfxInt32x2 ffxBroadcast2(FfxInt32 value) -{ - return FfxInt32x2(value, value); -} - -/// Broadcast a scalar value to a 3-dimensional signed integer vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 3-dimensional signed integer vector with value in each component. -/// -/// @ingroup HLSLCore -FfxUInt32x3 ffxBroadcast3(FfxInt32 value) -{ - return FfxUInt32x3(value, value, value); -} - -/// Broadcast a scalar value to a 4-dimensional signed integer vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 4-dimensional signed integer vector with value in each component. -/// -/// @ingroup HLSLCore -FfxInt32x4 ffxBroadcast4(FfxInt32 value) -{ - return FfxInt32x4(value, value, value, value); -} - -/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 2-dimensional unsigned integer vector with value in each component. -/// -/// @ingroup HLSLCore -FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) -{ - return FfxUInt32x2(value, value); -} - -/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 3-dimensional unsigned integer vector with value in each component. -/// -/// @ingroup HLSLCore -FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) -{ - return FfxUInt32x3(value, value, value); -} - -/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. -/// -/// @param [in] value The value to to broadcast. -/// -/// @returns -/// A 4-dimensional unsigned integer vector with value in each component. -/// -/// @ingroup HLSLCore -FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) -{ - return FfxUInt32x4(value, value, value, value); -} - -FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) -{ - FfxUInt32 mask = (1u << bits) - 1; - return (src >> off) & mask; -} - -FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) -{ - return (ins & mask) | (src & (~mask)); -} - -FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) -{ - FfxUInt32 mask = (1u << bits) - 1; - return (ins & mask) | (src & (~mask)); -} - -/// Interprets the bit pattern of x as an unsigned integer. -/// -/// @param [in] x The input value. -/// -/// @returns -/// The input interpreted as an unsigned integer. -/// -/// @ingroup HLSLCore -FfxUInt32 ffxAsUInt32(FfxFloat32 x) -{ - return asuint(x); -} - -/// Interprets the bit pattern of x as an unsigned integer. -/// -/// @param [in] x The input value. -/// -/// @returns -/// The input interpreted as an unsigned integer. -/// -/// @ingroup HLSLCore -FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) -{ - return asuint(x); -} - -/// Interprets the bit pattern of x as an unsigned integer. -/// -/// @param [in] x The input value. -/// -/// @returns -/// The input interpreted as an unsigned integer. -/// -/// @ingroup HLSLCore -FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) -{ - return asuint(x); -} - -/// Interprets the bit pattern of x as an unsigned integer. -/// -/// @param [in] x The input value. -/// -/// @returns -/// The input interpreted as an unsigned integer. -/// -/// @ingroup HLSLCore -FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) -{ - return asuint(x); -} - -/// Interprets the bit pattern of x as a floating-point number. -/// -/// @param [in] x The input value. -/// -/// @returns -/// The input interpreted as a floating-point number. -/// -/// @ingroup HLSLCore -FfxFloat32 ffxAsFloat(FfxUInt32 x) -{ - return asfloat(x); -} - -/// Interprets the bit pattern of x as a floating-point number. -/// -/// @param [in] x The input value. -/// -/// @returns -/// The input interpreted as a floating-point number. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) -{ - return asfloat(x); -} - -/// Interprets the bit pattern of x as a floating-point number. -/// -/// @param [in] x The input value. -/// -/// @returns -/// The input interpreted as a floating-point number. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) -{ - return asfloat(x); -} - -/// Interprets the bit pattern of x as a floating-point number. -/// -/// @param [in] x The input value. -/// -/// @returns -/// The input interpreted as a floating-point number. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) -{ - return asfloat(x); -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the HLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup HLSLCore -FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) -{ - return lerp(x, y, t); -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the HLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) -{ - return lerp(x, y, t); -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the HLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) -{ - return lerp(x, y, t); -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the HLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) -{ - return lerp(x, y, t); -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the HLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) -{ - return lerp(x, y, t); -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the HLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) -{ - return lerp(x, y, t); -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the HLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) -{ - return lerp(x, y, t); -} - -/// Clamp a value to a [0..1] range. -/// -/// @param [in] x The value to clamp to [0..1] range. -/// -/// @returns -/// The clamped version of x. -/// -/// @ingroup HLSLCore -FfxFloat32 ffxSaturate(FfxFloat32 x) -{ - return saturate(x); -} - -/// Clamp a value to a [0..1] range. -/// -/// @param [in] x The value to clamp to [0..1] range. -/// -/// @returns -/// The clamped version of x. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) -{ - return saturate(x); -} - -/// Clamp a value to a [0..1] range. -/// -/// @param [in] x The value to clamp to [0..1] range. -/// -/// @returns -/// The clamped version of x. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) -{ - return saturate(x); -} - -/// Clamp a value to a [0..1] range. -/// -/// @param [in] x The value to clamp to [0..1] range. -/// -/// @returns -/// The clamped version of x. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) -{ - return saturate(x); -} - -/// Compute the factional part of a decimal value. -/// -/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is -/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic -/// function. -/// -/// @param [in] x The value to compute the fractional part from. -/// -/// @returns -/// The fractional part of x. -/// -/// @ingroup HLSLCore -FfxFloat32 ffxFract(FfxFloat32 x) -{ - return x - floor(x); -} - -/// Compute the factional part of a decimal value. -/// -/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is -/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic -/// function. -/// -/// @param [in] x The value to compute the fractional part from. -/// -/// @returns -/// The fractional part of x. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxFract(FfxFloat32x2 x) -{ - return x - floor(x); -} - -/// Compute the factional part of a decimal value. -/// -/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is -/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic -/// function. -/// -/// @param [in] x The value to compute the fractional part from. -/// -/// @returns -/// The fractional part of x. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxFract(FfxFloat32x3 x) -{ - return x - floor(x); -} - -/// Compute the factional part of a decimal value. -/// -/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is -/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic -/// function. -/// -/// @param [in] x The value to compute the fractional part from. -/// -/// @returns -/// The fractional part of x. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxFract(FfxFloat32x4 x) -{ - return x - floor(x); -} - -/// Compute the maximum of three values. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the max calculation. -/// @param [in] y The second value to include in the max calcuation. -/// @param [in] z The third value to include in the max calcuation. -/// -/// @returns -/// The maximum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) -{ - return max(x, max(y, z)); -} - -/// Compute the maximum of three values. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the max calculation. -/// @param [in] y The second value to include in the max calcuation. -/// @param [in] z The third value to include in the max calcuation. -/// -/// @returns -/// The maximum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) -{ - return max(x, max(y, z)); -} - -/// Compute the maximum of three values. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the max calculation. -/// @param [in] y The second value to include in the max calcuation. -/// @param [in] z The third value to include in the max calcuation. -/// -/// @returns -/// The maximum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) -{ - return max(x, max(y, z)); -} - -/// Compute the maximum of three values. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the max calculation. -/// @param [in] y The second value to include in the max calcuation. -/// @param [in] z The third value to include in the max calcuation. -/// -/// @returns -/// The maximum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) -{ - return max(x, max(y, z)); -} - -/// Compute the maximum of three values. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the max calculation. -/// @param [in] y The second value to include in the max calcuation. -/// @param [in] z The third value to include in the max calcuation. -/// -/// @returns -/// The maximum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) -{ - return max(x, max(y, z)); -} - -/// Compute the maximum of three values. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the max calculation. -/// @param [in] y The second value to include in the max calcuation. -/// @param [in] z The third value to include in the max calcuation. -/// -/// @returns -/// The maximum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) -{ - return max(x, max(y, z)); -} - -/// Compute the maximum of three values. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the max calculation. -/// @param [in] y The second value to include in the max calcuation. -/// @param [in] z The third value to include in the max calcuation. -/// -/// @returns -/// The maximum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) -{ - return max(x, max(y, z)); -} - -/// Compute the maximum of three values. -/// -/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the max calculation. -/// @param [in] y The second value to include in the max calcuation. -/// @param [in] z The third value to include in the max calcuation. -/// -/// @returns -/// The maximum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) -{ - return max(x, max(y, z)); -} - -/// Compute the median of three values. -/// -/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the median calculation. -/// @param [in] y The second value to include in the median calcuation. -/// @param [in] z The third value to include in the median calcuation. -/// -/// @returns -/// The median value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) -{ - return max(min(x, y), min(max(x, y), z)); -} - -/// Compute the median of three values. -/// -/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the median calculation. -/// @param [in] y The second value to include in the median calcuation. -/// @param [in] z The third value to include in the median calcuation. -/// -/// @returns -/// The median value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) -{ - return max(min(x, y), min(max(x, y), z)); -} - -/// Compute the median of three values. -/// -/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the median calculation. -/// @param [in] y The second value to include in the median calcuation. -/// @param [in] z The third value to include in the median calcuation. -/// -/// @returns -/// The median value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) -{ - return max(min(x, y), min(max(x, y), z)); -} - -/// Compute the median of three values. -/// -/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the median calculation. -/// @param [in] y The second value to include in the median calcuation. -/// @param [in] z The third value to include in the median calcuation. -/// -/// @returns -/// The median value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) -{ - return max(min(x, y), min(max(x, y), z)); -} - -/// Compute the median of three values. -/// -/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the median calculation. -/// @param [in] y The second value to include in the median calcuation. -/// @param [in] z The third value to include in the median calcuation. -/// -/// @returns -/// The median value of x, y, and z. -/// -/// @ingroup HLSL -FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) -{ - return max(min(x, y), min(max(x, y), z)); - // return min(max(min(y, z), x), max(y, z)); - // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); -} - -/// Compute the median of three values. -/// -/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the median calculation. -/// @param [in] y The second value to include in the median calcuation. -/// @param [in] z The third value to include in the median calcuation. -/// -/// @returns -/// The median value of x, y, and z. -/// -/// @ingroup HLSL -FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) -{ - return max(min(x, y), min(max(x, y), z)); - // return min(max(min(y, z), x), max(y, z)); - // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); -} - -/// Compute the median of three values. -/// -/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the median calculation. -/// @param [in] y The second value to include in the median calcuation. -/// @param [in] z The third value to include in the median calcuation. -/// -/// @returns -/// The median value of x, y, and z. -/// -/// @ingroup HLSL -FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) -{ - return max(min(x, y), min(max(x, y), z)); -} - -/// Compute the median of three values. -/// -/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the median calculation. -/// @param [in] y The second value to include in the median calcuation. -/// @param [in] z The third value to include in the median calcuation. -/// -/// @returns -/// The median value of x, y, and z. -/// -/// @ingroup HLSL -FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) -{ - return max(min(x, y), min(max(x, y), z)); -} - -/// Compute the minimum of three values. -/// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calcuation. -/// @param [in] z The third value to include in the min calcuation. -/// -/// @returns -/// The minimum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) -{ - return min(x, min(y, z)); -} - -/// Compute the minimum of three values. -/// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calcuation. -/// @param [in] z The third value to include in the min calcuation. -/// -/// @returns -/// The minimum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) -{ - return min(x, min(y, z)); -} - -/// Compute the minimum of three values. -/// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calcuation. -/// @param [in] z The third value to include in the min calcuation. -/// -/// @returns -/// The minimum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) -{ - return min(x, min(y, z)); -} - -/// Compute the minimum of three values. -/// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calcuation. -/// @param [in] z The third value to include in the min calcuation. -/// -/// @returns -/// The minimum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) -{ - return min(x, min(y, z)); -} - -/// Compute the minimum of three values. -/// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calcuation. -/// @param [in] z The third value to include in the min calcuation. -/// -/// @returns -/// The minimum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) -{ - return min(x, min(y, z)); -} - -/// Compute the minimum of three values. -/// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calcuation. -/// @param [in] z The third value to include in the min calcuation. -/// -/// @returns -/// The minimum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) -{ - return min(x, min(y, z)); -} - -/// Compute the minimum of three values. -/// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calculation. -/// @param [in] z The third value to include in the min calculation. -/// -/// @returns -/// The minimum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) -{ - return min(x, min(y, z)); -} - -/// Compute the minimum of three values. -/// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. -/// -/// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calcuation. -/// @param [in] z The third value to include in the min calcuation. -/// -/// @returns -/// The minimum value of x, y, and z. -/// -/// @ingroup HLSLCore -FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) -{ - return min(x, min(y, z)); -} - - -FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) -{ - return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); -} - -FfxUInt32 ffxPackF32(FfxFloat32x2 v){ - FfxUInt32x2 p = FfxUInt32x2(f32tof16(FfxFloat32x2(v).x), f32tof16(FfxFloat32x2(v).y)); - return p.x | (p.y << 16); -} - -FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ - return f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16)); -} - -//============================================================================================================================== -// HLSL HALF -//============================================================================================================================== -//============================================================================================================================== -// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly). -// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ -FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x) -{ - FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16)); - return FFX_MIN16_F2(t); -} -FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) -{ - return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y)); -} -FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x) -{ - FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16); - return FFX_MIN16_U2(t); -} -FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) -{ - return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y)); -} - -/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. -/// @param v Value to invert. -/// @return If v = 0 returns 0. If v != 0 returns 1/v. -FfxFloat32 ffxInvertSafe(FfxFloat32 v){ - FfxFloat32 s = sign(v); - FfxFloat32 s2 = s*s; - return s2/(v + s2 - 1.0); -} - -/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. -/// @param v Value to invert. -/// @return If v = 0 returns 0. If v != 0 returns 1/v. -FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ - FfxFloat32x2 s = sign(v); - FfxFloat32x2 s2 = s*s; - return s2/(v + s2 - FfxFloat32x2(1.0, 1.0)); -} - -/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. -/// @param v Value to invert. -/// @return If v = 0 returns 0. If v != 0 returns 1/v. -FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ - FfxFloat32x3 s = sign(v); - FfxFloat32x3 s2 = s*s; - return s2/(v + s2 - FfxFloat32x3(1.0, 1.0, 1.0)); -} - -/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. -/// @param v Value to invert. -/// @return If v = 0 returns 0. If v != 0 returns 1/v. -FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ - FfxFloat32x4 s = sign(v); - FfxFloat32x4 s2 = s*s; - return s2/(v + s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0)); -} - -#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x)) -#if FFX_HALF - -#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x)) -#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x)) -#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x)) - -FfxUInt32 ffxPackF16(FfxFloat16x2 v){ - FfxUInt32x2 p = FfxUInt32x2(f32tof16(FfxFloat32x2(v).x), f32tof16(FfxFloat32x2(v).y)); - return p.x | (p.y << 16); -} - -FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ - return FfxFloat16x2(f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16))); -} - -//------------------------------------------------------------------------------------------------------------------------------ -FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x) -{ - return f32tof16(x.x) + (f32tof16(x.y) << 16); -} -FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x) -{ - return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw)); -} -FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x) -{ - return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16); -} -FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x) -{ - return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw)); -} -#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x)) -#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x)) -#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x)) -#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x)) - -#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) -#define FFX_TO_UINT16(x) asuint16(x) -#define FFX_TO_UINT16X2(x) asuint16(x) -#define FFX_TO_UINT16X3(x) asuint16(x) -#define FFX_TO_UINT16X4(x) asuint16(x) -#else -#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a))) -#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y)) -#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z)) -#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w)) -#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) - -#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) -#define FFX_TO_FLOAT16(x) asfloat16(x) -#define FFX_TO_FLOAT16X2(x) asfloat16(x) -#define FFX_TO_FLOAT16X3(x) asfloat16(x) -#define FFX_TO_FLOAT16X4(x) asfloat16(x) -#else -#define FFX_TO_FLOAT16(a) FFX_MIN16_F(f16tof32(FfxUInt32(a))) -#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y)) -#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z)) -#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w)) -#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) - -//============================================================================================================================== -#define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a) -#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a) -#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a) -#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a) - -//------------------------------------------------------------------------------------------------------------------------------ -#define FFX_BROADCAST_INT16(a) FFX_MIN16_I(a) -#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a) -#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a) -#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a) - -//------------------------------------------------------------------------------------------------------------------------------ -#define FFX_BROADCAST_UINT16(a) FFX_MIN16_U(a) -#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a) -#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a) -#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a) - -//============================================================================================================================== -FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a) -{ - return FFX_MIN16_U(abs(FFX_MIN16_I(a))); -} -FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a) -{ - return FFX_MIN16_U2(abs(FFX_MIN16_I2(a))); -} -FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a) -{ - return FFX_MIN16_U3(abs(FFX_MIN16_I3(a))); -} -FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a) -{ - return FFX_MIN16_U4(abs(FFX_MIN16_I4(a))); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m) -{ - return max(n, min(x, m)); -} -FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m) -{ - return max(n, min(x, m)); -} -FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m) -{ - return max(n, min(x, m)); -} -FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m) -{ - return max(n, min(x, m)); -} -//------------------------------------------------------------------------------------------------------------------------------ -// V_FRACT_F16 (note DX frac() is different). -FFX_MIN16_F ffxFract(FFX_MIN16_F x) -{ - return x - floor(x); -} -FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x) -{ - return x - floor(x); -} -FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x) -{ - return x - floor(x); -} -FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x) -{ - return x - floor(x); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a) -{ - return lerp(x, y, a); -} -FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a) -{ - return lerp(x, y, a); -} -FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a) -{ - return lerp(x, y, a); -} -FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a) -{ - return lerp(x, y, a); -} -FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a) -{ - return lerp(x, y, a); -} -FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a) -{ - return lerp(x, y, a); -} -FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a) -{ - return lerp(x, y, a); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) -{ - return max(x, max(y, z)); -} -FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) -{ - return max(x, max(y, z)); -} -FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) -{ - return max(x, max(y, z)); -} -FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) -{ - return max(x, max(y, z)); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) -{ - return min(x, min(y, z)); -} -FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) -{ - return min(x, min(y, z)); -} -FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) -{ - return min(x, min(y, z)); -} -FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) -{ - return min(x, min(y, z)); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x) -{ - return rcp(x); -} -FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x) -{ - return rcp(x); -} -FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x) -{ - return rcp(x); -} -FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x) -{ - return rcp(x); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x) -{ - return rsqrt(x); -} -FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x) -{ - return rsqrt(x); -} -FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x) -{ - return rsqrt(x); -} -FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x) -{ - return rsqrt(x); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_F ffxSaturate(FFX_MIN16_F x) -{ - return saturate(x); -} -FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x) -{ - return saturate(x); -} -FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x) -{ - return saturate(x); -} -FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x) -{ - return saturate(x); -} -//------------------------------------------------------------------------------------------------------------------------------ -FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b) -{ - return FFX_MIN16_U(FFX_MIN16_I(a) >> FFX_MIN16_I(b)); -} -FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b) -{ - return FFX_MIN16_U2(FFX_MIN16_I2(a) >> FFX_MIN16_I2(b)); -} -FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b) -{ - return FFX_MIN16_U3(FFX_MIN16_I3(a) >> FFX_MIN16_I3(b)); -} -FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b) -{ - return FFX_MIN16_U4(FFX_MIN16_I4(a) >> FFX_MIN16_I4(b)); -} -#endif // FFX_HALF - -//============================================================================================================================== -// HLSL WAVE -//============================================================================================================================== -#if defined(FFX_WAVE) -// Where 'x' must be a compile time literal. -FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); -} -FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); -} -FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); -} -FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); -} -FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); -} -FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); -} -FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); -} -FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); -} -FfxBoolean AWaveIsFirstLane() -{ - return WaveIsFirstLane(); -} -FfxUInt32 AWaveLaneIndex() -{ - return WaveGetLaneIndex(); -} -FfxBoolean AWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x) -{ - return WaveReadLaneAt(v, x); -} -FfxUInt32 AWavePrefixCountBits(FfxBoolean v) -{ - return WavePrefixCountBits(v); -} -FfxUInt32 AWaveActiveCountBits(FfxBoolean v) -{ - return WaveActiveCountBits(v); -} -FfxUInt32 AWaveReadLaneFirstU1(FfxUInt32 v) -{ - return WaveReadLaneFirst(v); -} -FfxUInt32 WaveOr(FfxUInt32 a) -{ - return WaveActiveBitOr(a); -} -FfxFloat32 WaveMin(FfxFloat32 a) -{ - return WaveActiveMin(a); -} -FfxFloat32 WaveMax(FfxFloat32 a) -{ - return WaveActiveMax(a); -} -FfxUInt32 WaveLaneCount() -{ - return WaveGetLaneCount(); -} -FfxBoolean WaveAllTrue(FfxBoolean v) -{ - return WaveActiveAllTrue(v); -} -FfxFloat32 QuadReadX(FfxFloat32 v) -{ - return QuadReadAcrossX(v); -} -FfxFloat32x2 QuadReadX(FfxFloat32x2 v) -{ - return QuadReadAcrossX(v); -} -FfxFloat32 QuadReadY(FfxFloat32 v) -{ - return QuadReadAcrossY(v); -} -FfxFloat32x2 QuadReadY(FfxFloat32x2 v) -{ - return QuadReadAcrossY(v); -} - -#if FFX_HALF -FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) -{ - return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); -} -FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x) -{ - return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); -} -FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) -{ - return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); -} -FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) -{ - return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); -} -#endif // FFX_HALF -#endif // #if defined(FFX_WAVE) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h.meta deleted file mode 100644 index ff56bee..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 299f67e8b7e1d1a48a577bf8b328ac92 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_portability.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_portability.h deleted file mode 100644 index 84a62d6..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_portability.h +++ /dev/null @@ -1,51 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) -{ - d = a + ffxBroadcast3(b); - return d; -} - -FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) -{ - d = a; - return d; -} - -FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) -{ - d = a * b; - return d; -} - -FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) -{ - d = a * ffxBroadcast3(b); - return d; -} - -FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) -{ - d = rcp(a); - return d; -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_portability.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_portability.h.meta deleted file mode 100644 index 25ff64b..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_core_portability.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 8d2ace0bd52e0e1438e08ddaccd3ba24 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h deleted file mode 100644 index c425de7..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h +++ /dev/null @@ -1,288 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_ACCUMULATE_H -#define FFX_FSR3UPSCALER_ACCUMULATE_H - -FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) -{ - return length(fMotionVector * DisplaySize()); -} -#if FFX_HALF -FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector) -{ - return length(fMotionVector * FFX_MIN16_F2(DisplaySize())); -} -#endif - -void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight) -{ - // Aviod invalid values when accumulation and upsampled weight is 0 - fAccumulation = ffxMax(FSR3UPSCALER_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www); - -#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT - //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) - fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz))); - fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor))); -#endif - - const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation; - fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha); - - fHistoryColor = YCoCgToRGB(fHistoryColor); - -#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT - fHistoryColor = InverseTonemap(fHistoryColor); -#endif -} - -void RectifyHistory( - const AccumulationPassCommonParams params, - RectificationBox clippingBox, - FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, - FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation, - FfxFloat32 fLockContributionThisFrame, - FfxFloat32 fTemporalReactiveFactor, - FfxFloat32 fLumaInstabilityFactor) -{ - const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f); - const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor)); - const FfxFloat32 fBoxScale = ffxLerp(3.0f, 1.0f, fBoxScaleT); - - const FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale; - const FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec; - const FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec; - - if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) { - - const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax); - - FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx; - - const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor; - const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f); - fHistoryContribution *= fReactiveContribution; - - // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection - fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution)); - - // Scale accumulation using rectification info - const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f)); - fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution)); - } -} - -void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor) -{ - StoreUpscaledOutput(iPxHrPos, fUpscaledColor); -} - -void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight) -{ - // we expect similar motion for next frame - // kill lock if that location is outside screen, avoid locks to be clamped to screen borders - FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; - if (IsUvInside(fEstimatedUvNextFrame) == false) { - KillLock(fLockStatus); - } - else { - // Decrease lock lifetime - const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame); - const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax); - fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); - } - - StoreLockStatus(params.iPxHrPos, fLockStatus); -} - - -FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState) -{ - // Always assume max accumulation was reached - FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor); - - fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10))))); - - fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20)))); - - return fBaseAccumulation.xxx; -} - -FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff) -{ - const FfxFloat32 fUnormThreshold = 1.0f / 255.0f; - const FfxInt32 N_MINUS_1 = 0; - const FfxInt32 N_MINUS_2 = 1; - const FfxInt32 N_MINUS_3 = 2; - const FfxInt32 N_MINUS_4 = 3; - - FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x; - -#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT - fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma)); -#endif - - fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f; - - const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false); - FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f); - - FfxFloat32 fLumaInstability = 0.0f; - FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]); - - FfxFloat32 fMin = abs(fDiffs0); - - if (fMin >= fUnormThreshold) { - for (int i = N_MINUS_2; i <= N_MINUS_4; i++) { - FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]); - - if (sign(fDiffs0) == sign(fDiffs1)) { - - // Scale difference to protect historically similar values - const FfxFloat32 fMinBias = 1.0f; - fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias); - } - } - - const FfxFloat32 fBoxSize = clippingBox.boxVec.x; - const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f); - - fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor; - fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold); - - fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f)); - } - - //shift history - fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3]; - fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2]; - fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1]; - fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma; - - StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory); - - return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0); -} - -FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor) -{ - FfxFloat32 fNewFactor = ffxMin(0.99f, fTemporalReactiveFactor); - - fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity))); - - fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor)); - - // Force reactive factor for new samples - fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor; - - if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) { - fNewFactor = ffxMax(FSR3UPSCALER_EPSILON, fNewFactor) * -1.0f; - } - - return fNewFactor; -} - -AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) -{ - AccumulationPassCommonParams params; - - params.iPxHrPos = iPxHrPos; - const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); - params.fHrUv = fHrUv; - - const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize(); - params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize()); - - params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv); - params.fHrVelocity = GetPxHrVelocity(params.fMotionVector); - - ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); - - params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler)); - - const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler); - params.fDilatedReactiveFactor = fDilatedReactiveMasks.x; - params.fAccumulationMask = fDilatedReactiveMasks.y; - params.bIsResetFrame = (0 == FrameIndex()); - - params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame); - - return params; -} - -void Accumulate(FfxInt32x2 iPxHrPos) -{ - const AccumulationPassCommonParams params = InitParams(iPxHrPos); - - FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0); - FfxFloat32x2 fLockStatus; - InitializeNewLockSample(fLockStatus); - - FfxFloat32 fTemporalReactiveFactor = 0.0f; - FfxBoolean bInMotionLastFrame = FFX_FALSE; - LockState lockState = { FFX_FALSE , FFX_FALSE }; - if (params.bIsExistingSample && !params.bIsResetFrame) { - ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame); - lockState = ReprojectHistoryLockStatus(params, fLockStatus); - } - - FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor); - - FfxFloat32 fLuminanceDiff = 0.0f; - FfxFloat32 fLockContributionThisFrame = 0.0f; - UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff); - - // Load upsampled input color - RectificationBox clippingBox; - FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor); - - const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff); - - - FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState); - - if (params.bIsNewSample) { - fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz); - } - else { - RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor); - - Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight); - } - - fHistoryColor = UnprepareRgb(fHistoryColor, Exposure()); - - FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w); - - // Get new temporal reactive factor - fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor); - - StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor)); - - // Output final color when RCAS is disabled -#if FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING == 0 - WriteUpscaledOutput(iPxHrPos, fHistoryColor); -#endif - StoreNewLocks(iPxHrPos, 0); -} - -#endif // FFX_FSR3UPSCALER_ACCUMULATE_H diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta deleted file mode 100644 index 08c98e7..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 3fc2f7a2c8c31324a949e1761bf599cc -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h deleted file mode 100644 index c70bc1d..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h +++ /dev/null @@ -1,928 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#include "ffx_fsr3upscaler_resources.h" - -#if defined(FFX_GPU) -#ifdef __hlsl_dx_compiler -#pragma dxc diagnostic push -#pragma dxc diagnostic ignored "-Wambig-lit-shift" -#endif //__hlsl_dx_compiler -#include "ffx_core.h" -#ifdef __hlsl_dx_compiler -#pragma dxc diagnostic pop -#endif //__hlsl_dx_compiler -#endif // #if defined(FFX_GPU) - -#if defined(FFX_GPU) -#ifndef FFX_PREFER_WAVE64 -#define FFX_PREFER_WAVE64 -#endif // FFX_PREFER_WAVE64 - -#if defined(FFX_GPU) -#pragma warning(disable: 3205) // conversion from larger type to smaller -#endif // #if defined(FFX_GPU) - -#define DECLARE_SRV_REGISTER(regIndex) t##regIndex -#define DECLARE_UAV_REGISTER(regIndex) u##regIndex -#define DECLARE_CB_REGISTER(regIndex) b##regIndex -#define FFX_FSR3UPSCALER_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) -#define FFX_FSR3UPSCALER_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) -#define FFX_FSR3UPSCALER_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) - -#if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) - cbuffer cbFSR3Upscaler : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) - { - FfxInt32x2 iRenderSize; - FfxInt32x2 iMaxRenderSize; - FfxInt32x2 iDisplaySize; - FfxInt32x2 iInputColorResourceDimensions; - FfxInt32x2 iLumaMipDimensions; - FfxInt32 iLumaMipLevelToUse; - FfxInt32 iFrameIndex; - - FfxFloat32x4 fDeviceToViewDepth; - FfxFloat32x2 fJitter; - FfxFloat32x2 fMotionVectorScale; - FfxFloat32x2 fDownscaleFactor; - FfxFloat32x2 fMotionVectorJitterCancellation; - FfxFloat32 fPreExposure; - FfxFloat32 fPreviousFramePreExposure; - FfxFloat32 fTanHalfFOV; - FfxFloat32 fJitterSequenceLength; - FfxFloat32 fDeltaTime; - FfxFloat32 fDynamicResChangeFactor; - FfxFloat32 fViewSpaceToMetersFactor; - - FfxInt32 iDummy; - }; - -#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR3Upscaler) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR3Upscaler size. - -/* Define getter functions in the order they are defined in the CB! */ -FfxInt32x2 RenderSize() -{ - return iRenderSize; -} - -FfxInt32x2 MaxRenderSize() -{ - return iMaxRenderSize; -} - -FfxInt32x2 DisplaySize() -{ - return iDisplaySize; -} - -FfxInt32x2 InputColorResourceDimensions() -{ - return iInputColorResourceDimensions; -} - -FfxInt32x2 LumaMipDimensions() -{ - return iLumaMipDimensions; -} - -FfxInt32 LumaMipLevelToUse() -{ - return iLumaMipLevelToUse; -} - -FfxInt32 FrameIndex() -{ - return iFrameIndex; -} - -FfxFloat32x2 Jitter() -{ - return fJitter; -} - -FfxFloat32x4 DeviceToViewSpaceTransformFactors() -{ - return fDeviceToViewDepth; -} - -FfxFloat32x2 MotionVectorScale() -{ - return fMotionVectorScale; -} - -FfxFloat32x2 DownscaleFactor() -{ - return fDownscaleFactor; -} - -FfxFloat32x2 MotionVectorJitterCancellation() -{ - return fMotionVectorJitterCancellation; -} - -FfxFloat32 PreExposure() -{ - return fPreExposure; -} - -FfxFloat32 PreviousFramePreExposure() -{ - return fPreviousFramePreExposure; -} - -FfxFloat32 TanHalfFoV() -{ - return fTanHalfFOV; -} - -FfxFloat32 JitterSequenceLength() -{ - return fJitterSequenceLength; -} - -FfxFloat32 DeltaTime() -{ - return fDeltaTime; -} - -FfxFloat32 DynamicResChangeFactor() -{ - return fDynamicResChangeFactor; -} - -FfxFloat32 ViewSpaceToMetersFactor() -{ - return fViewSpaceToMetersFactor; -} -#endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) - -#define FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(p) FFX_FSR3UPSCALER_ROOTSIG_STR(p) -#define FFX_FSR3UPSCALER_ROOTSIG_STR(p) #p -#define FFX_FSR3UPSCALER_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE) ", b0), " \ - "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ - "comparisonFunc = COMPARISON_NEVER, " \ - "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ - "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ - "comparisonFunc = COMPARISON_NEVER, " \ - "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] - -#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size. - -#define FFX_FSR3UPSCALER_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE) ", b0), " \ - "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE) ", b1), " \ - "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ - "comparisonFunc = COMPARISON_NEVER, " \ - "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ - "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ - "comparisonFunc = COMPARISON_NEVER, " \ - "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] -#if defined(FFX_FSR3UPSCALER_EMBED_ROOTSIG) -#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT FFX_FSR3UPSCALER_ROOTSIG -#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR3UPSCALER_CB2_ROOTSIG -#else -#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT -#endif // #if FFX_FSR3UPSCALER_EMBED_ROOTSIG - -#if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) -cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) -{ - FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels - FfxFloat32 fTcScale; - FfxFloat32 fReactiveScale; - FfxFloat32 fReactiveMax; -}; - -FfxFloat32 TcThreshold() -{ - return fTcThreshold; -} - -FfxFloat32 TcScale() -{ - return fTcScale; -} - -FfxFloat32 ReactiveScale() -{ - return fReactiveScale; -} - -FfxFloat32 ReactiveMax() -{ - return fReactiveMax; -} -#endif // #if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) - -#if defined(FSR3UPSCALER_BIND_CB_RCAS) -cbuffer cbRCAS : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_RCAS) -{ - FfxUInt32x4 rcasConfig; -}; - -FfxUInt32x4 RCASConfig() -{ - return rcasConfig; -} -#endif // #if defined(FSR3UPSCALER_BIND_CB_RCAS) - - -#if defined(FSR3UPSCALER_BIND_CB_REACTIVE) -cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_REACTIVE) -{ - FfxFloat32 gen_reactive_scale; - FfxFloat32 gen_reactive_threshold; - FfxFloat32 gen_reactive_binaryValue; - FfxUInt32 gen_reactive_flags; -}; - -FfxFloat32 GenReactiveScale() -{ - return gen_reactive_scale; -} - -FfxFloat32 GenReactiveThreshold() -{ - return gen_reactive_threshold; -} - -FfxFloat32 GenReactiveBinaryValue() -{ - return gen_reactive_binaryValue; -} - -FfxUInt32 GenReactiveFlags() -{ - return gen_reactive_flags; -} -#endif // #if defined(FSR3UPSCALER_BIND_CB_REACTIVE) - -#if defined(FSR3UPSCALER_BIND_CB_SPD) -cbuffer cbSPD : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_SPD) { - - FfxUInt32 mips; - FfxUInt32 numWorkGroups; - FfxUInt32x2 workGroupOffset; - FfxUInt32x2 renderSize; -}; - -FfxUInt32 MipCount() -{ - return mips; -} - -FfxUInt32 NumWorkGroups() -{ - return numWorkGroups; -} - -FfxUInt32x2 WorkGroupOffset() -{ - return workGroupOffset; -} - -FfxUInt32x2 SPD_RenderSize() -{ - return renderSize; -} -#endif // #if defined(FSR3UPSCALER_BIND_CB_SPD) - -// Declare and sample camera buffers as regular textures, unless overridden -#if !defined(UNITY_FSR3_TEX2D) -#define UNITY_FSR3_TEX2D(type) Texture2D -#endif -#if !defined(UNITY_FSR3_RWTEX2D) -#define UNITY_FSR3_RWTEX2D(type) RWTexture2D -#endif -#if !defined(UNITY_FSR3_POS) -#define UNITY_FSR3_POS(pxPos) (pxPos) -#endif -#if !defined(UNITY_FSR3_UV) -#define UNITY_FSR3_UV(uv) (uv) -#endif - -SamplerState s_PointClamp : register(s0); -SamplerState s_LinearClamp : register(s1); - - // SRVs - #if defined FSR3UPSCALER_BIND_SRV_INPUT_COLOR - UNITY_FSR3_TEX2D(FfxFloat32x4) r_input_color_jittered : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY - UNITY_FSR3_TEX2D(FfxFloat32x4) r_input_opaque_only : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS - UNITY_FSR3_TEX2D(FfxFloat32x4) r_input_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INPUT_DEPTH - UNITY_FSR3_TEX2D(FfxFloat32) r_input_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE - Texture2D r_input_exposure : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE); - #endif - #if defined FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE - Texture2D r_auto_exposure : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE); - #endif - #if defined FSR3UPSCALER_BIND_SRV_REACTIVE_MASK - Texture2D r_reactive_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK); - #endif - #if defined FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK - Texture2D r_transparency_and_composition_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); - #endif - #if defined FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH - Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS - Texture2D r_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS - Texture2D r_previous_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_DILATED_DEPTH - Texture2D r_dilated_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED - Texture2D r_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED); - #endif - #if defined FSR3UPSCALER_BIND_SRV_LOCK_STATUS - Texture2D r_lock_status : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LOCK_STATUS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA - Texture2D r_lock_input_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA); - #endif - #if defined FSR3UPSCALER_BIND_SRV_NEW_LOCKS - Texture2D r_new_locks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_NEW_LOCKS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR - Texture2D r_prepared_input_color : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_SRV_LUMA_HISTORY - Texture2D r_luma_history : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY); - #endif - #if defined FSR3UPSCALER_BIND_SRV_RCAS_INPUT - Texture2D r_rcas_input : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RCAS_INPUT); - #endif - #if defined FSR3UPSCALER_BIND_SRV_LANCZOS_LUT - Texture2D r_lanczos_lut : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT); - #endif - #if defined FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS - Texture2D r_imgMips : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT - Texture2D r_upsample_maximum_bias_lut : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); - #endif - #if defined FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS - Texture2D r_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS); - #endif - - #if defined FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR - Texture2D r_input_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR - Texture2D r_input_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); - #endif - - // UAV declarations - #if defined FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH - RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS - RWTexture2D rw_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS); - #endif - #if defined FSR3UPSCALER_BIND_UAV_DILATED_DEPTH - RWTexture2D rw_dilated_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED - RWTexture2D rw_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED); - #endif - #if defined FSR3UPSCALER_BIND_UAV_LOCK_STATUS - RWTexture2D rw_lock_status : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LOCK_STATUS); - #endif - #if defined FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA - RWTexture2D rw_lock_input_luma : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA); - #endif - #if defined FSR3UPSCALER_BIND_UAV_NEW_LOCKS - RWTexture2D rw_new_locks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_NEW_LOCKS); - #endif - #if defined FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR - RWTexture2D rw_prepared_input_color : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_UAV_LUMA_HISTORY - RWTexture2D rw_luma_history : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY); - #endif - #if defined FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT - UNITY_FSR3_RWTEX2D(FfxFloat32x4) rw_upscaled_output : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT); - #endif - #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); - #endif - #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5); - #endif - #if defined FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS - RWTexture2D rw_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS); - #endif - #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE - RWTexture2D rw_exposure : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE); - #endif - #if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE - RWTexture2D rw_auto_exposure : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE); - #endif - #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC - globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC); - #endif - - #if defined FSR3UPSCALER_BIND_UAV_AUTOREACTIVE - RWTexture2D rw_output_autoreactive : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE); - #endif - #if defined FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION - RWTexture2D rw_output_autocomposition : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION); - #endif - #if defined FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR - RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR - RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR); - #endif - -#if defined(FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS) -FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel) -{ - return r_imgMips.mips[mipLevel][iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS) -FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) -{ - return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) -FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) -{ - return r_input_depth[UNITY_FSR3_POS(iPxPos)]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) -FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) -{ - return r_input_depth.SampleLevel(s_LinearClamp, UNITY_FSR3_UV(fUV), 0).x; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) -FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) -{ - return r_reactive_mask[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) -FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) -{ - return r_transparency_and_composition_mask[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR) -FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) -{ - return r_input_color_jittered[UNITY_FSR3_POS(iPxPos)].rgb; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR) -FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) -{ - return r_input_color_jittered.SampleLevel(s_LinearClamp, UNITY_FSR3_UV(fUV), 0).rgb; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR) -FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) -{ - return r_prepared_input_color[iPxPos].xyz; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) -FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) -{ - FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[UNITY_FSR3_POS(iPxDilatedMotionVectorPos)].xy; - - FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); - -#if FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS - fUvMotionVector -= MotionVectorJitterCancellation(); -#endif - - return fUvMotionVector; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) -FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) -{ - return r_internal_upscaled_color[iPxHistory]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY) -void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) -{ - rw_luma_history[iPxPos] = fLumaHistory; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) -FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) -{ - return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); -} -#endif - -FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) -{ -#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT) - return r_rcas_input[iPxPos]; -#else - return 0.0; -#endif -} - -#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED) -void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) -{ - rw_internal_upscaled_color[iPxHistory] = fHistory; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED) -void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) -{ - rw_internal_upscaled_color[iPxPos] = fColorAndWeight; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT) -void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) -{ - rw_upscaled_output[UNITY_FSR3_POS(iPxPos)] = FfxFloat32x4(fColor, 1.f); -} -#endif - -//LOCK_LIFETIME_REMAINING == 0 -//Should make LockInitialLifetime() return a const 1.0f later -#if defined(FSR3UPSCALER_BIND_SRV_LOCK_STATUS) -FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) -{ - return r_lock_status[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_LOCK_STATUS) -void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) -{ - rw_lock_status[iPxPos] = fLockStatus; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA) -FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos) -{ - return r_lock_input_luma[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA) -void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) -{ - rw_lock_input_luma[iPxPos] = fLuma; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS) -FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) -{ - return r_new_locks[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) -FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) -{ - return rw_new_locks[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) -void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) -{ - rw_new_locks[iPxPos] = newLock; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR) -void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) -{ - rw_prepared_input_color[iPxPos] = fTonemapped; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR) -FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) -{ - return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_LOCK_STATUS) -FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) -{ - FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); - return fLockStatus; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) -FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) -{ - return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) -void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) -{ - FfxUInt32 uDepth = asuint(fDepth); - - #if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); - #else - InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth - #endif -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) -void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) -{ - rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH) -void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) -{ - rw_dilated_depth[iPxPos] = fDepth; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS) -void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) -{ - rw_dilated_motion_vectors[iPxPos] = fMotionVector; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) -FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) -{ - return r_dilated_motion_vectors[iPxInput].xy; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) -FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) -{ - return r_previous_dilated_motion_vectors[iPxInput].xy; -} - -FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) -{ - return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) -FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) -{ - return r_dilated_depth[iPxInput]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) -FfxFloat32 Exposure() -{ - FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x; - - if (exposure == 0.0f) { - exposure = 1.0f; - } - - return exposure; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE) -FfxFloat32 AutoExposure() -{ - FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x; - - if (exposure == 0.0f) { - exposure = 1.0f; - } - - return exposure; -} -#endif - -FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) -{ -#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) - return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); -#else - return 0.f; -#endif -} - -#if defined(FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) -FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) -{ - // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. - return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0); -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) -FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) -{ - return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) -FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos) -{ - return r_dilated_reactive_masks[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS) -void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) -{ - rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) -FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) -{ - return r_input_opaque_only[UNITY_FSR3_POS(iPxPos)].xyz; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) -FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) -{ - return r_input_prev_color_pre_alpha[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) -FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) -{ - return r_input_prev_color_post_alpha[iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) -#if defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION) -void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) -{ - rw_output_autoreactive[iPxPos] = fReactive.x; - - rw_output_autocomposition[iPxPos] = fReactive.y; -} -#endif -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR) -void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) -{ - rw_output_prev_color_pre_alpha[iPxPos] = color; - -} -#endif - -#if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR) -void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) -{ - rw_output_prev_color_post_alpha[iPxPos] = color; -} -#endif - -FfxFloat32x2 SPD_LoadExposureBuffer() -{ -#if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE - return rw_auto_exposure[FfxInt32x2(0, 0)]; -#else - return FfxFloat32x2(0.f, 0.f); -#endif // #if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE -} - -void SPD_SetExposureBuffer(FfxFloat32x2 value) -{ -#if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE - rw_auto_exposure[FfxInt32x2(0, 0)] = value; -#endif // #if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE -} - -FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) -{ -#if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0); -#else - return FfxFloat32x4(0.f, 0.f, 0.f, 0.f); -#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 -} - -void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value) -{ - switch (slice) - { - case FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL: -#if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - rw_img_mip_shading_change[iPxPos] = value; -#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - break; - case 5: -#if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - rw_img_mip_5[iPxPos] = value; -#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - break; - default: - - // avoid flattened side effect -#if defined(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) - rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos]; -#elif defined(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5) - rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos]; -#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - break; - } -} - -void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) -{ -#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC - InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter); -#endif // #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC -} - -void SPD_ResetAtomicCounter() -{ -#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC - rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0; -#endif // #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC -} - -#endif // #if defined(FFX_GPU) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta deleted file mode 100644 index e060d2c..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: ba849fdeb042e7f458c81408414db834 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h deleted file mode 100644 index 1f78a29..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h +++ /dev/null @@ -1,566 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#if !defined(FFX_FSR3UPSCALER_COMMON_H) -#define FFX_FSR3UPSCALER_COMMON_H - -#if defined(FFX_CPU) || defined(FFX_GPU) -//Locks -#define LOCK_LIFETIME_REMAINING 0 -#define LOCK_TEMPORAL_LUMA 1 -#endif // #if defined(FFX_CPU) || defined(FFX_GPU) - -#if defined(FFX_GPU) -FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MIN = 6.10e-05f; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MAX = 65504.0f; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_EPSILON = 1e-03f; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_TONEMAP_EPSILON = 1.0f / FSR3UPSCALER_FP16_MAX; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_FLT_MAX = 3.402823466e+38f; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_FLT_MIN = 1.175494351e-38f; - -// treat vector truncation warnings as errors -#pragma warning(error: 3206) - -// suppress warnings -#pragma warning(disable: 3205) // conversion from larger type to smaller -#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative - -// Reconstructed depth usage -FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f; - -// Accumulation -FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f; -FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f; -FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples -FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; - -// Auto exposure -FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; - -struct AccumulationPassCommonParams -{ - FfxInt32x2 iPxHrPos; - FfxFloat32x2 fHrUv; - FfxFloat32x2 fLrUv_HwSampler; - FfxFloat32x2 fMotionVector; - FfxFloat32x2 fReprojectedHrUv; - FfxFloat32 fHrVelocity; - FfxFloat32 fDepthClipFactor; - FfxFloat32 fDilatedReactiveFactor; - FfxFloat32 fAccumulationMask; - - FfxBoolean bIsResetFrame; - FfxBoolean bIsExistingSample; - FfxBoolean bIsNewSample; -}; - -struct LockState -{ - FfxBoolean NewLock; //Set for both unique new and re-locked new - FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock) -}; - -void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus) -{ - fLockStatus = FfxFloat32x2(0, 0); -} - -#if FFX_HALF -void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus) -{ - fLockStatus = FFX_MIN16_F2(0, 0); -} -#endif - - -void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus) -{ - fLockStatus[LOCK_LIFETIME_REMAINING] = 0; -} - -#if FFX_HALF -void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus) -{ - fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0); -} -#endif - -struct RectificationBox -{ - FfxFloat32x3 boxCenter; - FfxFloat32x3 boxVec; - FfxFloat32x3 aabbMin; - FfxFloat32x3 aabbMax; - FfxFloat32 fBoxCenterWeight; -}; -#if FFX_HALF -struct RectificationBoxMin16 -{ - FFX_MIN16_F3 boxCenter; - FFX_MIN16_F3 boxVec; - FFX_MIN16_F3 aabbMin; - FFX_MIN16_F3 aabbMax; - FFX_MIN16_F fBoxCenterWeight; -}; -#endif - -void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox) -{ - rectificationBox.fBoxCenterWeight = FfxFloat32(0); - - rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0); - rectificationBox.boxVec = FfxFloat32x3(0, 0, 0); - rectificationBox.aabbMin = FfxFloat32x3(FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX); - rectificationBox.aabbMax = -FfxFloat32x3(FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX); -} -#if FFX_HALF -void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) -{ - rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0); - - rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0); - rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0); - rectificationBox.aabbMin = FFX_MIN16_F3(FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX); - rectificationBox.aabbMax = -FFX_MIN16_F3(FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX); -} -#endif - -void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) -{ - rectificationBox.aabbMin = colorSample; - rectificationBox.aabbMax = colorSample; - - FfxFloat32x3 weightedSample = colorSample * fSampleWeight; - rectificationBox.boxCenter = weightedSample; - rectificationBox.boxVec = colorSample * weightedSample; - rectificationBox.fBoxCenterWeight = fSampleWeight; -} - -void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) -{ - if (bInitialSample) { - RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); - } else { - rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); - rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); - - FfxFloat32x3 weightedSample = colorSample * fSampleWeight; - rectificationBox.boxCenter += weightedSample; - rectificationBox.boxVec += colorSample * weightedSample; - rectificationBox.fBoxCenterWeight += fSampleWeight; - } -} -#if FFX_HALF -void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) -{ - rectificationBox.aabbMin = colorSample; - rectificationBox.aabbMax = colorSample; - - FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; - rectificationBox.boxCenter = weightedSample; - rectificationBox.boxVec = colorSample * weightedSample; - rectificationBox.fBoxCenterWeight = fSampleWeight; -} - -void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) -{ - if (bInitialSample) { - RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); - } else { - rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); - rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); - - FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; - rectificationBox.boxCenter += weightedSample; - rectificationBox.boxVec += colorSample * weightedSample; - rectificationBox.fBoxCenterWeight += fSampleWeight; - } -} -#endif - -void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) -{ - rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR3UPSCALER_EPSILON) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); - rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; - rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; - FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); - rectificationBox.boxVec = stdDev; -} -#if FFX_HALF -void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) -{ - rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR3UPSCALER_EPSILON) ? rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f)); - rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; - rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; - FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); - rectificationBox.boxVec = stdDev; -} -#endif - -FfxFloat32x3 SafeRcp3(FfxFloat32x3 v) -{ - return (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0); -} -#if FFX_HALF -FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v) -{ - return (all(FFX_NOT_EQUAL(v, FFX_MIN16_F3(0, 0, 0)))) ? (FFX_MIN16_F3(1, 1, 1) / v) : FFX_MIN16_F3(0, 0, 0); -} -#endif - -FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1) -{ - const FfxFloat32 m = ffxMax(v0, v1); - return m != 0 ? ffxMin(v0, v1) / m : 0; -} - -#if FFX_HALF -FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1) -{ - const FFX_MIN16_F m = ffxMax(v0, v1); - return m != FFX_MIN16_F(0) ? ffxMin(v0, v1) / m : FFX_MIN16_F(0); -} -#endif - -FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) -{ - FfxFloat32x3 fRgb; - - fRgb = FfxFloat32x3( - fYCoCg.x + fYCoCg.y - fYCoCg.z, - fYCoCg.x + fYCoCg.z, - fYCoCg.x - fYCoCg.y - fYCoCg.z); - - return fRgb; -} -#if FFX_HALF -FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg) -{ - FFX_MIN16_F3 fRgb; - - fRgb = FFX_MIN16_F3( - fYCoCg.x + fYCoCg.y - fYCoCg.z, - fYCoCg.x + fYCoCg.z, - fYCoCg.x - fYCoCg.y - fYCoCg.z); - - return fRgb; -} -#endif - -FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) -{ - FfxFloat32x3 fYCoCg; - - fYCoCg = FfxFloat32x3( - 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b, - 0.5f * fRgb.r - 0.5f * fRgb.b, - -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b); - - return fYCoCg; -} -#if FFX_HALF -FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb) -{ - FFX_MIN16_F3 fYCoCg; - - fYCoCg = FFX_MIN16_F3( - 0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b, - 0.5 * fRgb.r - 0.5 * fRgb.b, - -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b); - - return fYCoCg; -} -#endif - -FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb) -{ - return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f)); -} -#if FFX_HALF -FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb) -{ - return dot(fLinearRgb, FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f)); -} -#endif - -FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) -{ - FfxFloat32 fLuminance = RGBToLuma(fLinearRgb); - - FfxFloat32 fPercievedLuminance = 0; - if (fLuminance <= 216.0f / 24389.0f) { - fPercievedLuminance = fLuminance * (24389.0f / 27.0f); - } - else { - fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f; - } - - return fPercievedLuminance * 0.01f; -} -#if FFX_HALF -FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb) -{ - FFX_MIN16_F fLuminance = RGBToLuma(fLinearRgb); - - FFX_MIN16_F fPercievedLuminance = FFX_MIN16_F(0); - if (fLuminance <= FFX_MIN16_F(216.0f / 24389.0f)) { - fPercievedLuminance = fLuminance * FFX_MIN16_F(24389.0f / 27.0f); - } - else { - fPercievedLuminance = ffxPow(fLuminance, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f); - } - - return fPercievedLuminance * FFX_MIN16_F(0.01f); -} -#endif - -FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb) -{ - return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx; -} - -FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb) -{ - return fRgb / ffxMax(FSR3UPSCALER_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; -} - -#if FFX_HALF -FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb) -{ - return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx; -} - -FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb) -{ - return fRgb / ffxMax(FFX_MIN16_F(FSR3UPSCALER_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; -} -#endif - -FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) -{ - FfxInt32x2 result = iPxSample + iPxOffset; - result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; - result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; - result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; - result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; - return result; - - // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); -} -#if FFX_HALF -FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) -{ - FFX_MIN16_I2 result = iPxSample + iPxOffset; - result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; - result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; - result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; - result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; - return result; - - // return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1)); -} -#endif - -FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize) -{ - const FfxFloat32x2 fSampleLocation = fUv * iTextureSize; - const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f))); - const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize); - - return fClampedUv; -} - -FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size) -{ - return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size))); -} -#if FFX_HALF -FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size) -{ - return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size))); -} -#endif - -FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg) -{ - Lavg = exp(Lavg); - - const FfxFloat32 S = 100.0f; //ISO arithmetic speed - const FfxFloat32 K = 12.5f; - FfxFloat32 ExposureISO100 = log2((Lavg * S) / K); - - const FfxFloat32 q = 0.65f; - FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100); - - return 1 / Lmax; -} -#if FFX_HALF -FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg) -{ - Lavg = exp(Lavg); - - const FFX_MIN16_F S = FFX_MIN16_F(100.0f); //ISO arithmetic speed - const FFX_MIN16_F K = FFX_MIN16_F(12.5f); - const FFX_MIN16_F ExposureISO100 = log2((Lavg * S) / K); - - const FFX_MIN16_F q = FFX_MIN16_F(0.65f); - const FFX_MIN16_F Lmax = (FFX_MIN16_F(78.0f) / (q * S)) * ffxPow(FFX_MIN16_F(2.0f), ExposureISO100); - - return FFX_MIN16_F(1) / Lmax; -} -#endif - -FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos) -{ - FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); - FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize(); - FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr)); - return iPxHrPos; -} -#if FFX_HALF -FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos) -{ - FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter()); - FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize()); - FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr)); - return iPxHrPos; -} -#endif - -FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize) -{ - return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f); -} - -FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth) -{ - const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); - - // fDeviceToViewDepth details found in ffx_fsr3upscaler.cpp - return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0])); -} - -FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth) -{ - return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor(); -} - -FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) -{ - const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); - - const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth); - - const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize); - const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z; - const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z; - - return FfxFloat32x3(X, Y, Z); -} - -FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) -{ - return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor(); -} - -FfxFloat32 GetMaxDistanceInMeters() -{ -#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - return GetViewSpaceDepth(0.0f) * ViewSpaceToMetersFactor(); -#else - return GetViewSpaceDepth(1.0f) * ViewSpaceToMetersFactor(); -#endif -} - -FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure) -{ - fRgb /= fPreExposure; - fRgb *= fExposure; - - fRgb = clamp(fRgb, 0.0f, FSR3UPSCALER_FP16_MAX); - - return fRgb; -} - -FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure) -{ - fRgb /= fExposure; - fRgb *= PreExposure(); - - return fRgb; -} - - -struct BilinearSamplingData -{ - FfxInt32x2 iOffsets[4]; - FfxFloat32 fWeights[4]; - FfxInt32x2 iBasePos; -}; - -BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize) -{ - BilinearSamplingData data; - - FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f); - data.iBasePos = FfxInt32x2(floor(fPxSample)); - FfxFloat32x2 fPxFrac = ffxFract(fPxSample); - - data.iOffsets[0] = FfxInt32x2(0, 0); - data.iOffsets[1] = FfxInt32x2(1, 0); - data.iOffsets[2] = FfxInt32x2(0, 1); - data.iOffsets[3] = FfxInt32x2(1, 1); - - data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y); - data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y); - data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y); - data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y); - - return data; -} - -struct PlaneData -{ - FfxFloat32x3 fNormal; - FfxFloat32 fDistanceFromOrigin; -}; - -PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2) -{ - PlaneData plane; - - FfxFloat32x3 v0 = fP0 - fP1; - FfxFloat32x3 v1 = fP0 - fP2; - plane.fNormal = normalize(cross(v0, v1)); - plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal); - - return plane; -} - -FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint) -{ - return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin); -} - -#endif // #if defined(FFX_GPU) - -#endif //!defined(FFX_FSR3UPSCALER_COMMON_H) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta deleted file mode 100644 index 2ebe2aa..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 6a638bec681caac4fa8e2ca198726694 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h deleted file mode 100644 index d26cf23..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h +++ /dev/null @@ -1,176 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -FFX_GROUPSHARED FfxUInt32 spdCounter; - -void SpdIncreaseAtomicCounter(FfxUInt32 slice) -{ - SPD_IncreaseAtomicCounter(spdCounter); -} - -FfxUInt32 SpdGetAtomicCounter() -{ - return spdCounter; -} - -void SpdResetAtomicCounter(FfxUInt32 slice) -{ - SPD_ResetAtomicCounter(); -} - -#ifndef SPD_PACKED_ONLY -FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; -FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; -FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; -FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; - -FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) -{ - FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize(); - fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions()); - FfxFloat32x3 fRgb = SampleInputColor(fUv); - - fRgb /= PreExposure(); - - //compute log luma - const FfxFloat32 fLogLuma = log(ffxMax(FSR3UPSCALER_EPSILON, RGBToLuma(fRgb))); - - // Make sure out of screen pixels contribute no value to the end result - const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f; - - return FfxFloat32x4(result, 0, 0, 0); -} - -FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) -{ - return SPD_LoadMipmap5(tex); -} - -void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) -{ - if (index == LumaMipLevelToUse() || index == 5) - { - SPD_SetMipmap(pix, index, outValue.r); - } - - if (index == MipCount() - 1) { //accumulate on 1x1 level - - if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) - { - FfxFloat32 prev = SPD_LoadExposureBuffer().y; - FfxFloat32 result = outValue.r; - - if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values - { - FfxFloat32 rate = 1.0f; - result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); - } - FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); - SPD_SetExposureBuffer(spdOutput); - } - } -} - -FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) -{ - return FfxFloat32x4( - spdIntermediateR[x][y], - spdIntermediateG[x][y], - spdIntermediateB[x][y], - spdIntermediateA[x][y]); -} -void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) -{ - spdIntermediateR[x][y] = value.x; - spdIntermediateG[x][y] = value.y; - spdIntermediateB[x][y] = value.z; - spdIntermediateA[x][y] = value.w; -} -FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) -{ - return (v0 + v1 + v2 + v3) * 0.25f; -} -#endif - -// define fetch and store functions Packed -#if FFX_HALF - -FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; -FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; - -FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) -{ - return FfxFloat16x4(0, 0, 0, 0); -} - -FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) -{ - return FfxFloat16x4(0, 0, 0, 0); -} - -void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) -{ -} - -FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) -{ - return FfxFloat16x4( - spdIntermediateRG[x][y].x, - spdIntermediateRG[x][y].y, - spdIntermediateBA[x][y].x, - spdIntermediateBA[x][y].y); -} - -void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) -{ - spdIntermediateRG[x][y] = value.xy; - spdIntermediateBA[x][y] = value.zw; -} - -FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) -{ - return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); -} -#endif - -#include "spd/ffx_spd.h" - -void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) -{ -#if FFX_HALF - SpdDownsampleH( - FfxUInt32x2(WorkGroupId.xy), - FfxUInt32(LocalThreadIndex), - FfxUInt32(MipCount()), - FfxUInt32(NumWorkGroups()), - FfxUInt32(WorkGroupId.z), - FfxUInt32x2(WorkGroupOffset())); -#else - SpdDownsample( - FfxUInt32x2(WorkGroupId.xy), - FfxUInt32(LocalThreadIndex), - FfxUInt32(MipCount()), - FfxUInt32(NumWorkGroups()), - FfxUInt32(WorkGroupId.z), - FfxUInt32x2(WorkGroupOffset())); -#endif -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta deleted file mode 100644 index 9f1d2ab..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 61bd10363d44ee2478461c9e9efbcb67 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h deleted file mode 100644 index 53763c8..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h +++ /dev/null @@ -1,259 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_DEPTH_CLIP_H -#define FFX_FSR3UPSCALER_DEPTH_CLIP_H - -FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; - -FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample) -{ - FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample); - BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize()); - - FfxFloat32 fDilatedSum = 0.0f; - FfxFloat32 fDepth = 0.0f; - FfxFloat32 fWeightSum = 0.0f; - for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { - - const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; - const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset; - - if (IsOnScreen(iSamplePos, RenderSize())) { - const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; - if (fWeight > fReconstructedDepthBilinearWeightThreshold) { - - const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos); - const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample); - - const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; - - if (fDepthDiff > 0.0f) { - -#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample); -#else - const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample); -#endif - - const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth); - const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth); - - const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize())); - const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); - - const FfxFloat32 Ksep = 1.37e-05f; - const FfxFloat32 Kfov = length(fCorner) / length(fCenter); - const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold; - - const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f))); - const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor); - fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight; - fWeightSum += fWeight; - } - } - } - } - - return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f; -} - -FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize) -{ - FfxFloat32 minconvergence = 1.0f; - - FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos); - FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize()); - FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus); - - const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f; - - if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) { - for (FfxInt32 y = -1; y <= 1; ++y) { - for (FfxInt32 x = -1; x <= 1; ++x) { - - FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize); - - FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp); - FfxFloat32 fVelocityUv = length(fMotionVector); - - fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); - fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); - minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv)); - } - } - } - - return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f); -} - -FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos) -{ - const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters(); - FfxFloat32 fDepthMax = 0.0f; - FfxFloat32 fDepthMin = fMaxDistInMeters; - - FfxInt32 iMaxDistFound = 0; - - for (FfxInt32 y = -1; y < 2; y++) { - for (FfxInt32 x = -1; x < 2; x++) { - - const FfxInt32x2 iOffset = FfxInt32x2(x, y); - const FfxInt32x2 iSamplePos = iPxPos + iOffset; - - const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 1.0f : 0.0f; - FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor; - - iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth); - - fDepthMin = ffxMin(fDepthMin, fDepth); - fDepthMax = ffxMax(fDepthMax, fDepth); - } - } - - return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f); -} - -FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos) -{ - const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize(); - - FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); - FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; - fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize()); - FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv); - - float fPxDistance = length(fMotionVector * DisplaySize()); - return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0; -} - -void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence) -{ - // Compensate for bilinear sampling in accumulation pass - - FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz; - FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence); - - float fMasksSum = 0.0f; - - FfxFloat32x3 fColorSamples[9]; - FfxFloat32 fReactiveSamples[9]; - FfxFloat32 fTransparencyAndCompositionSamples[9]; - - FFX_UNROLL - for (FfxInt32 y = -1; y < 2; y++) { - FFX_UNROLL - for (FfxInt32 x = -1; x < 2; x++) { - - const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); - - FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; - - FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz; - FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord); - FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord); - - fColorSamples[sampleIdx] = fColorSample; - fReactiveSamples[sampleIdx] = fReactiveSample; - fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample; - - fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample); - } - } - - if (fMasksSum > 0) - { - for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++) - { - FfxFloat32x3 fColorSample = fColorSamples[sampleIdx]; - FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx]; - FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx]; - - const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample)); - const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq; - - // Increase power for non-similar samples - const FfxFloat32 fPowerBiasMax = 6.0f; - const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax); - const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower); - const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower); - - fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample)); - } - } - - StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor); -} - -FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos) -{ - //We assume linear data. if non-linear input (sRGB, ...), - //then we should convert to linear first and back to sRGB on output. - FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); - - fRgb = PrepareRgb(fRgb, Exposure(), PreExposure()); - - const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); - - return fPreparedYCoCg; -} - -FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector) -{ - FfxFloat32 d0 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1))); - FfxFloat32 d1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0))); - FfxFloat32 d2 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1))); - - return 1.0f - FfxFloat32(((d0 - d1) > (d1 * 0.01f)) && ((d1 - d2) > (d2 * 0.01f))); -} - -void DepthClip(FfxInt32x2 iPxPos) -{ - FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize(); - FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); - - // Discard tiny mvs - fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f); - - const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; - const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos); - const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos)); - - // Compute prepared input color and depth clip - FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector); - FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos); - StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip)); - - // Compute dilated reactive mask -#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS - FfxInt32x2 iSamplePos = iPxPos; -#else - FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos); -#endif - - FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize()); - FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos)); - - PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence)); -} - -#endif //!defined( FFX_FSR3UPSCALER_DEPTH_CLIPH ) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta deleted file mode 100644 index 21fe627..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 7c662249d70c4434da4f2da00e432c38 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h deleted file mode 100644 index e1a0d06..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h +++ /dev/null @@ -1,116 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_LOCK_H -#define FFX_FSR3UPSCALER_LOCK_H - -void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos) -{ - if (all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())))) - { -#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - const FfxUInt32 farZ = 0x0; -#else - const FfxUInt32 farZ = 0x3f800000; -#endif - SetReconstructedDepth(iPxHrPos, farZ); - } -} - -FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos) -{ - const FfxInt32 RADIUS = 1; - - FfxFloat32 fNucleus = LoadLockInputLuma(pos); - - FfxFloat32 similar_threshold = 1.05f; - FfxFloat32 dissimilarLumaMin = FSR3UPSCALER_FLT_MAX; - FfxFloat32 dissimilarLumaMax = 0; - - /* - 0 1 2 - 3 4 5 - 6 7 8 - */ - - #define SETBIT(x) (1U << x) - - FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar - - const FfxUInt32 uNumRejectionMasks = 4; - const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = { - SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left - SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right - SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left - SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right - }; - - FfxInt32 idx = 0; - FFX_UNROLL - for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) { - FFX_UNROLL - for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) { - if (x == 0 && y == 0) continue; - - FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); - - FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos); - FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus); - - if (difference > 0 && (difference < similar_threshold)) { - mask |= SETBIT(idx); - } else { - dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma); - dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma); - } - } - } - - FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin; - - if (FFX_FALSE == isRidge) { - - return false; - } - - FFX_UNROLL - for (FfxInt32 i = 0; i < 4; i++) { - - if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) { - return false; - } - } - - return true; -} - -void ComputeLock(FfxInt32x2 iPxLrPos) -{ - if (ComputeThinFeatureConfidence(iPxLrPos)) - { - StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f); - } - - // ClearResourcesForNextFrame(iPxLrPos); -} - -#endif // FFX_FSR3UPSCALER_LOCK_H diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta deleted file mode 100644 index 38a5a57..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: c7e9f53dd040b2645af5ccd936a94b0e -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h deleted file mode 100644 index 3709113..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h +++ /dev/null @@ -1,107 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H -#define FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H - -FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample) -{ - return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); -} - -#if FFX_HALF -FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample) -{ - return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); -} -#endif - -#if FFX_FSR3UPSCALER_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF -DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) -#else -DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) -#endif -DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples) - -FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord) -{ - FfxFloat32 fShadingChangeLuma = 0; - -#if 0 - fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x); -#else - - const FfxFloat32 fDiv = FfxFloat32(2u << LumaMipLevelToUse()); - FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv); - - fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions()); - fShadingChangeLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse()))); -#endif - - fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f); - - return fShadingChangeLuma; -} - -void UpdateLockStatus(AccumulationPassCommonParams params, - FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state, - FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus, - FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame, - FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { - - const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv); - - //init temporal shading change factor, init to -1 or so in reproject to know if "true new"? - fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA]; - - FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA]; - - fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); - - if (state.NewLock) { - fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma; - - fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f; - } - else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) { - fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f); - } - else { - if (fLuminanceDiff > 0.1f) { - KillLock(fLockStatus); - } - } - - fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f)); - fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor); - - fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask); - fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f); - - // Compute this frame lock contribution - const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f); - const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma)); - - fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution); -} - -#endif //!defined( FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H ) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta deleted file mode 100644 index f8b5616..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 67a8b72ceb93d634f883b086fdccb348 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h deleted file mode 100644 index 77619a5..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h +++ /dev/null @@ -1,67 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#define GROUP_SIZE 8 -#define FSR_RCAS_DENOISE 1 - -#include "ffx_core.h" - -void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) -{ - StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); -} - -#define FSR_RCAS_F 1 -FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) -{ - FfxFloat32x4 fColor = LoadRCAS_Input(p); - - fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); - - return fColor; -} -void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} - -#include "fsr1/ffx_fsr1.h" - -void CurrFilter(FFX_MIN16_U2 pos) -{ - FfxFloat32x3 c; - FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); - - c = UnprepareRgb(c, Exposure()); - - WriteUpscaledOutput(pos, c); -} - -void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) -{ - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. - FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); - CurrFilter(FFX_MIN16_U2(gxy)); - gxy.x += 8u; - CurrFilter(FFX_MIN16_U2(gxy)); - gxy.y += 8u; - CurrFilter(FFX_MIN16_U2(gxy)); - gxy.x -= 8u; - CurrFilter(FFX_MIN16_U2(gxy)); -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta deleted file mode 100644 index 7a53a1f..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 692efb7cec0df67408a583a7ff34146a -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h deleted file mode 100644 index a822dfc..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h +++ /dev/null @@ -1,146 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H -#define FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H - -void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize) -{ - fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f); - - FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; - FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; - - BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); - - // Project current depth into previous frame locations. - // Push to all pixels having some contribution if reprojection is using bilinear logic. - for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { - - const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; - FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; - - if (fWeight > fReconstructedDepthBilinearWeightThreshold) { - - FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset; - if (IsOnScreen(iStorePos, iPxDepthSize)) { - StoreReconstructedDepth(iStorePos, fDepth); - } - } - } -} - -void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord) -{ - const FfxInt32 iSampleCount = 9; - const FfxInt32x2 iSampleOffsets[iSampleCount] = { - FfxInt32x2(+0, +0), - FfxInt32x2(+1, +0), - FfxInt32x2(+0, +1), - FfxInt32x2(+0, -1), - FfxInt32x2(-1, +0), - FfxInt32x2(-1, +1), - FfxInt32x2(+1, +1), - FfxInt32x2(-1, -1), - FfxInt32x2(+1, -1), - }; - - // pull out the depth loads to allow SC to batch them - FfxFloat32 depth[9]; - FfxInt32 iSampleIndex = 0; - FFX_UNROLL - for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { - - FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; - depth[iSampleIndex] = LoadInputDepth(iPos); - } - - // find closest depth - fNearestDepthCoord = iPxPos; - fNearestDepth = depth[0]; - FFX_UNROLL - for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) { - - FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; - if (IsOnScreen(iPos, iPxSize)) { - - FfxFloat32 fNdDepth = depth[iSampleIndex]; -#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - if (fNdDepth > fNearestDepth) { -#else - if (fNdDepth < fNearestDepth) { -#endif - fNearestDepthCoord = iPos; - fNearestDepth = fNdDepth; - } - } - } -} - -FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos) -{ - //We assume linear data. if non-linear input (sRGB, ...), - //then we should convert to linear first and back to sRGB on output. - FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); - - // Use internal auto exposure for locking logic - fRgb /= PreExposure(); - fRgb *= Exposure(); - -#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT - fRgb = Tonemap(fRgb); -#endif - - //compute luma used to lock pixels, if used elsewhere the ffxPow must be moved! - const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0)); - - return fLockInputLuma; -} - -void ReconstructAndDilate(FfxInt32x2 iPxLrPos) -{ - FfxFloat32 fDilatedDepth; - FfxInt32x2 iNearestDepthCoord; - - FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord); - -#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS - FfxInt32x2 iSamplePos = iPxLrPos; - FfxInt32x2 iMotionVectorPos = iNearestDepthCoord; -#else - FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxLrPos); - FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord); -#endif - - FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos); - - StoreDilatedDepth(iPxLrPos, fDilatedDepth); - StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector); - - ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize()); - - FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos); - StoreLockInputLuma(iPxLrPos, fLockInputLuma); -} - - -#endif //!defined( FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H ) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta deleted file mode 100644 index 78ced0d..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: c8b3854bad30a8b40babc5a9805f294e -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h deleted file mode 100644 index 29b7584..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h +++ /dev/null @@ -1,137 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_REPROJECT_H -#define FFX_FSR3UPSCALER_REPROJECT_H - -#ifndef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE -#define FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference -#endif - -FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) -{ - return LoadHistory(iPxSample); -} - -#if FFX_HALF -FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample) -{ - return FFX_MIN16_F4(LoadHistory(iPxSample)); -} -#endif - - -#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF -DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory) -DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) -#else -DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory) -DeclareCustomTextureSample(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) -#endif - -FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample) -{ - FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f); - return fSample; -} - -#if FFX_HALF -FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample) -{ - FFX_MIN16_F4 fSample = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0); - - return fSample; -} -#endif - -#if 1 -#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF -DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus) -DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples) -#else -DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus) -DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples) -#endif -#else -#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF -DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus) -DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples) -#else -DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus) -DeclareCustomTextureSample(LockStatusSample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples) -#endif -#endif - -FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) -{ -#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS - FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize())); -#else - FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos); -#endif - - return fDilatedMotionVector; -} - -FfxBoolean IsUvInside(FfxFloat32x2 fUv) -{ - return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f); -} - -void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) -{ - fReprojectedHrUv = params.fHrUv + params.fMotionVector; - - bIsExistingSample = IsUvInside(fReprojectedHrUv); -} - -void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame) -{ - FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize()); - - fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure()); - - fHistoryColor = RGBToYCoCg(fHistoryColor); - - //Compute temporal reactivity info - fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w)); - bInMotionLastFrame = (fHistory.w < 0.0f); -} - -LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus) -{ - LockState state = { FFX_FALSE, FFX_FALSE }; - const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos); - state.NewLock = fNewLockIntensity > (127.0f / 255.0f); - - FfxFloat32 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : 0; - - fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv); - - if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) { - state.WasLockedPrevFrame = true; - } - - return state; -} - -#endif //!defined( FFX_FSR3UPSCALER_REPROJECT_H ) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta deleted file mode 100644 index ea2e14d..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 9d893016eebb2564f9a66b80afb0849f -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h deleted file mode 100644 index d98cfcc..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h +++ /dev/null @@ -1,104 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_RESOURCES_H -#define FFX_FSR3UPSCALER_RESOURCES_H - -#if defined(FFX_CPU) || defined(FFX_GPU) -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL 0 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR 2 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH 4 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH 10 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS 12 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS 13 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY 15 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT 17 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT 20 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE 44 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOCOMPOSITION_DEPRECATED 45 - -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR 46 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR 47 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 48 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 49 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 50 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 51 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 52 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 53 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 54 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 55 - -// Shading change detection mip level setting, value must be in the range [FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12] -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 -#define FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL (FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) - -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT 56 - -#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER 0 -#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD 1 -#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS 2 -#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3 - -#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 -#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 -#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4 -#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8 - -#endif // #if defined(FFX_CPU) || defined(FFX_GPU) - -#endif //!defined( FFX_FSR3UPSCALER_RESOURCES_H ) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta deleted file mode 100644 index 24cdbd2..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: b5a95a38dcfaf3946a5095bbbc42939a -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h deleted file mode 100644 index d33f70c..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h +++ /dev/null @@ -1,606 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_SAMPLE_H -#define FFX_FSR3UPSCALER_SAMPLE_H - -// suppress warnings -#ifdef FFX_HLSL -#pragma warning(disable: 4008) // potentially divide by zero -#endif //FFX_HLSL - -struct FetchedBilinearSamples { - - FfxFloat32x4 fColor00; - FfxFloat32x4 fColor10; - - FfxFloat32x4 fColor01; - FfxFloat32x4 fColor11; -}; - -struct FetchedBicubicSamples { - - FfxFloat32x4 fColor00; - FfxFloat32x4 fColor10; - FfxFloat32x4 fColor20; - FfxFloat32x4 fColor30; - - FfxFloat32x4 fColor01; - FfxFloat32x4 fColor11; - FfxFloat32x4 fColor21; - FfxFloat32x4 fColor31; - - FfxFloat32x4 fColor02; - FfxFloat32x4 fColor12; - FfxFloat32x4 fColor22; - FfxFloat32x4 fColor32; - - FfxFloat32x4 fColor03; - FfxFloat32x4 fColor13; - FfxFloat32x4 fColor23; - FfxFloat32x4 fColor33; -}; - -#if FFX_HALF -struct FetchedBilinearSamplesMin16 { - - FFX_MIN16_F4 fColor00; - FFX_MIN16_F4 fColor10; - - FFX_MIN16_F4 fColor01; - FFX_MIN16_F4 fColor11; -}; - -struct FetchedBicubicSamplesMin16 { - - FFX_MIN16_F4 fColor00; - FFX_MIN16_F4 fColor10; - FFX_MIN16_F4 fColor20; - FFX_MIN16_F4 fColor30; - - FFX_MIN16_F4 fColor01; - FFX_MIN16_F4 fColor11; - FFX_MIN16_F4 fColor21; - FFX_MIN16_F4 fColor31; - - FFX_MIN16_F4 fColor02; - FFX_MIN16_F4 fColor12; - FFX_MIN16_F4 fColor22; - FFX_MIN16_F4 fColor32; - - FFX_MIN16_F4 fColor03; - FFX_MIN16_F4 fColor13; - FFX_MIN16_F4 fColor23; - FFX_MIN16_F4 fColor33; -}; -#else //FFX_HALF -#define FetchedBicubicSamplesMin16 FetchedBicubicSamples -#define FetchedBilinearSamplesMin16 FetchedBilinearSamples -#endif //FFX_HALF - -FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) -{ - return A + (B - A) * t; -} - -FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac) -{ - FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); - FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); - FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); - return fColorXY; -} - -#if FFX_HALF -FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t) -{ - return A + (B - A) * t; -} - -FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac) -{ - FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); - FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); - FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); - return fColorXY; -} -#endif - -FfxFloat32 Lanczos2NoClamp(FfxFloat32 x) -{ - const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants - return abs(x) < FSR3UPSCALER_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); -} - -FfxFloat32 Lanczos2(FfxFloat32 x) -{ - x = ffxMin(abs(x), 2.0f); - return Lanczos2NoClamp(x); -} - -#if FFX_HALF - -#if 0 -FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) -{ - const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants - return abs(x) < FFX_MIN16_F(FSR3UPSCALER_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); -} -#endif - -FFX_MIN16_F Lanczos2(FFX_MIN16_F x) -{ - x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); - return FFX_MIN16_F(Lanczos2NoClamp(x)); -} -#endif //FFX_HALF - -// FSR1 lanczos approximation. Input is x*x and must be <= 4. -FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2) -{ - FfxFloat32 a = (2.0f / 5.0f) * x2 - 1; - FfxFloat32 b = (1.0f / 4.0f) * x2 - 1; - return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b); -} - -#if FFX_HALF -FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) -{ - FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1); - FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); - return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); -} -#endif //FFX_HALF - -FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) -{ - x2 = ffxMin(x2, 4.0f); - return Lanczos2ApproxSqNoClamp(x2); -} - -#if FFX_HALF -FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) -{ - x2 = ffxMin(x2, FFX_MIN16_F(4.0f)); - return Lanczos2ApproxSqNoClamp(x2); -} -#endif //FFX_HALF - -FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) -{ - return Lanczos2ApproxSqNoClamp(x * x); -} - -#if FFX_HALF -FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x) -{ - return Lanczos2ApproxSqNoClamp(x * x); -} -#endif //FFX_HALF - -FfxFloat32 Lanczos2Approx(FfxFloat32 x) -{ - return Lanczos2ApproxSq(x * x); -} - -#if FFX_HALF -FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x) -{ - return Lanczos2ApproxSq(x * x); -} -#endif //FFX_HALF - -FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) -{ - return SampleLanczos2Weight(abs(x)); -} - -#if FFX_HALF -FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x) -{ - return FFX_MIN16_F(SampleLanczos2Weight(abs(x))); -} -#endif //FFX_HALF - -FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) -{ - FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t); - FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t); - FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t); - FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t); - return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); -} -#if FFX_HALF -FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) -{ - FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t); - FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t); - FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t); - FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t); - return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); -} -#endif - -FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) -{ - FfxFloat32 fWeight0 = Lanczos2(-1.f - t); - FfxFloat32 fWeight1 = Lanczos2(-0.f - t); - FfxFloat32 fWeight2 = Lanczos2(+1.f - t); - FfxFloat32 fWeight3 = Lanczos2(+2.f - t); - return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); -} - -FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) -{ - FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); - FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); - FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); - FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); - FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); - - // Deringing - - // TODO: only use 4 by checking jitter - const FfxInt32 iDeringingSampleCount = 4; - const FfxFloat32x4 fDeringingSamples[4] = { - Samples.fColor11, - Samples.fColor21, - Samples.fColor12, - Samples.fColor22, - }; - - FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; - FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; - - FFX_UNROLL - for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { - - fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); - fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); - } - - fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); - - return fColorXY; -} - -#if FFX_HALF -FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) -{ - FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t); - FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t); - FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t); - FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t); - return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); -} - -FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) -{ - FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); - FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); - FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); - FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); - FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); - - // Deringing - - // TODO: only use 4 by checking jitter - const FfxInt32 iDeringingSampleCount = 4; - const FFX_MIN16_F4 fDeringingSamples[4] = { - Samples.fColor11, - Samples.fColor21, - Samples.fColor12, - Samples.fColor22, - }; - - FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; - FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; - - FFX_UNROLL - for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) - { - fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); - fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); - } - - fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); - - return fColorXY; -} -#endif //FFX_HALF - - -FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) -{ - FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); - FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); - FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); - FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); - FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); - - // Deringing - - // TODO: only use 4 by checking jitter - const FfxInt32 iDeringingSampleCount = 4; - const FfxFloat32x4 fDeringingSamples[4] = { - Samples.fColor11, - Samples.fColor21, - Samples.fColor12, - Samples.fColor22, - }; - - FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; - FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; - - FFX_UNROLL - for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { - - fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); - fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); - } - - fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); - - return fColorXY; -} - -#if FFX_HALF -FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) -{ - FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); - FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); - FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); - FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); - FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); - - // Deringing - - // TODO: only use 4 by checking jitter - const FfxInt32 iDeringingSampleCount = 4; - const FFX_MIN16_F4 fDeringingSamples[4] = { - Samples.fColor11, - Samples.fColor21, - Samples.fColor12, - Samples.fColor22, - }; - - FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; - FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; - - FFX_UNROLL - for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) - { - fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); - fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); - } - - fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); - - return fColorXY; -} -#endif //FFX_HALF - - - -FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) -{ - FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t); - FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t); - FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t); - FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t); - return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); -} - -#if FFX_HALF -FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) -{ - FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t); - FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t); - FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t); - FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t); - return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); -} -#endif //FFX_HALF - -FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) -{ - FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); - FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); - FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); - FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); - FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); - - // Deringing - - // TODO: only use 4 by checking jitter - const FfxInt32 iDeringingSampleCount = 4; - const FfxFloat32x4 fDeringingSamples[4] = { - Samples.fColor11, - Samples.fColor21, - Samples.fColor12, - Samples.fColor22, - }; - - FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; - FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; - - FFX_UNROLL - for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) - { - fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); - fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); - } - - fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); - - return fColorXY; -} - -#if FFX_HALF -FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) -{ - FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); - FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); - FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); - FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); - FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); - - // Deringing - - // TODO: only use 4 by checking jitter - const FfxInt32 iDeringingSampleCount = 4; - const FFX_MIN16_F4 fDeringingSamples[4] = { - Samples.fColor11, - Samples.fColor21, - Samples.fColor12, - Samples.fColor22, - }; - - FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; - FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; - - FFX_UNROLL - for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) - { - fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); - fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); - } - - fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); - - return fColorXY; -} -#endif - -// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant. -FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) -{ - FfxInt32x2 result = iPxSample + iPxOffset; - result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; - result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; - result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; - result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; - return result; -} -#if FFX_HALF -FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) -{ - FFX_MIN16_I2 result = iPxSample + iPxOffset; - result.x = (iPxOffset.x < FFX_MIN16_I(0)) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; - result.x = (iPxOffset.x > FFX_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; - result.y = (iPxOffset.y < FFX_MIN16_I(0)) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; - result.y = (iPxOffset.y > FFX_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; - return result; -} -#endif //FFX_HALF - - -#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \ - SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ - { \ - SampleType Samples; \ - \ - Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \ - Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \ - Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \ - Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \ - \ - Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \ - Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ - Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ - Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \ - \ - Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \ - Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ - Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ - Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \ - \ - Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \ - Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \ - Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \ - Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \ - \ - return Samples; \ - } - -#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \ - DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) - -#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \ - DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) - -#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \ - SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ - { \ - SampleType Samples; \ - Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ - Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ - Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ - Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ - return Samples; \ - } - -#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \ - DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) - -#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \ - DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) - -// BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) -// is common, so iPxSample can "jitter" -#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ - FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ - { \ - FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ - /* Clamp base coords */ \ - fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ - fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ - /* */ \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ - FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ - FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ - return fColorXY; \ - } - -#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ - FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ - { \ - FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ - /* Clamp base coords */ \ - fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ - fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ - /* */ \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ - FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ - FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ - return fColorXY; \ - } - -#define FFX_FSR3UPSCALER_CONCAT_ID(x, y) x ## y -#define FFX_FSR3UPSCALER_CONCAT(x, y) FFX_FSR3UPSCALER_CONCAT_ID(x, y) -#define FFX_FSR3UPSCALER_SAMPLER_1D_0 Lanczos2 -#define FFX_FSR3UPSCALER_SAMPLER_1D_1 Lanczos2LUT -#define FFX_FSR3UPSCALER_SAMPLER_1D_2 Lanczos2Approx - -#define FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(x) FFX_FSR3UPSCALER_CONCAT(FFX_FSR3UPSCALER_SAMPLER_1D_, x) - -#endif //!defined( FFX_FSR3UPSCALER_SAMPLE_H ) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta deleted file mode 100644 index 1a46376..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: dcb900c9deecd06419a8a4c10c305890 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h deleted file mode 100644 index 2d446bb..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h +++ /dev/null @@ -1,250 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#define USE_YCOCG 1 - -#define fAutogenEpsilon 0.01f - -// EXPERIMENTAL - -FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) -{ - FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); - FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); - FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); - FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); - -#if USE_YCOCG - colorPreAlpha = RGBToYCoCg(colorPreAlpha); - colorPostAlpha = RGBToYCoCg(colorPostAlpha); - colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); - colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); -#endif - - FfxFloat32x3 colorDeltaCurr = colorPostAlpha - colorPreAlpha; - FfxFloat32x3 colorDeltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; - bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDeltaCurr), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); - bool hadAlpha = any(FFX_GREATER_THAN(abs(colorDeltaPrev), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); - - FfxFloat32x3 X = colorPreAlpha; - FfxFloat32x3 Y = colorPostAlpha; - FfxFloat32x3 Z = colorPrevPreAlpha; - FfxFloat32x3 W = colorPrevPostAlpha; - - FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1)))); - - // cleanup very small values - retVal = (retVal < TcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f); - - return retVal; -} - -// works ok: thin edges -FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) -{ - FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); - FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); - FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); - FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); - -#if USE_YCOCG - colorPreAlpha = RGBToYCoCg(colorPreAlpha); - colorPostAlpha = RGBToYCoCg(colorPostAlpha); - colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); - colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); -#endif - - FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha; - FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha; - bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); - bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); - - FfxFloat32x3 delta = colorPostAlpha - colorPreAlpha; //prev+1*d = post => d = color, alpha = - FfxFloat32x3 deltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; - - FfxFloat32x3 X = colorPrevPreAlpha; - FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha; - FfxFloat32x3 YAminusXA = colorPrevPostAlpha - colorPrevPreAlpha; - FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha; - - FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0); - - FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) ); - - // only pixels that have significantly changed in color shuold be considered - retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) ); - - return retVal; -} - -// This function computes the TransparencyAndComposition mask: -// This mask indicates pixels that should discard locks and apply color clamping. -// -// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of -// the MVs can not be guaranteed (e.g. procedutal movement or vegetation that does not have MVs to reduce the cost during rasterization) -// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting. -// -// This function takes a opaque only and a final texture and uses internal copies of those textures from the last frame. -// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency. -// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels. -// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation. -// -// In the final step it stores the current textures in internal textures for the next frame - -FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) -{ - FFX_MIN16_F retVal = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx); - - // [branch] - if (retVal > FFX_MIN16_F(0.01f)) - { - retVal = ComputeAutoTC_01(uDispatchThreadId, iPrevIdx); - } - return retVal; -} - -float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) -{ - float lum[9]; - int i = 0; - for (int y = -1; y < 2; ++y) - { - for (int x = -1; x < 2; ++x) - { - FfxFloat32x3 curCol = LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb; - FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb; - lum[i++] = length(curCol - prevCol); - } - } - - //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); - //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); - - //return sqrt(gradX * gradX + gradY * gradY); - - float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); - float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); - - return sqrt(sqrt(gradX * gradY)); -} - -float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) -{ - float lum[9]; - int i = 0; - for (int y = -1; y < 2; ++y) - { - for (int x = -1; x < 2; ++x) - { - FfxFloat32x3 curCol = abs(LoadInputColor(curPos + FFX_MIN16_I2(x, y)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb); - FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb); - lum[i++] = length(curCol - prevCol); - } - } - - //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); - //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); - - //return sqrt(gradX * gradX + gradY * gradY); - - float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); - float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); - - return sqrt(sqrt(gradX * gradY)); -} - -FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) -{ - FFX_MIN16_F retVal = FFX_MIN16_F(0.f); - - FfxFloat32x2 fMotionVector = LoadInputMotionVector(uDispatchThreadId); - FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); - FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); - FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); - FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); - -#if USE_YCOCG - colorPreAlpha = RGBToYCoCg(colorPreAlpha); - colorPostAlpha = RGBToYCoCg(colorPostAlpha); - colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); - colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); -#endif - FfxFloat32x3 minPrev = FFX_MIN16_F3(+1000.f, +1000.f, +1000.f); - FfxFloat32x3 maxPrev = FFX_MIN16_F3(-1000.f, -1000.f, -1000.f); - for (int y = -1; y < 2; ++y) - { - for (int x = -1; x < 2; ++x) - { - FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y)); - -#if USE_YCOCG - W = RGBToYCoCg(W); -#endif - minPrev = min(minPrev, W); - maxPrev = max(maxPrev, W); - } - } - // instead of computing the overlap: simply count how many samples are outside - // set reactive based on that - FFX_MIN16_F count = FFX_MIN16_F(0.f); - for (int y = -1; y < 2; ++y) - { - for (int x = -1; x < 2; ++x) - { - FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y)); - -#if USE_YCOCG - Y = RGBToYCoCg(Y); -#endif - count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); - count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); - count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); - } - } - retVal = count / FFX_MIN16_F(27.f); - - return retVal; -} - - -// This function computes the Reactive mask: -// We want pixels marked where the alpha portion of the frame changes a lot between neighbours -// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...) -// As a result history would not be trustworthy. -// On the other hand we don't want pixels marked where pre-alpha has a large differnce, since those would profit from accumulation -// For mirrors we may assume the pre-alpha is pretty uniform color. -// -// This works well generally, but also marks edge pixels -FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) -{ - // we only get here if alpha has a significant contribution and has changed since last frame. - FFX_MIN16_F retVal = FFX_MIN16_F(0.f); - - // mark pixels with huge variance in alpha as reactive - FFX_MIN16_F alphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx)); - FFX_MIN16_F opaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx)); - retVal = ffxSaturate(alphaEdge - opaqueEdge); - - // the above also marks edge pixels due to jitter, so we need to cancel those out - - - return retVal; -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta deleted file mode 100644 index 99d54e7..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: f01d5a8fbd1f34a4ea8d971755a21b6c -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h deleted file mode 100644 index 47e7ccf..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h +++ /dev/null @@ -1,195 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_UPSAMPLE_H -#define FFX_FSR3UPSCALER_UPSAMPLE_H - -FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; - -void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) -{ - fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); -} -#if FFX_HALF -void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) -{ - fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); -} -#endif - -#ifndef FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE -#define FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate -#endif - -FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) -{ - FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; -#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE - FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT - FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE - FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); -#else -#error "Invalid Lanczos type" -#endif - return fSampleWeight; -} - -#if FFX_HALF -FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) -{ - FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; -#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE - FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT - FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE - FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); - - // To Test: Save reciproqual sqrt compute - // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); -#else -#error "Invalid Lanczos type" -#endif - return fSampleWeight; -} -#endif - -FfxFloat32 ComputeMaxKernelWeight() { - const FfxFloat32 fKernelSizeBias = 1.0f; - - FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); - - return ffxMin(FfxFloat32(1.99f), fKernelWeight); -} - -FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, - FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) -{ - #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr3upscaler_force16_begin.h" - #endif - // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) - FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position - FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position - FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... - - #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr3upscaler_force16_end.h" - #endif - - FfxFloat32x3 fSamples[iLanczos2SampleCount]; - - FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 - - FfxInt32x2 offsetTL; - offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); - offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1); - - //Load samples - // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3]. - // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox. - // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values. - const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; - const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; - - FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); - - FFX_UNROLL - for (FfxInt32 row = 0; row < 3; row++) { - - FFX_UNROLL - for (FfxInt32 col = 0; col < 3; col++) { - FfxInt32 iSampleIndex = col + (row << 2); - - FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); - FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; - - const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); - - fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); - } - } - - FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); - - FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); - - // Identify how much of each upsampled color to be used for this frame - const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)); - const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor); - - const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); - const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor)); - const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor); - - const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); - - FFX_UNROLL - for (FfxInt32 row = 0; row < 3; row++) { - FFX_UNROLL - for (FfxInt32 col = 0; col < 3; col++) { - FfxInt32 iSampleIndex = col + (row << 2); - - const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); - const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow); - FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset; - - FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; - - const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); - FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); - - fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight); - - // Update rectification box - { - const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); - const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); - - const FfxBoolean bInitialSample = (row == 0) && (col == 0); - RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); - } - } - } - - RectificationBoxComputeVarianceBoxData(clippingBox); - - fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR3UPSCALER_EPSILON); - - if (fColorAndWeight.w > FSR3UPSCALER_EPSILON) { - // Normalize for deringing (we need to compare colors) - fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w; - fColorAndWeight.w *= fUpsampleLanczosWeightScale; - - Deringing(clippingBox, fColorAndWeight.xyz); - } - - #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr3upscaler_force16_end.h" - #endif - - return fColorAndWeight; -} - -#endif //!defined( FFX_FSR3UPSCALER_UPSAMPLE_H ) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta deleted file mode 100644 index e4153a0..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 3e7832c4a9154414f9eaa125acfe6cd5 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1.meta deleted file mode 100644 index 731c94f..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 09438bc445e66204f970dc99ca8dae5a -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h deleted file mode 100644 index e780995..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h +++ /dev/null @@ -1,1252 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -/// @defgroup FfxGPUFsr1 FidelityFX FSR1 -/// FidelityFX Super Resolution 1 GPU documentation -/// -/// @ingroup FfxGPUEffects - -/// Setup required constant values for EASU (works on CPU or GPU). -/// -/// @param [out] con0 -/// @param [out] con1 -/// @param [out] con2 -/// @param [out] con3 -/// @param [in] inputViewportInPixelsX The rendered image resolution being upscaled in X dimension. -/// @param [in] inputViewportInPixelsY The rendered image resolution being upscaled in Y dimension. -/// @param [in] inputSizeInPixelsX The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension. -/// @param [in] inputSizeInPixelsY The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension. -/// @param [in] outputSizeInPixelsX The display resolution which the input image gets upscaled to in X dimension. -/// @param [in] outputSizeInPixelsY The display resolution which the input image gets upscaled to in Y dimension. -/// -/// @ingroup FfxGPUFsr1 -FFX_STATIC void ffxFsrPopulateEasuConstants( - FFX_PARAMETER_INOUT FfxUInt32x4 con0, - FFX_PARAMETER_INOUT FfxUInt32x4 con1, - FFX_PARAMETER_INOUT FfxUInt32x4 con2, - FFX_PARAMETER_INOUT FfxUInt32x4 con3, - FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, - FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, - FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, - FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, - FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, - FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY) -{ - // Output integer position to a pixel position in viewport. - con0[0] = ffxAsUInt32(inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX)); - con0[1] = ffxAsUInt32(inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY)); - con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5)); - con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5)); - - // Viewport pixel position to normalized image space. - // This is used to get upper-left of 'F' tap. - con1[0] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsX)); - con1[1] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsY)); - - // Centers of gather4, first offset from upper-left of 'F'. - // +---+---+ - // | | | - // +--(0)--+ - // | b | c | - // +---F---+---+---+ - // | e | f | g | h | - // +--(1)--+--(2)--+ - // | i | j | k | l | - // +---+---+---+---+ - // | n | o | - // +--(3)--+ - // | | | - // +---+---+ - con1[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); - con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsY)); - - // These are from (0) instead of 'F'. - con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsX)); - con2[1] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); - con2[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); - con2[3] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); - con3[0] = ffxAsUInt32(FfxFloat32(0.0) * ffxReciprocal(inputSizeInPixelsX)); - con3[1] = ffxAsUInt32(FfxFloat32(4.0) * ffxReciprocal(inputSizeInPixelsY)); - con3[2] = con3[3] = 0; -} - -/// Setup required constant values for EASU (works on CPU or GPU). -/// -/// @param [out] con0 -/// @param [out] con1 -/// @param [out] con2 -/// @param [out] con3 -/// @param [in] inputViewportInPixelsX The resolution of the input in the X dimension. -/// @param [in] inputViewportInPixelsY The resolution of the input in the Y dimension. -/// @param [in] inputSizeInPixelsX The input size in pixels in the X dimension. -/// @param [in] inputSizeInPixelsY The input size in pixels in the Y dimension. -/// @param [in] outputSizeInPixelsX The output size in pixels in the X dimension. -/// @param [in] outputSizeInPixelsY The output size in pixels in the Y dimension. -/// @param [in] inputOffsetInPixelsX The input image offset in the X dimension into the resource containing it (useful for dynamic resolution). -/// @param [in] inputOffsetInPixelsY The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution). -/// -/// @ingroup FfxGPUFsr1 -FFX_STATIC void ffxFsrPopulateEasuConstantsOffset( - FFX_PARAMETER_INOUT FfxUInt32x4 con0, - FFX_PARAMETER_INOUT FfxUInt32x4 con1, - FFX_PARAMETER_INOUT FfxUInt32x4 con2, - FFX_PARAMETER_INOUT FfxUInt32x4 con3, - FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, - FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, - FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, - FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, - FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, - FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY, - FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX, - FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY) -{ - ffxFsrPopulateEasuConstants( - con0, - con1, - con2, - con3, - inputViewportInPixelsX, - inputViewportInPixelsY, - inputSizeInPixelsX, - inputSizeInPixelsY, - outputSizeInPixelsX, - outputSizeInPixelsY); - - // override - con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX); - con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY); -} - -#if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) -// Input callback prototypes, need to be implemented by calling shader -FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p); -FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p); -FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p); - -// Filtering for a given tap for the scalar. -void fsrEasuTapFloat( - FFX_PARAMETER_INOUT FfxFloat32x3 accumulatedColor, // Accumulated color, with negative lobe. - FFX_PARAMETER_INOUT FfxFloat32 accumulatedWeight, // Accumulated weight. - FFX_PARAMETER_IN FfxFloat32x2 pixelOffset, // Pixel offset from resolve position to tap. - FFX_PARAMETER_IN FfxFloat32x2 gradientDirection, // Gradient direction. - FFX_PARAMETER_IN FfxFloat32x2 length, // Length. - FFX_PARAMETER_IN FfxFloat32 negativeLobeStrength, // Negative lobe strength. - FFX_PARAMETER_IN FfxFloat32 clippingPoint, // Clipping point. - FFX_PARAMETER_IN FfxFloat32x3 color) // Tap color. -{ - // Rotate offset by direction. - FfxFloat32x2 rotatedOffset; - rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y); - rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x); - - // Anisotropy. - rotatedOffset *= length; - - // Compute distance^2. - FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y; - - // Limit to the window as at corner, 2 taps can easily be outside. - distanceSquared = ffxMin(distanceSquared, clippingPoint); - - // Approximation of lancos2 without sin() or rcp(), or sqrt() to get x. - // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 - // |_______________________________________| |_______________| - // base window - // The general form of the 'base' is, - // (a*(b*x^2-1)^2-(a-1)) - // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. - FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0); - FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0); - weightB *= weightB; - weightA *= weightA; - weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0)); - FfxFloat32 weight = weightB * weightA; - - // Do weighted average. - accumulatedColor += color * weight; - accumulatedWeight += weight; -} - -// Accumulate direction and length. -void fsrEasuSetFloat( - FFX_PARAMETER_INOUT FfxFloat32x2 direction, - FFX_PARAMETER_INOUT FfxFloat32 length, - FFX_PARAMETER_IN FfxFloat32x2 pp, - FFX_PARAMETER_IN FfxBoolean biS, - FFX_PARAMETER_IN FfxBoolean biT, - FFX_PARAMETER_IN FfxBoolean biU, - FFX_PARAMETER_IN FfxBoolean biV, - FFX_PARAMETER_IN FfxFloat32 lA, - FFX_PARAMETER_IN FfxFloat32 lB, - FFX_PARAMETER_IN FfxFloat32 lC, - FFX_PARAMETER_IN FfxFloat32 lD, - FFX_PARAMETER_IN FfxFloat32 lE) -{ - // Compute bilinear weight, branches factor out as predicates are compiler time immediates. - // s t - // u v - FfxFloat32 weight = FfxFloat32(0.0); - if (biS) - weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y); - if (biT) - weight = pp.x * (FfxFloat32(1.0) - pp.y); - if (biU) - weight = (FfxFloat32(1.0) - pp.x) * pp.y; - if (biV) - weight = pp.x * pp.y; - - // Direction is the '+' diff. - // a - // b c d - // e - // Then takes magnitude from abs average of both sides of 'c'. - // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. - FfxFloat32 dc = lD - lC; - FfxFloat32 cb = lC - lB; - FfxFloat32 lengthX = max(abs(dc), abs(cb)); - lengthX = ffxApproximateReciprocal(lengthX); - FfxFloat32 directionX = lD - lB; - direction.x += directionX * weight; - lengthX = ffxSaturate(abs(directionX) * lengthX); - lengthX *= lengthX; - length += lengthX * weight; - - // Repeat for the y axis. - FfxFloat32 ec = lE - lC; - FfxFloat32 ca = lC - lA; - FfxFloat32 lengthY = max(abs(ec), abs(ca)); - lengthY = ffxApproximateReciprocal(lengthY); - FfxFloat32 directionY = lE - lA; - direction.y += directionY * weight; - lengthY = ffxSaturate(abs(directionY) * lengthY); - lengthY *= lengthY; - length += lengthY * weight; -} - -/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations. -/// -/// @param [out] outPixel The computed color of a pixel. -/// @param [in] integerPosition Integer pixel position within the output. -/// @param [in] con0 The first constant value generated by ffxFsrPopulateEasuConstants. -/// @param [in] con1 The second constant value generated by ffxFsrPopulateEasuConstants. -/// @param [in] con2 The third constant value generated by ffxFsrPopulateEasuConstants. -/// @param [in] con3 The fourth constant value generated by ffxFsrPopulateEasuConstants. -/// -/// @ingroup FSR -void ffxFsrEasuFloat( - FFX_PARAMETER_OUT FfxFloat32x3 pix, - FFX_PARAMETER_IN FfxUInt32x2 ip, - FFX_PARAMETER_IN FfxUInt32x4 con0, - FFX_PARAMETER_IN FfxUInt32x4 con1, - FFX_PARAMETER_IN FfxUInt32x4 con2, - FFX_PARAMETER_IN FfxUInt32x4 con3) -{ - // Get position of 'f'. - FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); - FfxFloat32x2 fp = floor(pp); - pp -= fp; - - // 12-tap kernel. - // b c - // e f g h - // i j k l - // n o - // Gather 4 ordering. - // a b - // r g - // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, - // a b <- unused (z) - // r g - // a b a b - // r g r g - // a b - // r g <- unused (z) - // Allowing dead-code removal to remove the 'z's. - FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); - - // These are from p0 to avoid pulling two constants on pre-Navi hardware. - FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); - FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); - FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); - FfxFloat32x4 bczzR = FsrEasuRF(p0); - FfxFloat32x4 bczzG = FsrEasuGF(p0); - FfxFloat32x4 bczzB = FsrEasuBF(p0); - FfxFloat32x4 ijfeR = FsrEasuRF(p1); - FfxFloat32x4 ijfeG = FsrEasuGF(p1); - FfxFloat32x4 ijfeB = FsrEasuBF(p1); - FfxFloat32x4 klhgR = FsrEasuRF(p2); - FfxFloat32x4 klhgG = FsrEasuGF(p2); - FfxFloat32x4 klhgB = FsrEasuBF(p2); - FfxFloat32x4 zzonR = FsrEasuRF(p3); - FfxFloat32x4 zzonG = FsrEasuGF(p3); - FfxFloat32x4 zzonB = FsrEasuBF(p3); - - // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD). - FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG); - FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG); - FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG); - FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG); - - // Rename. - FfxFloat32 bL = bczzL.x; - FfxFloat32 cL = bczzL.y; - FfxFloat32 iL = ijfeL.x; - FfxFloat32 jL = ijfeL.y; - FfxFloat32 fL = ijfeL.z; - FfxFloat32 eL = ijfeL.w; - FfxFloat32 kL = klhgL.x; - FfxFloat32 lL = klhgL.y; - FfxFloat32 hL = klhgL.z; - FfxFloat32 gL = klhgL.w; - FfxFloat32 oL = zzonL.z; - FfxFloat32 nL = zzonL.w; - - // Accumulate for bilinear interpolation. - FfxFloat32x2 dir = ffxBroadcast2(0.0); - FfxFloat32 len = FfxFloat32(0.0); - fsrEasuSetFloat(dir, len, pp, FFX_TRUE, FFX_FALSE, FFX_FALSE, FFX_FALSE, bL, eL, fL, gL, jL); - fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_TRUE, FFX_FALSE, FFX_FALSE, cL, fL, gL, hL, kL); - fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_TRUE, FFX_FALSE, fL, iL, jL, kL, nL); - fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_FALSE, FFX_TRUE, gL, jL, kL, lL, oL); - - // Normalize with approximation, and cleanup close to zero. - FfxFloat32x2 dir2 = dir * dir; - FfxFloat32 dirR = dir2.x + dir2.y; - FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0); - dirR = ffxApproximateReciprocalSquareRoot(dirR); - dirR = zro ? FfxFloat32(1.0) : dirR; - dir.x = zro ? FfxFloat32(1.0) : dir.x; - dir *= ffxBroadcast2(dirR); - - // Transform from {0 to 2} to {0 to 1} range, and shape with square. - len = len * FfxFloat32(0.5); - len *= len; - - // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}. - FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y))); - - // Anisotropic length after rotation, - // x := 1.0 lerp to 'stretch' on edges - // y := 1.0 lerp to 2x on edges - FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len); - - // Based on the amount of 'edge', - // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}. - FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len; - - // Set distance^2 clipping point to the end of the adjustable window. - FfxFloat32 clp = ffxApproximateReciprocal(lob); - - // Accumulation mixed with min/max of 4 nearest. - // b c - // e f g h - // i j k l - // n o - FfxFloat32x3 min4 = - ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), - FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); - FfxFloat32x3 max4 = - max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); - - // Accumulation. - FfxFloat32x3 aC = ffxBroadcast3(0.0); - FfxFloat32 aW = FfxFloat32(0.0); - fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x)); // b - fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y)); // c - fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x)); // i - fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)); // j - fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z)); // f - fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w)); // e - fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); // k - fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y)); // l - fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z)); // h - fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w)); // g - fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z)); // o - fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n - - // Normalize and dering. - pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW)))); -} -#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) - -#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF) -// Input callback prototypes, need to be implemented by calling shader -FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p); -FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p); -FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p); - -// This runs 2 taps in parallel. -void FsrEasuTapH( - FFX_PARAMETER_INOUT FfxFloat16x2 aCR, - FFX_PARAMETER_INOUT FfxFloat16x2 aCG, - FFX_PARAMETER_INOUT FfxFloat16x2 aCB, - FFX_PARAMETER_INOUT FfxFloat16x2 aW, - FFX_PARAMETER_IN FfxFloat16x2 offX, - FFX_PARAMETER_IN FfxFloat16x2 offY, - FFX_PARAMETER_IN FfxFloat16x2 dir, - FFX_PARAMETER_IN FfxFloat16x2 len, - FFX_PARAMETER_IN FfxFloat16 lob, - FFX_PARAMETER_IN FfxFloat16 clp, - FFX_PARAMETER_IN FfxFloat16x2 cR, - FFX_PARAMETER_IN FfxFloat16x2 cG, - FFX_PARAMETER_IN FfxFloat16x2 cB) -{ - FfxFloat16x2 vX, vY; - vX = offX * dir.xx + offY * dir.yy; - vY = offX * (-dir.yy) + offY * dir.xx; - vX *= len.x; - vY *= len.y; - FfxFloat16x2 d2 = vX * vX + vY * vY; - d2 = min(d2, FFX_BROADCAST_FLOAT16X2(clp)); - FfxFloat16x2 wB = FFX_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0); - FfxFloat16x2 wA = FFX_BROADCAST_FLOAT16X2(lob) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0); - wB *= wB; - wA *= wA; - wB = FFX_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFX_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0)); - FfxFloat16x2 w = wB * wA; - aCR += cR * w; - aCG += cG * w; - aCB += cB * w; - aW += w; -} - -// This runs 2 taps in parallel. -void FsrEasuSetH( - FFX_PARAMETER_INOUT FfxFloat16x2 dirPX, - FFX_PARAMETER_INOUT FfxFloat16x2 dirPY, - FFX_PARAMETER_INOUT FfxFloat16x2 lenP, - FFX_PARAMETER_IN FfxFloat16x2 pp, - FFX_PARAMETER_IN FfxBoolean biST, - FFX_PARAMETER_IN FfxBoolean biUV, - FFX_PARAMETER_IN FfxFloat16x2 lA, - FFX_PARAMETER_IN FfxFloat16x2 lB, - FFX_PARAMETER_IN FfxFloat16x2 lC, - FFX_PARAMETER_IN FfxFloat16x2 lD, - FFX_PARAMETER_IN FfxFloat16x2 lE) -{ - FfxFloat16x2 w = FFX_BROADCAST_FLOAT16X2(0.0); - - if (biST) - w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(FFX_BROADCAST_FLOAT16(1.0) - pp.y); - - if (biUV) - w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(pp.y); - - // ABS is not free in the packed FP16 path. - FfxFloat16x2 dc = lD - lC; - FfxFloat16x2 cb = lC - lB; - FfxFloat16x2 lenX = max(abs(dc), abs(cb)); - lenX = ffxReciprocalHalf(lenX); - - FfxFloat16x2 dirX = lD - lB; - dirPX += dirX * w; - lenX = FfxFloat16x2(ffxSaturate(abs(dirX) * lenX)); - lenX *= lenX; - lenP += lenX * w; - FfxFloat16x2 ec = lE - lC; - FfxFloat16x2 ca = lC - lA; - FfxFloat16x2 lenY = max(abs(ec), abs(ca)); - lenY = ffxReciprocalHalf(lenY); - FfxFloat16x2 dirY = lE - lA; - dirPY += dirY * w; - lenY = FfxFloat16x2(ffxSaturate(abs(dirY) * lenY)); - lenY *= lenY; - lenP += lenY * w; -} - -void FsrEasuH( - FFX_PARAMETER_OUT FfxFloat16x3 pix, - FFX_PARAMETER_IN FfxUInt32x2 ip, - FFX_PARAMETER_IN FfxUInt32x4 con0, - FFX_PARAMETER_IN FfxUInt32x4 con1, - FFX_PARAMETER_IN FfxUInt32x4 con2, - FFX_PARAMETER_IN FfxUInt32x4 con3) -{ - FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); - FfxFloat32x2 fp = floor(pp); - pp -= fp; - FfxFloat16x2 ppp = FfxFloat16x2(pp); - - FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); - FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); - FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); - FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); - FfxFloat16x4 bczzR = FsrEasuRH(p0); - FfxFloat16x4 bczzG = FsrEasuGH(p0); - FfxFloat16x4 bczzB = FsrEasuBH(p0); - FfxFloat16x4 ijfeR = FsrEasuRH(p1); - FfxFloat16x4 ijfeG = FsrEasuGH(p1); - FfxFloat16x4 ijfeB = FsrEasuBH(p1); - FfxFloat16x4 klhgR = FsrEasuRH(p2); - FfxFloat16x4 klhgG = FsrEasuGH(p2); - FfxFloat16x4 klhgB = FsrEasuBH(p2); - FfxFloat16x4 zzonR = FsrEasuRH(p3); - FfxFloat16x4 zzonG = FsrEasuGH(p3); - FfxFloat16x4 zzonB = FsrEasuBH(p3); - - FfxFloat16x4 bczzL = bczzB * FFX_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFX_BROADCAST_FLOAT16X4(0.5) + bczzG); - FfxFloat16x4 ijfeL = ijfeB * FFX_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFX_BROADCAST_FLOAT16X4(0.5) + ijfeG); - FfxFloat16x4 klhgL = klhgB * FFX_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFX_BROADCAST_FLOAT16X4(0.5) + klhgG); - FfxFloat16x4 zzonL = zzonB * FFX_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFX_BROADCAST_FLOAT16X4(0.5) + zzonG); - FfxFloat16 bL = bczzL.x; - FfxFloat16 cL = bczzL.y; - FfxFloat16 iL = ijfeL.x; - FfxFloat16 jL = ijfeL.y; - FfxFloat16 fL = ijfeL.z; - FfxFloat16 eL = ijfeL.w; - FfxFloat16 kL = klhgL.x; - FfxFloat16 lL = klhgL.y; - FfxFloat16 hL = klhgL.z; - FfxFloat16 gL = klhgL.w; - FfxFloat16 oL = zzonL.z; - FfxFloat16 nL = zzonL.w; - - // This part is different, accumulating 2 taps in parallel. - FfxFloat16x2 dirPX = FFX_BROADCAST_FLOAT16X2(0.0); - FfxFloat16x2 dirPY = FFX_BROADCAST_FLOAT16X2(0.0); - FfxFloat16x2 lenP = FFX_BROADCAST_FLOAT16X2(0.0); - FsrEasuSetH(dirPX, - dirPY, - lenP, - ppp, - FfxBoolean(true), - FfxBoolean(false), - FfxFloat16x2(bL, cL), - FfxFloat16x2(eL, fL), - FfxFloat16x2(fL, gL), - FfxFloat16x2(gL, hL), - FfxFloat16x2(jL, kL)); - FsrEasuSetH(dirPX, - dirPY, - lenP, - ppp, - FfxBoolean(false), - FfxBoolean(true), - FfxFloat16x2(fL, gL), - FfxFloat16x2(iL, jL), - FfxFloat16x2(jL, kL), - FfxFloat16x2(kL, lL), - FfxFloat16x2(nL, oL)); - FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g); - FfxFloat16 len = lenP.r + lenP.g; - - FfxFloat16x2 dir2 = dir * dir; - FfxFloat16 dirR = dir2.x + dir2.y; - FfxUInt32 zro = FfxUInt32(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0)); - dirR = ffxApproximateReciprocalSquareRootHalf(dirR); - dirR = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR; - dir.x = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dir.x; - dir *= FFX_BROADCAST_FLOAT16X2(dirR); - len = len * FFX_BROADCAST_FLOAT16(0.5); - len *= len; - FfxFloat16 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y))); - FfxFloat16x2 len2 = - FfxFloat16x2(FFX_BROADCAST_FLOAT16(1.0) + (stretch - FFX_BROADCAST_FLOAT16(1.0)) * len, FFX_BROADCAST_FLOAT16(1.0) + FFX_BROADCAST_FLOAT16(-0.5) * len); - FfxFloat16 lob = FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len; - FfxFloat16 clp = ffxApproximateReciprocalHalf(lob); - - // FP16 is different, using packed trick to do min and max in same operation. - FfxFloat16x2 bothR = - max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x))); - FfxFloat16x2 bothG = - max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x))); - FfxFloat16x2 bothB = - max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x))); - - // This part is different for FP16, working pairs of taps at a time. - FfxFloat16x2 pR = FFX_BROADCAST_FLOAT16X2(0.0); - FfxFloat16x2 pG = FFX_BROADCAST_FLOAT16X2(0.0); - FfxFloat16x2 pB = FFX_BROADCAST_FLOAT16X2(0.0); - FfxFloat16x2 pW = FFX_BROADCAST_FLOAT16X2(0.0); - FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy); - FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy); - FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw); - FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy); - FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw); - FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw); - FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y); - FfxFloat16 aW = pW.x + pW.y; - - // Slightly different for FP16 version due to combined min and max. - pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW)))); -} -#endif // #if defined(FFX_GPU) && defined(FFX_HALF) && defined(FFX_FSR_EASU_HALF) - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING -// -//------------------------------------------------------------------------------------------------------------------------------ -// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness. -// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping. -// RCAS also has a built in process to limit sharpening of what it detects as possible noise. -// RCAS sharper does not support scaling, as it should be applied after EASU scaling. -// Pass EASU output straight into RCAS, no color conversions necessary. -//------------------------------------------------------------------------------------------------------------------------------ -// RCAS is based on the following logic. -// RCAS uses a 5 tap filter in a cross pattern (same as CAS), -// w n -// w 1 w for taps w m e -// w s -// Where 'w' is the negative lobe weight. -// output = (w*(n+e+w+s)+m)/(4*w+1) -// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range, -// 0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s) -// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) -// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. -// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. -// So RCAS uses 4x the maximum and 4x the minimum (depending on equation)in place of the individual taps. -// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. -// This stabilizes RCAS. -// RCAS does a simple highpass which is normalized against the local contrast then shaped, -// 0.25 -// 0.25 -1 0.25 -// 0.25 -// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. -// -// GLSL example for the required callbacks : -// -// FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));} -// void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b) -// { -// //do any simple input color conversions here or leave empty if none needed -// } -// -// FsrRcasCon need to be called from the CPU or GPU to set up constants. -// Including a GPU example here, the 'con' value would be stored out to a constant buffer. -// -// FfxUInt32x4 con; -// FsrRcasCon(con, -// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. -// --------------- -// RCAS sharpening supports a CAS-like pass-through alpha via, -// #define FSR_RCAS_PASSTHROUGH_ALPHA 1 -// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise. -// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define, -// #define FSR_RCAS_DENOISE 1 -//============================================================================================================================== -// This is set at the limit of providing unnatural results for sharpening. -#define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// CONSTANT SETUP -//============================================================================================================================== -// Call to setup required constant values (works on CPU or GPU). - FFX_STATIC void FsrRcasCon(FfxUInt32x4 con, - // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. - FfxFloat32 sharpness) - { - // Transform from stops to linear value. - sharpness = exp2(-sharpness); - FfxFloat32x2 hSharp = {sharpness, sharpness}; - con[0] = ffxAsUInt32(sharpness); - con[1] = packHalf2x16(hSharp); - con[2] = 0; - con[3] = 0; - } - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// NON-PACKED 32-BIT VERSION -//============================================================================================================================== -#if defined(FFX_GPU)&&defined(FSR_RCAS_F) - // Input callback prototypes that need to be implemented by calling shader - FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p); - void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b); -//------------------------------------------------------------------------------------------------------------------------------ - void FsrRcasF(out FfxFloat32 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. - out FfxFloat32 pixG, - out FfxFloat32 pixB, -#ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out FfxFloat32 pixA, -#endif - FfxUInt32x2 ip, // Integer pixel position in output. - FfxUInt32x4 con) - { // Constant generated by RcasSetup(). - // Algorithm uses minimal 3x3 pixel neighborhood. - // b - // d e f - // h - FfxInt32x2 sp = FfxInt32x2(ip); - FfxFloat32x3 b = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb; - FfxFloat32x3 d = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb; -#ifdef FSR_RCAS_PASSTHROUGH_ALPHA - FfxFloat32x4 ee = FsrRcasLoadF(sp); - FfxFloat32x3 e = ee.rgb; - pixA = ee.a; -#else - FfxFloat32x3 e = FsrRcasLoadF(sp).rgb; -#endif - FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb; - FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb; - // Rename (32-bit) or regroup (16-bit). - FfxFloat32 bR = b.r; - FfxFloat32 bG = b.g; - FfxFloat32 bB = b.b; - FfxFloat32 dR = d.r; - FfxFloat32 dG = d.g; - FfxFloat32 dB = d.b; - FfxFloat32 eR = e.r; - FfxFloat32 eG = e.g; - FfxFloat32 eB = e.b; - FfxFloat32 fR = f.r; - FfxFloat32 fG = f.g; - FfxFloat32 fB = f.b; - FfxFloat32 hR = h.r; - FfxFloat32 hG = h.g; - FfxFloat32 hB = h.b; - // Run optional input transform. - FsrRcasInputF(bR, bG, bB); - FsrRcasInputF(dR, dG, dB); - FsrRcasInputF(eR, eG, eB); - FsrRcasInputF(fR, fG, fB); - FsrRcasInputF(hR, hG, hB); - // Luma times 2. - FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG); - FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG); - FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG); - FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG); - FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG); - // Noise detection. - FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL; - nz = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL))); - nz = FfxFloat32(-0.5) * nz + FfxFloat32(1.0); - // Min and max of ring. - FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR); - FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG); - FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB); - FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR); - FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG); - FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB); - // Immediate constants for peak range. - FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0); - // Limiters, these need to be high precision RCPs. - FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R); - FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G); - FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B); - FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y); - FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y); - FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y); - FfxFloat32 lobeR = max(-hitMinR, hitMaxR); - FfxFloat32 lobeG = max(-hitMinG, hitMaxG); - FfxFloat32 lobeB = max(-hitMinB, hitMaxB); - FfxFloat32 lobe = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat - (con.x); - // Apply noise removal. -#ifdef FSR_RCAS_DENOISE - lobe *= nz; -#endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0)); - pixR = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL; - pixG = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL; - pixB = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL; - return; - } -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// NON-PACKED 16-BIT VERSION -//============================================================================================================================== -#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FSR_RCAS_H) - // Input callback prototypes that need to be implemented by calling shader - FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p); - void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b); -//------------------------------------------------------------------------------------------------------------------------------ - void FsrRcasH( - out FfxFloat16 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. - out FfxFloat16 pixG, - out FfxFloat16 pixB, - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out FfxFloat16 pixA, - #endif - FfxUInt32x2 ip, // Integer pixel position in output. - FfxUInt32x4 con){ // Constant generated by RcasSetup(). - // Sharpening algorithm uses minimal 3x3 pixel neighborhood. - // b - // d e f - // h - FfxInt16x2 sp=FfxInt16x2(ip); - FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb; - FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - FfxFloat16x4 ee=FsrRcasLoadH(sp); - FfxFloat16x3 e=ee.rgb;pixA=ee.a; - #else - FfxFloat16x3 e=FsrRcasLoadH(sp).rgb; - #endif - FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb; - FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb; - // Rename (32-bit) or regroup (16-bit). - FfxFloat16 bR=b.r; - FfxFloat16 bG=b.g; - FfxFloat16 bB=b.b; - FfxFloat16 dR=d.r; - FfxFloat16 dG=d.g; - FfxFloat16 dB=d.b; - FfxFloat16 eR=e.r; - FfxFloat16 eG=e.g; - FfxFloat16 eB=e.b; - FfxFloat16 fR=f.r; - FfxFloat16 fG=f.g; - FfxFloat16 fB=f.b; - FfxFloat16 hR=h.r; - FfxFloat16 hG=h.g; - FfxFloat16 hB=h.b; - // Run optional input transform. - FsrRcasInputH(bR,bG,bB); - FsrRcasInputH(dR,dG,dB); - FsrRcasInputH(eR,eG,eB); - FsrRcasInputH(fR,fG,fB); - FsrRcasInputH(hR,hG,hB); - // Luma times 2. - FfxFloat16 bL=bB*FFX_BROADCAST_FLOAT16(0.5)+(bR*FFX_BROADCAST_FLOAT16(0.5)+bG); - FfxFloat16 dL=dB*FFX_BROADCAST_FLOAT16(0.5)+(dR*FFX_BROADCAST_FLOAT16(0.5)+dG); - FfxFloat16 eL=eB*FFX_BROADCAST_FLOAT16(0.5)+(eR*FFX_BROADCAST_FLOAT16(0.5)+eG); - FfxFloat16 fL=fB*FFX_BROADCAST_FLOAT16(0.5)+(fR*FFX_BROADCAST_FLOAT16(0.5)+fG); - FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG); - // Noise detection. - FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL; - nz=FfxFloat16(ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)))); - nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0); - // Min and max of ring. - FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR); - FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG); - FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB); - FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR); - FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG); - FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB); - // Immediate constants for peak range. - FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); - // Limiters, these need to be high precision RCPs. - FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R); - FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G); - FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B); - FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y); - FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y); - FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y); - FfxFloat16 lobeR=max(-hitMinR,hitMaxR); - FfxFloat16 lobeG=max(-hitMinG,hitMaxG); - FfxFloat16 lobeB=max(-hitMinB,hitMaxB); - FfxFloat16 lobe=max(FFX_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16(0.0)))*FFX_UINT32_TO_FLOAT16X2(con.y).x; - // Apply noise removal. - #ifdef FSR_RCAS_DENOISE - lobe*=nz; - #endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16(4.0)*lobe+FFX_BROADCAST_FLOAT16(1.0)); - pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; - pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; - pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL; -} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// PACKED 16-BIT VERSION -//============================================================================================================================== -#if defined(FFX_GPU)&& FFX_HALF == 1 && defined(FSR_RCAS_HX2) - // Input callback prototypes that need to be implemented by the calling shader - FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p); - void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b); -//------------------------------------------------------------------------------------------------------------------------------ - // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store. - void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB){ - #ifdef FFX_HLSL - // Invoke a slower path for DX only, since it won't allow uninitialized values. - pix0.a=pix1.a=0.0; - #endif - pix0.rgb=FfxFloat16x3(pixR.x,pixG.x,pixB.x); - pix1.rgb=FfxFloat16x3(pixR.y,pixG.y,pixB.y);} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrRcasHx2( - // Output values are for 2 8x8 tiles in a 16x8 region. - // pix.x = left 8x8 tile - // pix.y = right 8x8 tile - // This enables later processing to easily be packed as well. - out FfxFloat16x2 pixR, - out FfxFloat16x2 pixG, - out FfxFloat16x2 pixB, - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out FfxFloat16x2 pixA, - #endif - FfxUInt32x2 ip, // Integer pixel position in output. - FfxUInt32x4 con){ // Constant generated by RcasSetup(). - // No scaling algorithm uses minimal 3x3 pixel neighborhood. - FfxInt16x2 sp0=FfxInt16x2(ip); - FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb; - FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0); - FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a; - #else - FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb; - #endif - FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb; - FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb; - FfxInt16x2 sp1=sp0+FfxInt16x2(8,0); - FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb; - FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1); - FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a; - #else - FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb; - #endif - FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb; - FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb; - // Arrays of Structures to Structures of Arrays conversion. - FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r); - FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g); - FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b); - FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r); - FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g); - FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b); - FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r); - FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g); - FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b); - FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r); - FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g); - FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b); - FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r); - FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g); - FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b); - // Run optional input transform. - FsrRcasInputHx2(bR,bG,bB); - FsrRcasInputHx2(dR,dG,dB); - FsrRcasInputHx2(eR,eG,eB); - FsrRcasInputHx2(fR,fG,fB); - FsrRcasInputHx2(hR,hG,hB); - // Luma times 2. - FfxFloat16x2 bL=bB*FFX_BROADCAST_FLOAT16X2(0.5)+(bR*FFX_BROADCAST_FLOAT16X2(0.5)+bG); - FfxFloat16x2 dL=dB*FFX_BROADCAST_FLOAT16X2(0.5)+(dR*FFX_BROADCAST_FLOAT16X2(0.5)+dG); - FfxFloat16x2 eL=eB*FFX_BROADCAST_FLOAT16X2(0.5)+(eR*FFX_BROADCAST_FLOAT16X2(0.5)+eG); - FfxFloat16x2 fL=fB*FFX_BROADCAST_FLOAT16X2(0.5)+(fR*FFX_BROADCAST_FLOAT16X2(0.5)+fG); - FfxFloat16x2 hL=hB*FFX_BROADCAST_FLOAT16X2(0.5)+(hR*FFX_BROADCAST_FLOAT16X2(0.5)+hG); - // Noise detection. - FfxFloat16x2 nz=FFX_BROADCAST_FLOAT16X2(0.25)*bL+FFX_BROADCAST_FLOAT16X2(0.25)*dL+FFX_BROADCAST_FLOAT16X2(0.25)*fL+FFX_BROADCAST_FLOAT16X2(0.25)*hL-eL; - nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))); - nz=FFX_BROADCAST_FLOAT16X2(-0.5)*nz+FFX_BROADCAST_FLOAT16X2(1.0); - // Min and max of ring. - FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR); - FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG); - FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB); - FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR); - FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG); - FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB); - // Immediate constants for peak range. - FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); - // Limiters, these need to be high precision RCPs. - FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R); - FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G); - FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B); - FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y); - FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y); - FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y); - FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR); - FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG); - FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB); - FfxFloat16x2 lobe=max(FFX_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16X2(0.0)))*FFX_BROADCAST_FLOAT16X2(FFX_UINT32_TO_FLOAT16X2(con.y).x); - // Apply noise removal. - #ifdef FSR_RCAS_DENOISE - lobe*=nz; - #endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(4.0)*lobe+FFX_BROADCAST_FLOAT16X2(1.0)); - pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; - pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; - pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR -// -//------------------------------------------------------------------------------------------------------------------------------ -// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts. -// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel. -// The 'Lfga*()' functions provide a convenient way to introduce grain. -// These functions limit grain based on distance to signal limits. -// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality. -// Grain application should be done in a linear colorspace. -// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased). -//------------------------------------------------------------------------------------------------------------------------------ -// Usage, -// FsrLfga*( -// color, // In/out linear colorspace color {0 to 1} ranged. -// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain. -// amount); // Amount of grain (0 to 1} ranged. -//------------------------------------------------------------------------------------------------------------------------------ -// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)' -//============================================================================================================================== -#if defined(FFX_GPU) - // Maximum grain is the minimum distance to the signal limit. - void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a) - { - c += (t * ffxBroadcast3(a)) * ffxMin(ffxBroadcast3(1.0) - c, c); - } -#endif -//============================================================================================================================== -#if defined(FFX_GPU)&& FFX_HALF == 1 - // Half precision version (slower). - void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a) - { - c += (t * FFX_BROADCAST_FLOAT16X3(a)) * min(FFX_BROADCAST_FLOAT16X3(1.0) - c, c); - } - //------------------------------------------------------------------------------------------------------------------------------ - // Packed half precision version (faster). - void FsrLfgaHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 tR,FfxFloat16x2 tG,FfxFloat16x2 tB,FfxFloat16 a){ - cR+=(tR*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cR,cR);cG+=(tG*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cG,cG);cB+=(tB*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cB,cB);} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER -// -//------------------------------------------------------------------------------------------------------------------------------ -// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear. -// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering. -//------------------------------------------------------------------------------------------------------------------------------ -// Reversible tonemapper usage, -// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}. -// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}. -//============================================================================================================================== -#if defined(FFX_GPU) - void FsrSrtmF(inout FfxFloat32x3 c) - { - c *= ffxBroadcast3(rcp(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0))); - } - // The extra max solves the c=1.0 case (which is a /0). - void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(rcp(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));} -#endif -//============================================================================================================================== -#if defined(FFX_GPU )&& FFX_HALF == 1 - void FsrSrtmH(inout FfxFloat16x3 c) - { - c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(ffxMax3Half(c.r, c.g, c.b) + FFX_BROADCAST_FLOAT16(1.0))); - } - void FsrSrtmInvH(inout FfxFloat16x3 c) - { - c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16(1.0 / 32768.0), FFX_BROADCAST_FLOAT16(1.0) - ffxMax3Half(c.r, c.g, c.b)))); - } - //------------------------------------------------------------------------------------------------------------------------------ - void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB) - { - FfxFloat16x2 rcp = ffxReciprocalHalf(ffxMax3Half(cR, cG, cB) + FFX_BROADCAST_FLOAT16X2(1.0)); - cR *= rcp; - cG *= rcp; - cB *= rcp; - } - void FsrSrtmInvHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB) - { - FfxFloat16x2 rcp=ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16X2(1.0/32768.0),FFX_BROADCAST_FLOAT16X2(1.0)-ffxMax3Half(cR,cG,cB))); - cR*=rcp; - cG*=rcp; - cB*=rcp; - } -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER -// -//------------------------------------------------------------------------------------------------------------------------------ -// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. -// Gamma 2.0 is used so that the conversion back to linear is just to square the color. -// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. -// Given good non-biased temporal blue noise as dither input, -// the output dither will temporally conserve energy. -// This is done by choosing the linear nearest step point instead of perceptual nearest. -// See code below for details. -//------------------------------------------------------------------------------------------------------------------------------ -// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION -// =============================================== -// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'. -// - Thus rounding is to nearest. -// - NaN gets converted to zero. -// - INF is clamped to {0.0 to 1.0}. -//============================================================================================================================== -#if defined(FFX_GPU) - // Hand tuned integer position to dither value, with more values than simple checkerboard. - // Only 32-bit has enough precision for this compddation. - // Output is {0 to <1}. - FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f) - { - FfxFloat32 x = FfxFloat32(p.x + f); - FfxFloat32 y = FfxFloat32(p.y); - // The 1.61803 golden ratio. - FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); - // Number designed to provide a good visual pattern. - FfxFloat32 b = FfxFloat32(1.0 / 3.69); - x = x * a + (y * b); - return ffxFract(x); - } - //------------------------------------------------------------------------------------------------------------------------------ - // This version is 8-bit gamma 2.0. - // The 'c' input is {0 to 1}. - // Output is {0 to 1} ready for image store. - void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit) - { - FfxFloat32x3 n = ffxSqrt(c); - n = floor(n * ffxBroadcast3(255.0)) * ffxBroadcast3(1.0 / 255.0); - FfxFloat32x3 a = n * n; - FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 255.0); - b = b * b; - // Ratio of 'a' to 'b' required to produce 'c'. - // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges). - // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL. - FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b); - // Use the ratio as a cutoff to choose 'a' or 'b'. - // ffxIsGreaterThanZero() is a MUL. - c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 255.0)); - } - //------------------------------------------------------------------------------------------------------------------------------ - // This version is 10-bit gamma 2.0. - // The 'c' input is {0 to 1}. - // Output is {0 to 1} ready for image store. - void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit) - { - FfxFloat32x3 n = ffxSqrt(c); - n = floor(n * ffxBroadcast3(1023.0)) * ffxBroadcast3(1.0 / 1023.0); - FfxFloat32x3 a = n * n; - FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 1023.0); - b = b * b; - FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b); - c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 1023.0)); - } -#endif -//============================================================================================================================== -#if defined(FFX_GPU)&& FFX_HALF == 1 - FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f) - { - FfxFloat32 x = FfxFloat32(p.x + f); - FfxFloat32 y = FfxFloat32(p.y); - FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); - FfxFloat32 b = FfxFloat32(1.0 / 3.69); - x = x * a + (y * b); - return FfxFloat16(ffxFract(x)); - } - //------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit) - { - FfxFloat16x3 n = sqrt(c); - n = floor(n * FFX_BROADCAST_FLOAT16X3(255.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); - FfxFloat16x3 a = n * n; - FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); - b = b * b; - FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); - c = FfxFloat16x3(ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0))); - } - //------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit) - { - FfxFloat16x3 n = sqrt(c); - n = floor(n * FFX_BROADCAST_FLOAT16X3(1023.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); - FfxFloat16x3 a = n * n; - FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); - b = b * b; - FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); - c = FfxFloat16x3(ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0))); - } - //============================================================================================================================== - // This computes dither for positions 'p' and 'p+{8,0}'. - FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f) - { - FfxFloat32x2 x; - x.x = FfxFloat32(p.x + f); - x.y = x.x + FfxFloat32(8.0); - FfxFloat32 y = FfxFloat32(p.y); - FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); - FfxFloat32 b = FfxFloat32(1.0 / 3.69); - x = x * ffxBroadcast2(a) + ffxBroadcast2(y * b); - return FfxFloat16x2(ffxFract(x)); - } - //------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit) - { - FfxFloat16x2 nR = sqrt(cR); - FfxFloat16x2 nG = sqrt(cG); - FfxFloat16x2 nB = sqrt(cB); - nR = floor(nR * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); - nG = floor(nG * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); - nB = floor(nB * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); - FfxFloat16x2 aR = nR * nR; - FfxFloat16x2 aG = nG * nG; - FfxFloat16x2 aB = nB * nB; - FfxFloat16x2 bR = nR + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); - bR = bR * bR; - FfxFloat16x2 bG = nG + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); - bG = bG * bG; - FfxFloat16x2 bB = nB + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); - bB = bB * bB; - FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR); - FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG); - FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB); - cR = FfxFloat16x2(ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0))); - cG = FfxFloat16x2(ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0))); - cB = FfxFloat16x2(ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0))); - } - //------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC10Hx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 dit){ - FfxFloat16x2 nR=sqrt(cR); - FfxFloat16x2 nG=sqrt(cG); - FfxFloat16x2 nB=sqrt(cB); - nR=floor(nR*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); - nG=floor(nG*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); - nB=floor(nB*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); - FfxFloat16x2 aR=nR*nR; - FfxFloat16x2 aG=nG*nG; - FfxFloat16x2 aB=nB*nB; - FfxFloat16x2 bR=nR+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bR=bR*bR; - FfxFloat16x2 bG=nG+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bG=bG*bG; - FfxFloat16x2 bB=nB+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bB=bB*bB; - FfxFloat16x2 rR=(cR-bR)*ffxApproximateReciprocalMediumHalf(aR-bR); - FfxFloat16x2 rG=(cG-bG)*ffxApproximateReciprocalMediumHalf(aG-bG); - FfxFloat16x2 rB=(cB-bB)*ffxApproximateReciprocalMediumHalf(aB-bB); - cR=FfxFloat16x2(ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0))); - cG=FfxFloat16x2(ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0))); - cB=FfxFloat16x2(ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0))); -} -#endif diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h.meta deleted file mode 100644 index 64e4ffa..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: 628e23510f46ef44bbf0035ce9a63be0 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd.meta deleted file mode 100644 index 0b775af..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 0f03de1579ac3294595ae4f40106b7a2 -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h deleted file mode 100644 index 6441419..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h +++ /dev/null @@ -1,1009 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -/// @defgroup FfxGPUSpd FidelityFX SPD -/// FidelityFX Single Pass Downsampler 2.0 GPU documentation -/// -/// @ingroup FfxGPUEffects - -/// Setup required constant values for SPD (CPU). -/// -/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. z is number of slices of the input texture -/// @param [out] workGroupOffset GPU side: pass in as constant -/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant -/// @param [in] rectInfo left, top, width, height -/// @param [in] mips optional: if -1, calculate based on rect width and height -/// -/// @ingroup FfxGPUSpd -#if defined(FFX_CPU) -FFX_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, - FfxUInt32x2 workGroupOffset, - FfxUInt32x2 numWorkGroupsAndMips, - FfxUInt32x4 rectInfo, - FfxInt32 mips) -{ - // determines the offset of the first tile to downsample based on - // left (rectInfo[0]) and top (rectInfo[1]) of the subregion. - workGroupOffset[0] = rectInfo[0] / 64; - workGroupOffset[1] = rectInfo[1] / 64; - - FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width - FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height - - // we only need to dispatch as many thread groups as tiles we need to downsample - // number of tiles per slice depends on the subregion to downsample - dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0]; - dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1]; - - // number of thread groups per slice - numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]); - - if (mips >= 0) - { - numWorkGroupsAndMips[1] = FfxUInt32(mips); - } - else - { - // calculate based on rect width and height - FfxUInt32 resolution = ffxMax(rectInfo[2], rectInfo[3]); - numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12)))); - } -} - -/// Setup required constant values for SPD (CPU). -/// -/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. z is number of slices of the input texture -/// @param [out] workGroupOffset GPU side: pass in as constant -/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant -/// @param [in] rectInfo left, top, width, height -/// -/// @ingroup FfxGPUSpd -FFX_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, - FfxUInt32x2 workGroupOffset, - FfxUInt32x2 numWorkGroupsAndMips, - FfxUInt32x4 rectInfo) -{ - ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); -} -#endif // #if defined(FFX_CPU) - - -//============================================================================================================================== -// NON-PACKED VERSION -//============================================================================================================================== -#if defined(FFX_GPU) -#if defined(FFX_SPD_PACKED_ONLY) -// Avoid compiler errors by including default implementations of these callbacks. -FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice) -{ - return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); -} - -FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice) -{ - return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); -} -void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice) -{ -} -FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) -{ - return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); -} -void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) -{ -} -FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) -{ - return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); -} -#endif // #if FFX_SPD_PACKED_ONLY - -//_____________________________________________________________/\_______________________________________________________________ - -void ffxSpdWorkgroupShuffleBarrier() -{ - FFX_GROUP_MEMORY_BARRIER(); -} - -// Only last active workgroup should proceed -bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice) -{ - // global atomic counter - if (localInvocationIndex == 0) - { - SpdIncreaseAtomicCounter(slice); - } - - ffxSpdWorkgroupShuffleBarrier(); - return (SpdGetAtomicCounter() != (numWorkGroups - 1)); -} - -// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3); -FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v) -{ -#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) - - FfxFloat32x4 v0 = v; - FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v); - FfxFloat32x4 v2 = subgroupQuadSwapVertical(v); - FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v); - return SpdReduce4(v0, v1, v2, v3); - -#elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) - - // requires SM6.0 - FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); - FfxFloat32x4 v0 = v; - FfxFloat32x4 v1 = WaveReadLaneAt(v, quad | 1); - FfxFloat32x4 v2 = WaveReadLaneAt(v, quad | 2); - FfxFloat32x4 v3 = WaveReadLaneAt(v, quad | 3); - return SpdReduce4(v0, v1, v2, v3); -/* - // if SM6.0 is not available, you can use the AMD shader intrinsics - // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: - // https://gpuopen.com/amd-gpu-services-ags-library/ - // works for DX11 - FfxFloat32x4 v0 = v; - FfxFloat32x4 v1; - v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - FfxFloat32x4 v2; - v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - FfxFloat32x4 v3; - v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - return SpdReduce4(v0, v1, v2, v3); - */ -#endif - return v; -} - -FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) -{ - FfxFloat32x4 v0 = SpdLoadIntermediate(i0.x, i0.y); - FfxFloat32x4 v1 = SpdLoadIntermediate(i1.x, i1.y); - FfxFloat32x4 v2 = SpdLoadIntermediate(i2.x, i2.y); - FfxFloat32x4 v3 = SpdLoadIntermediate(i3.x, i3.y); - return SpdReduce4(v0, v1, v2, v3); -} - -FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) -{ - FfxFloat32x4 v0 = SpdLoad(FfxInt32x2(i0), slice); - FfxFloat32x4 v1 = SpdLoad(FfxInt32x2(i1), slice); - FfxFloat32x4 v2 = SpdLoad(FfxInt32x2(i2), slice); - FfxFloat32x4 v3 = SpdLoad(FfxInt32x2(i3), slice); - return SpdReduce4(v0, v1, v2, v3); -} - -FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice) -{ - return SpdReduceLoad4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); -} - -FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) -{ - FfxFloat32x4 v0 = SpdLoadSourceImage(FfxInt32x2(i0), slice); - FfxFloat32x4 v1 = SpdLoadSourceImage(FfxInt32x2(i1), slice); - FfxFloat32x4 v2 = SpdLoadSourceImage(FfxInt32x2(i2), slice); - FfxFloat32x4 v3 = SpdLoadSourceImage(FfxInt32x2(i3), slice); - return SpdReduce4(v0, v1, v2, v3); -} - -FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice) -{ -#if defined(SPD_LINEAR_SAMPLER) - return SpdLoadSourceImage(FfxInt32x2(base), slice); -#else - return SpdReduceLoadSourceImage4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); -#endif -} - -void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ - FfxFloat32x4 v[4]; - - FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); - FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); - v[0] = SpdReduceLoadSourceImage(tex, slice); - SpdStore(pix, v[0], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); - v[1] = SpdReduceLoadSourceImage(tex, slice); - SpdStore(pix, v[1], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); - v[2] = SpdReduceLoadSourceImage(tex, slice); - SpdStore(pix, v[2], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage(tex, slice); - SpdStore(pix, v[3], 0, slice); - - if (mip <= 1) - return; - - v[0] = SpdReduceQuad(v[0]); - v[1] = SpdReduceQuad(v[1]); - v[2] = SpdReduceQuad(v[2]); - v[3] = SpdReduceQuad(v[3]); - - if ((localInvocationIndex % 4) == 0) - { - SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice); - SpdStoreIntermediate(x / 2, y / 2, v[0]); - - SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice); - SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]); - - SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice); - SpdStoreIntermediate(x / 2, y / 2 + 8, v[2]); - - SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); - SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]); - } -} - -void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ - FfxFloat32x4 v[4]; - - FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); - FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); - v[0] = SpdReduceLoadSourceImage(tex, slice); - SpdStore(pix, v[0], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); - v[1] = SpdReduceLoadSourceImage(tex, slice); - SpdStore(pix, v[1], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); - v[2] = SpdReduceLoadSourceImage(tex, slice); - SpdStore(pix, v[2], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage(tex, slice); - SpdStore(pix, v[3], 0, slice); - - if (mip <= 1) - return; - - for (FfxUInt32 i = 0; i < 4; i++) - { - SpdStoreIntermediate(x, y, v[i]); - ffxSpdWorkgroupShuffleBarrier(); - if (localInvocationIndex < 64) - { - v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); - SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); - } - ffxSpdWorkgroupShuffleBarrier(); - } - - if (localInvocationIndex < 64) - { - SpdStoreIntermediate(x + 0, y + 0, v[0]); - SpdStoreIntermediate(x + 8, y + 0, v[1]); - SpdStoreIntermediate(x + 0, y + 8, v[2]); - SpdStoreIntermediate(x + 8, y + 8, v[3]); - } -} - -void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice); -#else - SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice); -#endif -} - - -void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - if (localInvocationIndex < 64) - { - FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); - SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice); - // store to LDS, try to reduce bank conflicts - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - // ... - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - SpdStoreIntermediate(x * 2 + y % 2, y * 2, v); - } -#else - FfxFloat32x4 v = SpdLoadIntermediate(x, y); - v = SpdReduceQuad(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice); - SpdStoreIntermediate(x + (y / 2) % 2, y, v); - } -#endif -} - -void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - if (localInvocationIndex < 16) - { - // x 0 x 0 - // 0 0 0 0 - // 0 x 0 x - // 0 0 0 0 - FfxFloat32x4 v = - SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); - SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice); - // store to LDS - // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 - // ... - // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 - // ... - // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x - // ... - SpdStoreIntermediate(x * 4 + y, y * 4, v); - } -#else - if (localInvocationIndex < 64) - { - FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2); - v = SpdReduceQuad(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice); - SpdStoreIntermediate(x * 2 + y / 2, y * 2, v); - } - } -#endif -} - -void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - if (localInvocationIndex < 4) - { - // x 0 0 0 x 0 0 0 - // ... - // 0 x 0 0 0 x 0 0 - FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), - FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), - FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), - FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4)); - SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice); - // store to LDS - // x x x x 0 ... - // 0 ... - SpdStoreIntermediate(x + y * 2, 0, v); - } -#else - if (localInvocationIndex < 16) - { - FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4); - v = SpdReduceQuad(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice); - SpdStoreIntermediate(x / 2 + y, 0, v); - } - } -#endif -} - -void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - if (localInvocationIndex < 1) - { - // x x x x 0 ... - // 0 ... - FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0)); - SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); - } -#else - if (localInvocationIndex < 4) - { - FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0); - v = SpdReduceQuad(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); - } - } -#endif -} - -void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) -{ - FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); - FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); - FfxFloat32x4 v0 = SpdReduceLoad4(tex, slice); - SpdStore(pix, v0, 6, slice); - - tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); - pix = FfxInt32x2(x * 2 + 1, y * 2 + 0); - FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice); - SpdStore(pix, v1, 6, slice); - - tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); - pix = FfxInt32x2(x * 2 + 0, y * 2 + 1); - FfxFloat32x4 v2 = SpdReduceLoad4(tex, slice); - SpdStore(pix, v2, 6, slice); - - tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); - pix = FfxInt32x2(x * 2 + 1, y * 2 + 1); - FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice); - SpdStore(pix, v3, 6, slice); - - if (mips <= 7) - return; - // no barrier needed, working on values only from the same thread - - FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3); - SpdStore(FfxInt32x2(x, y), v, 7, slice); - SpdStoreIntermediate(x, y, v); -} - -void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) -{ - if (mips <= baseMip) - return; - ffxSpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice); - - if (mips <= baseMip + 1) - return; - ffxSpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); - - if (mips <= baseMip + 2) - return; - ffxSpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); - - if (mips <= baseMip + 3) - return; - ffxSpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice); -} - -/// Downsamples a 64x64 tile based on the work group id. -/// If after downsampling it's the last active thread group, computes the remaining MIP levels. -/// -/// @param [in] workGroupID index of the work group / thread group -/// @param [in] localInvocationIndex index of the thread within the thread group in 1D -/// @param [in] mips the number of total MIP levels to compute for the input texture -/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice -/// @param [in] slice the slice of the input texture -/// -/// @ingroup FfxGPUSpd -void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) -{ - // compute MIP level 0 and 1 - FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); - FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); - FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); - SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice); - - // compute MIP level 2, 3, 4, 5 - SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice); - - if (mips <= 6) - return; - - // increase the global atomic counter for the given slice and check if it's the last remaining thread group: - // terminate if not, continue if yes. - if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) - return; - - // reset the global atomic counter back to 0 for the next spd dispatch - SpdResetAtomicCounter(slice); - - // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. - // compute MIP level 6 and 7 - SpdDownsampleMips_6_7(x, y, mips, slice); - - // compute MIP level 8, 9, 10, 11 - SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); -} -/// Downsamples a 64x64 tile based on the work group id and work group offset. -/// If after downsampling it's the last active thread group, computes the remaining MIP levels. -/// -/// @param [in] workGroupID index of the work group / thread group -/// @param [in] localInvocationIndex index of the thread within the thread group in 1D -/// @param [in] mips the number of total MIP levels to compute for the input texture -/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice -/// @param [in] slice the slice of the input texture -/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled. -/// -/// @ingroup FfxGPUSpd -void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) -{ - SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -//============================================================================================================================== -// PACKED VERSION -//============================================================================================================================== - -#if FFX_HALF - -FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v) -{ -#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) - FfxFloat16x4 v0 = v; - FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v); - FfxFloat16x4 v2 = subgroupQuadSwapVertical(v); - FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v); - return SpdReduce4H(v0, v1, v2, v3); -#elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) - // requires SM6.0 - FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); - FfxFloat16x4 v0 = v; - FfxFloat16x4 v1 = WaveReadLaneAt(v, quad | 1); - FfxFloat16x4 v2 = WaveReadLaneAt(v, quad | 2); - FfxFloat16x4 v3 = WaveReadLaneAt(v, quad | 3); - return SpdReduce4H(v0, v1, v2, v3); -/* - // if SM6.0 is not available, you can use the AMD shader intrinsics - // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: - // https://gpuopen.com/amd-gpu-services-ags-library/ - // works for DX11 - FfxFloat16x4 v0 = v; - FfxFloat16x4 v1; - v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); - FfxFloat16x4 v2; - v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); - FfxFloat16x4 v3; - v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); - return SpdReduce4H(v0, v1, v2, v3); - */ -#endif - return FfxFloat16x4(0.0, 0.0, 0.0, 0.0); -} - -FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) -{ - FfxFloat16x4 v0 = SpdLoadIntermediateH(i0.x, i0.y); - FfxFloat16x4 v1 = SpdLoadIntermediateH(i1.x, i1.y); - FfxFloat16x4 v2 = SpdLoadIntermediateH(i2.x, i2.y); - FfxFloat16x4 v3 = SpdLoadIntermediateH(i3.x, i3.y); - return SpdReduce4H(v0, v1, v2, v3); -} - -FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) -{ - FfxFloat16x4 v0 = SpdLoadH(FfxInt32x2(i0), slice); - FfxFloat16x4 v1 = SpdLoadH(FfxInt32x2(i1), slice); - FfxFloat16x4 v2 = SpdLoadH(FfxInt32x2(i2), slice); - FfxFloat16x4 v3 = SpdLoadH(FfxInt32x2(i3), slice); - return SpdReduce4H(v0, v1, v2, v3); -} - -FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice) -{ - return SpdReduceLoad4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); -} - -FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) -{ - FfxFloat16x4 v0 = SpdLoadSourceImageH(FfxInt32x2(i0), slice); - FfxFloat16x4 v1 = SpdLoadSourceImageH(FfxInt32x2(i1), slice); - FfxFloat16x4 v2 = SpdLoadSourceImageH(FfxInt32x2(i2), slice); - FfxFloat16x4 v3 = SpdLoadSourceImageH(FfxInt32x2(i3), slice); - return SpdReduce4H(v0, v1, v2, v3); -} - -FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice) -{ -#if defined(SPD_LINEAR_SAMPLER) - return SpdLoadSourceImageH(FfxInt32x2(base), slice); -#else - return SpdReduceLoadSourceImage4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); -#endif -} - -void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) -{ - FfxFloat16x4 v[4]; - - FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); - FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); - v[0] = SpdReduceLoadSourceImageH(tex, slice); - SpdStoreH(pix, v[0], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); - v[1] = SpdReduceLoadSourceImageH(tex, slice); - SpdStoreH(pix, v[1], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); - v[2] = SpdReduceLoadSourceImageH(tex, slice); - SpdStoreH(pix, v[2], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImageH(tex, slice); - SpdStoreH(pix, v[3], 0, slice); - - if (mips <= 1) - return; - - v[0] = SpdReduceQuadH(v[0]); - v[1] = SpdReduceQuadH(v[1]); - v[2] = SpdReduceQuadH(v[2]); - v[3] = SpdReduceQuadH(v[3]); - - if ((localInvocationIndex % 4) == 0) - { - SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice); - SpdStoreIntermediateH(x / 2, y / 2, v[0]); - - SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice); - SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]); - - SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice); - SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]); - - SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); - SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]); - } -} - -void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) -{ - FfxFloat16x4 v[4]; - - FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); - FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); - v[0] = SpdReduceLoadSourceImageH(tex, slice); - SpdStoreH(pix, v[0], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); - v[1] = SpdReduceLoadSourceImageH(tex, slice); - SpdStoreH(pix, v[1], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); - v[2] = SpdReduceLoadSourceImageH(tex, slice); - SpdStoreH(pix, v[2], 0, slice); - - tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); - pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImageH(tex, slice); - SpdStoreH(pix, v[3], 0, slice); - - if (mips <= 1) - return; - - for (FfxUInt32 i = 0; i < 4; i++) - { - SpdStoreIntermediateH(x, y, v[i]); - ffxSpdWorkgroupShuffleBarrier(); - if (localInvocationIndex < 64) - { - v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); - SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); - } - ffxSpdWorkgroupShuffleBarrier(); - } - - if (localInvocationIndex < 64) - { - SpdStoreIntermediateH(x + 0, y + 0, v[0]); - SpdStoreIntermediateH(x + 8, y + 0, v[1]); - SpdStoreIntermediateH(x + 0, y + 8, v[2]); - SpdStoreIntermediateH(x + 8, y + 8, v[3]); - } -} - -void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice); -#else - SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice); -#endif -} - - -void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - if (localInvocationIndex < 64) - { - FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); - SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice); - // store to LDS, try to reduce bank conflicts - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - // ... - // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 - SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v); - } -#else - FfxFloat16x4 v = SpdLoadIntermediateH(x, y); - v = SpdReduceQuadH(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice); - SpdStoreIntermediateH(x + (y / 2) % 2, y, v); - } -#endif -} - -void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - if (localInvocationIndex < 16) - { - // x 0 x 0 - // 0 0 0 0 - // 0 x 0 x - // 0 0 0 0 - FfxFloat16x4 v = - SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); - SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice); - // store to LDS - // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 - // ... - // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 - // ... - // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x - // ... - SpdStoreIntermediateH(x * 4 + y, y * 4, v); - } -#else - if (localInvocationIndex < 64) - { - FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2); - v = SpdReduceQuadH(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice); - SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v); - } - } -#endif -} - -void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - if (localInvocationIndex < 4) - { - // x 0 0 0 x 0 0 0 - // ... - // 0 x 0 0 0 x 0 0 - FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), - FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), - FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), - FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4)); - SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice); - // store to LDS - // x x x x 0 ... - // 0 ... - SpdStoreIntermediateH(x + y * 2, 0, v); - } -#else - if (localInvocationIndex < 16) - { - FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4); - v = SpdReduceQuadH(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice); - SpdStoreIntermediateH(x / 2 + y, 0, v); - } - } -#endif -} - -void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) -{ -#if defined(FFX_SPD_NO_WAVE_OPERATIONS) - if (localInvocationIndex < 1) - { - // x x x x 0 ... - // 0 ... - FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0)); - SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice); - } -#else - if (localInvocationIndex < 4) - { - FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0); - v = SpdReduceQuadH(v); - // quad index 0 stores result - if (localInvocationIndex % 4 == 0) - { - SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice); - } - } -#endif -} - -void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) -{ - FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); - FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); - FfxFloat16x4 v0 = SpdReduceLoad4H(tex, slice); - SpdStoreH(pix, v0, 6, slice); - - tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); - pix = FfxInt32x2(x * 2 + 1, y * 2 + 0); - FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice); - SpdStoreH(pix, v1, 6, slice); - - tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); - pix = FfxInt32x2(x * 2 + 0, y * 2 + 1); - FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice); - SpdStoreH(pix, v2, 6, slice); - - tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); - pix = FfxInt32x2(x * 2 + 1, y * 2 + 1); - FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice); - SpdStoreH(pix, v3, 6, slice); - - if (mips < 8) - return; - // no barrier needed, working on values only from the same thread - - FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3); - SpdStoreH(FfxInt32x2(x, y), v, 7, slice); - SpdStoreIntermediateH(x, y, v); -} - -void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) -{ - if (mips <= baseMip) - return; - ffxSpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice); - - if (mips <= baseMip + 1) - return; - ffxSpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); - - if (mips <= baseMip + 2) - return; - ffxSpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); - - if (mips <= baseMip + 3) - return; - ffxSpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice); -} - -/// Downsamples a 64x64 tile based on the work group id and work group offset. -/// If after downsampling it's the last active thread group, computes the remaining MIP levels. -/// Uses half types. -/// -/// @param [in] workGroupID index of the work group / thread group -/// @param [in] localInvocationIndex index of the thread within the thread group in 1D -/// @param [in] mips the number of total MIP levels to compute for the input texture -/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice -/// @param [in] slice the slice of the input texture -/// -/// @ingroup FfxGPUSpd -void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) -{ - FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); - FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); - FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); - - // compute MIP level 0 and 1 - SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice); - - // compute MIP level 2, 3, 4, 5 - SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice); - - if (mips < 7) - return; - - // increase the global atomic counter for the given slice and check if it's the last remaining thread group: - // terminate if not, continue if yes. - if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) - return; - - // reset the global atomic counter back to 0 for the next spd dispatch - SpdResetAtomicCounter(slice); - - // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. - // compute MIP level 6 and 7 - SpdDownsampleMips_6_7H(x, y, mips, slice); - - // compute MIP level 8, 9, 10, 11 - SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); -} - -/// Downsamples a 64x64 tile based on the work group id and work group offset. -/// If after downsampling it's the last active thread group, computes the remaining MIP levels. -/// Uses half types. -/// -/// @param [in] workGroupID index of the work group / thread group -/// @param [in] localInvocationIndex index of the thread within the thread group in 1D -/// @param [in] mips the number of total MIP levels to compute for the input texture -/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice -/// @param [in] slice the slice of the input texture -/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled. -/// -/// @ingroup FfxGPUSpd -void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) -{ - SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); -} - -#endif // #if FFX_HALF -#endif // #if defined(FFX_GPU) diff --git a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h.meta b/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h.meta deleted file mode 100644 index 8d4e716..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Resources/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h.meta +++ /dev/null @@ -1,27 +0,0 @@ -fileFormatVersion: 2 -guid: face65176ee3b82498bd0b8fed0ddacd -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - Any: - second: - enabled: 1 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts.meta b/Assets/AEG FSR/Runtime/Plugins/Scripts.meta deleted file mode 100644 index 47c1322..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 35ab8f6540bc2b742b4d5c423c92fe57 -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3.cs b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3.cs deleted file mode 100644 index 37be500..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3.cs +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using System; -using System.Runtime.InteropServices; -using UnityEngine; -using UnityEngine.Rendering; - -namespace FidelityFX -{ - /// - /// A collection of helper functions and data structures required by the FSR3 process. - /// - public static class Fsr3 - { - /// - /// Creates a new FSR3 context with standard parameters that are appropriate for the current platform. - /// - public static Fsr3Context CreateContext(Vector2Int displaySize, Vector2Int maxRenderSize, IFsr3Callbacks callbacks, InitializationFlags flags = 0) { - if(SystemInfo.usesReversedZBuffer) - flags |= InitializationFlags.EnableDepthInverted; - else - flags &= ~InitializationFlags.EnableDepthInverted; - -#if AMD_FIDELITY_FSR3_DEBUG -flags |= InitializationFlags.EnableDebugChecking; -Debug.Log($"Setting up FSR3 with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}"); -#endif - - - var contextDescription = new ContextDescription { - Flags = flags, - DisplaySize = displaySize, - MaxRenderSize = maxRenderSize, - Callbacks = callbacks, - }; - - var context = new Fsr3Context(); - context.Create(contextDescription); - return context; - } - - public static float GetUpscaleRatioFromQualityMode(QualityMode qualityMode) { - switch(qualityMode) { - case QualityMode.Off: - return 1.0f; - case QualityMode.FSRAA: - return 1.0f; - case QualityMode.Quality: - return 1.5f; - case QualityMode.Balanced: - return 1.7f; - case QualityMode.Performance: - return 2.0f; - case QualityMode.UltraPerformance: - return 3.0f; - default: - return 1.0f; - } - } - - public static void GetRenderResolutionFromQualityMode( - out int renderWidth, out int renderHeight, - int displayWidth, int displayHeight, QualityMode qualityMode) { - float ratio = GetUpscaleRatioFromQualityMode(qualityMode); - renderWidth = Mathf.RoundToInt(displayWidth / ratio); - renderHeight = Mathf.RoundToInt(displayHeight / ratio); - } - - public static float GetMipmapBiasOffset(int renderWidth, int displayWidth) { - return Mathf.Log((float)renderWidth / displayWidth, 2.0f) - 1.0f; - } - - public static int GetJitterPhaseCount(int renderWidth, int displayWidth) { - const float basePhaseCount = 8.0f; - int jitterPhaseCount = (int)(basePhaseCount * Mathf.Pow((float)displayWidth / renderWidth, 2.0f)); - return jitterPhaseCount; - } - - public static void GetJitterOffset(out float outX, out float outY, int index, int phaseCount) { - outX = Halton((index % phaseCount) + 1, 2) - 0.5f; - outY = Halton((index % phaseCount) + 1, 3) - 0.5f; - } - - // Calculate halton number for index and base. - private static float Halton(int index, int @base) { - float f = 1.0f, result = 0.0f; - - for(int currentIndex = index; currentIndex > 0;) { - - f /= @base; - result += f * (currentIndex % @base); - currentIndex = (int)Mathf.Floor((float)currentIndex / @base); - } - - return result; - } - - public static float Lanczos2(float value) { - return Mathf.Abs(value) < Mathf.Epsilon ? 1.0f : Mathf.Sin(Mathf.PI * value) / (Mathf.PI * value) * (Mathf.Sin(0.5f * Mathf.PI * value) / (0.5f * Mathf.PI * value)); - } - - public enum QualityMode - { - Off = 0, - FSRAA = 1, - Quality = 2, - Balanced = 3, - Performance = 4, - UltraPerformance = 5, - } - - [Flags] - public enum InitializationFlags - { - EnableHighDynamicRange = 1 << 0, - EnableDisplayResolutionMotionVectors = 1 << 1, - EnableMotionVectorsJitterCancellation = 1 << 2, - EnableDepthInverted = 1 << 3, - EnableDepthInfinite = 1 << 4, - EnableAutoExposure = 1 << 5, - EnableDynamicResolution = 1 << 6, - EnableFP16Usage = 1 << 7, - EnableDebugChecking = 1 << 8, - } - - public struct ContextDescription - { - public InitializationFlags Flags; - public Vector2Int MaxRenderSize; - public Vector2Int DisplaySize; - public IFsr3Callbacks Callbacks; - } - - /// - /// The input and output resources are all optional. If they are null, the Fsr3Context won't try to bind them to any shaders. - /// This allows for customized and more efficient resource management outside of Fsr3Context, tailored to the specific scenario. - /// - public class DispatchDescription - { - public RenderTargetIdentifier? Color; - public RenderTargetIdentifier? Depth; - public bool DepthFormat = true; - public RenderTargetIdentifier? MotionVectors; - public RenderTargetIdentifier? Exposure; - public RenderTargetIdentifier? Reactive; - public RenderTargetIdentifier? TransparencyAndComposition; - public RenderTargetIdentifier? Output; - public Vector2 JitterOffset; - public Vector2 MotionVectorScale; - public Vector2Int RenderSize; - public Vector2Int InputResourceSize; - public bool EnableSharpening; - public float Sharpness; - public float FrameTimeDelta; // in seconds - public float PreExposure; - public bool Reset; - public float CameraNear; - public float CameraFar; - public float CameraFovAngleVertical; - public float ViewSpaceToMetersFactor; - - // EXPERIMENTAL reactive mask generation parameters - public bool EnableAutoReactive; - public RenderTargetIdentifier? ColorOpaqueOnly; - public float AutoTcThreshold = 0.05f; - public float AutoTcScale = 1.0f; - public float AutoReactiveScale = 5.0f; - public float AutoReactiveMax = 0.9f; - } - - /// - /// The default values for Scale, CutoffThreshold, BinaryValue and Flags. - /// - public class GenerateReactiveDescription - { - public RenderTargetIdentifier? ColorOpaqueOnly; - public RenderTargetIdentifier? ColorPreUpscale; - public RenderTargetIdentifier? OutReactive; - public Vector2Int RenderSize; - public float Scale = 0.5f; - public float CutoffThreshold = 0.2f; - public float BinaryValue = 0.9f; - public GenerateReactiveFlags Flags = GenerateReactiveFlags.ApplyTonemap | GenerateReactiveFlags.ApplyThreshold | GenerateReactiveFlags.UseComponentsMax; - } - - [Flags] - public enum GenerateReactiveFlags - { - ApplyTonemap = 1 << 0, - ApplyInverseTonemap = 1 << 1, - ApplyThreshold = 1 << 2, - UseComponentsMax = 1 << 3, - } - - [Serializable, StructLayout(LayoutKind.Sequential)] - internal struct Fsr3Constants - { - public Vector2Int renderSize; - public Vector2Int maxRenderSize; - public Vector2Int displaySize; - public Vector2Int inputColorResourceDimensions; - public Vector2Int lumaMipDimensions; - public int lumaMipLevelToUse; - public int frameIndex; - - public Vector4 deviceToViewDepth; - public Vector2 jitterOffset; - public Vector2 motionVectorScale; - public Vector2 downscaleFactor; - public Vector2 motionVectorJitterCancellation; - public float preExposure; - public float previousFramePreExposure; - public float tanHalfFOV; - public float jitterPhaseCount; - public float deltaTime; - public float dynamicResChangeFactor; - public float viewSpaceToMetersFactor; - - public int dummy; - } - - [Serializable, StructLayout(LayoutKind.Sequential)] - internal struct SpdConstants - { - public uint mips; - public uint numWorkGroups; - public uint workGroupOffsetX, workGroupOffsetY; - public uint renderSizeX, renderSizeY; - } - - [Serializable, StructLayout(LayoutKind.Sequential)] - internal struct GenerateReactiveConstants - { - public float scale; - public float threshold; - public float binaryValue; - public uint flags; - } - - [Serializable, StructLayout(LayoutKind.Sequential)] - internal struct GenerateReactiveConstants2 - { - public float autoTcThreshold; - public float autoTcScale; - public float autoReactiveScale; - public float autoReactiveMax; - } - - [Serializable, StructLayout(LayoutKind.Sequential)] - internal struct RcasConstants - { - public RcasConstants(uint sharpness, uint halfSharp) { - this.sharpness = sharpness; - this.halfSharp = halfSharp; - dummy0 = dummy1 = 0; - } - - public readonly uint sharpness; - public readonly uint halfSharp; - public readonly uint dummy0; - public readonly uint dummy1; - } - } -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3.cs.meta b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3.cs.meta deleted file mode 100644 index 54a2b79..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 94ab360600525bb4e90c93e95e7f9489 -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Callbacks.cs b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Callbacks.cs deleted file mode 100644 index 8dc0632..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Callbacks.cs +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using UnityEngine; - -namespace FidelityFX -{ - /// - /// A collection of callbacks required by the FSR3 process. - /// This allows some customization by the game dev on how to integrate FSR3 into their own game setup. - /// - public interface IFsr3Callbacks - { - Shader LoadShader(string name); - void UnloadShader(Shader shader); - ComputeShader LoadComputeShader(string name); - void UnloadComputeShader(ComputeShader shader); - - /// - /// Apply a mipmap bias to in-game textures to prevent them from becoming blurry as the internal rendering resolution lowers. - /// This will need to be customized on a per-game basis, as there is no clear universal way to determine what are "in-game" textures. - /// The default implementation will simply apply a mipmap bias to all 2D textures, which will include things like UI textures and which might miss things like terrain texture arrays. - /// - /// Depending on how your game organizes its assets, you will want to create a filter that more specifically selects the textures that need to have this mipmap bias applied. - /// You may also want to store the bias offset value and apply it to any assets that are loaded in on demand. - /// - void OnMipMapAllTextures(float biasOffset); - - void OnResetAllMipMaps(); - } - - /// - /// Default implementation of IFsr3Callbacks using simple Resources calls. - /// These are fine for testing but a proper game will want to extend and override these methods. - /// - public class Fsr3CallbacksBase : IFsr3Callbacks - { - protected float CurrentBiasOffset = 0; - protected Texture[] m_allTextures; - - public virtual Shader LoadShader(string name) { - return Resources.Load(name); - } - - public virtual void UnloadShader(Shader shader) { - Resources.UnloadAsset(shader); - } - - public virtual ComputeShader LoadComputeShader(string name) { - return Resources.Load(name); - } - - public virtual void UnloadComputeShader(ComputeShader shader) { - Resources.UnloadAsset(shader); - } - - public void OnMipMapAllTextures(float _mipMapBias) { - m_allTextures = Resources.FindObjectsOfTypeAll(typeof(Texture)) as Texture[]; - for(int i = 0; i < m_allTextures.Length; i++) { - if(m_allTextures[i].mipmapCount <= 1) - continue; - m_allTextures[i].mipMapBias = _mipMapBias; - } - } - - public void OnResetAllMipMaps() { - m_allTextures = Resources.FindObjectsOfTypeAll(typeof(Texture)) as Texture[]; - for(int i = 0; i < m_allTextures.Length; i++) { - m_allTextures[i].mipMapBias = 0; - } - m_allTextures = null; - } - } -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Callbacks.cs.meta b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Callbacks.cs.meta deleted file mode 100644 index b51c9de..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Callbacks.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: fafc0344da622d44db8899cd5c73fbb3 -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Context.cs b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Context.cs deleted file mode 100644 index d51f16b..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Context.cs +++ /dev/null @@ -1,577 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using System; -using System.Collections.Generic; -using System.Runtime.InteropServices; -using UnityEngine; -using UnityEngine.Rendering; - -namespace FidelityFX -{ - /// - /// This class loosely matches the FfxFsr3Context struct from the original FSR3 codebase. - /// It manages the various resources and compute passes required by the FSR3 process. - /// Note that this class does not know anything about Unity render pipelines; all it knows is CommandBuffers and RenderTargetIdentifiers. - /// This should make it suitable for integration with any of the available Unity render pipelines. - /// - public class Fsr3Context - { - private const int MaxQueuedFrames = 16; - - private Fsr3.ContextDescription _contextDescription; - private CommandBuffer _commandBuffer; - - private Fsr3Pipeline _depthClipPipeline; - private Fsr3Pipeline _reconstructPreviousDepthPipeline; - private Fsr3Pipeline _lockPipeline; - private Fsr3Pipeline _accumulatePipeline; - private Fsr3Pipeline _rcasPipeline; - private Fsr3Pipeline _computeLuminancePyramidPipeline; - private Fsr3Pipeline _generateReactivePipeline; - private Fsr3Pipeline _tcrAutogeneratePipeline; - - private readonly Fsr3Resources _resources = new Fsr3Resources(); - - private ComputeBuffer _fsr3ConstantsBuffer; - private readonly Fsr3.Fsr3Constants[] _fsr3ConstantsArray = { new Fsr3.Fsr3Constants() }; - private ref Fsr3.Fsr3Constants Constants => ref _fsr3ConstantsArray[0]; - - private ComputeBuffer _spdConstantsBuffer; - private readonly Fsr3.SpdConstants[] _spdConstantsArray = { new Fsr3.SpdConstants() }; - private ref Fsr3.SpdConstants SpdConsts => ref _spdConstantsArray[0]; - - private ComputeBuffer _rcasConstantsBuffer; - private readonly Fsr3.RcasConstants[] _rcasConstantsArray = new Fsr3.RcasConstants[1]; - private ref Fsr3.RcasConstants RcasConsts => ref _rcasConstantsArray[0]; - - private ComputeBuffer _generateReactiveConstantsBuffer; - private readonly Fsr3.GenerateReactiveConstants[] _generateReactiveConstantsArray = { new Fsr3.GenerateReactiveConstants() }; - private ref Fsr3.GenerateReactiveConstants GenReactiveConsts => ref _generateReactiveConstantsArray[0]; - - private ComputeBuffer _tcrAutogenerateConstantsBuffer; - private readonly Fsr3.GenerateReactiveConstants2[] _tcrAutogenerateConstantsArray = { new Fsr3.GenerateReactiveConstants2() }; - private ref Fsr3.GenerateReactiveConstants2 TcrAutoGenConsts => ref _tcrAutogenerateConstantsArray[0]; - - private bool _firstExecution; - private Vector2 _previousJitterOffset; - private int _resourceFrameIndex; - - public void Create(Fsr3.ContextDescription contextDescription) { - _contextDescription = contextDescription; - _commandBuffer = new CommandBuffer { name = "FSR3" }; - - _fsr3ConstantsBuffer = CreateConstantBuffer(); - _spdConstantsBuffer = CreateConstantBuffer(); - _rcasConstantsBuffer = CreateConstantBuffer(); - _generateReactiveConstantsBuffer = CreateConstantBuffer(); - _tcrAutogenerateConstantsBuffer = CreateConstantBuffer(); - - // Set defaults - _firstExecution = true; - _resourceFrameIndex = 0; - - Constants.displaySize = _contextDescription.DisplaySize; - - _resources.Create(_contextDescription); - CreatePipelines(); - } - - private void CreatePipelines() { - _computeLuminancePyramidPipeline = new Fsr3ComputeLuminancePyramidPass(_contextDescription, _resources, _fsr3ConstantsBuffer, _spdConstantsBuffer); - _reconstructPreviousDepthPipeline = new Fsr3ReconstructPreviousDepthPass(_contextDescription, _resources, _fsr3ConstantsBuffer); - _depthClipPipeline = new Fsr3DepthClipPass(_contextDescription, _resources, _fsr3ConstantsBuffer); - _lockPipeline = new Fsr3LockPass(_contextDescription, _resources, _fsr3ConstantsBuffer); - _accumulatePipeline = new Fsr3AccumulatePass(_contextDescription, _resources, _fsr3ConstantsBuffer); - _rcasPipeline = new Fsr3RcasPass(_contextDescription, _resources, _fsr3ConstantsBuffer, _rcasConstantsBuffer); - _generateReactivePipeline = new Fsr3GenerateReactivePass(_contextDescription, _resources, _generateReactiveConstantsBuffer); - //_tcrAutogeneratePipeline = new Fsr3TcrAutogeneratePipeline(_contextDescription, _resources, _fsr3ConstantsBuffer, _tcrAutogenerateConstantsBuffer); - } - - public void Destroy() { - DestroyPipeline(ref _tcrAutogeneratePipeline); - DestroyPipeline(ref _generateReactivePipeline); - DestroyPipeline(ref _computeLuminancePyramidPipeline); - DestroyPipeline(ref _rcasPipeline); - DestroyPipeline(ref _accumulatePipeline); - DestroyPipeline(ref _lockPipeline); - DestroyPipeline(ref _reconstructPreviousDepthPipeline); - DestroyPipeline(ref _depthClipPipeline); - - _resources.Destroy(); - - DestroyConstantBuffer(ref _tcrAutogenerateConstantsBuffer); - DestroyConstantBuffer(ref _generateReactiveConstantsBuffer); - DestroyConstantBuffer(ref _rcasConstantsBuffer); - DestroyConstantBuffer(ref _spdConstantsBuffer); - DestroyConstantBuffer(ref _fsr3ConstantsBuffer); - - _commandBuffer.Dispose(); - _commandBuffer = null; - } - - public void Dispatch(Fsr3.DispatchDescription dispatchParams) { - _commandBuffer.Clear(); - Dispatch(dispatchParams, _commandBuffer); - Graphics.ExecuteCommandBuffer(_commandBuffer); - } - - public void Dispatch(Fsr3.DispatchDescription dispatchParams, CommandBuffer commandBuffer) { - if((_contextDescription.Flags & Fsr3.InitializationFlags.EnableDebugChecking) != 0) { - DebugCheckDispatch(dispatchParams); - } - - if(_firstExecution) { - commandBuffer.SetRenderTarget(_resources.LockStatus[0]); - commandBuffer.ClearRenderTarget(false, true, Color.clear); - commandBuffer.SetRenderTarget(_resources.LockStatus[1]); - commandBuffer.ClearRenderTarget(false, true, Color.clear); - } - - - - int frameIndex = _resourceFrameIndex % 2; - bool resetAccumulation = dispatchParams.Reset || _firstExecution; - _firstExecution = false; - - // If auto exposure is enabled use the auto exposure SRV, otherwise what the app sends - if((_contextDescription.Flags & Fsr3.InitializationFlags.EnableAutoExposure) != 0) - dispatchParams.Exposure = _resources.AutoExposure; - else if(dispatchParams.Exposure == null) - dispatchParams.Exposure = _resources.DefaultExposure; - - if(dispatchParams.EnableAutoReactive) { - // Create the auto-TCR resources only when we need them - if(_resources.AutoReactive == null) - _resources.CreateTcrAutogenResources(_contextDescription); - - if(resetAccumulation) - commandBuffer.Blit(_resources.PrevPreAlpha[frameIndex ^ 1], dispatchParams.ColorOpaqueOnly ?? Fsr3ShaderIDs.SrvOpaqueOnly); - } else if(_resources.AutoReactive != null) { - // Destroy the auto-TCR resources if we don't use the feature - _resources.DestroyTcrAutogenResources(); - } - - if(dispatchParams.Reactive == null) - dispatchParams.Reactive = _resources.DefaultReactive; - if(dispatchParams.TransparencyAndComposition == null) - dispatchParams.TransparencyAndComposition = _resources.DefaultReactive; - Fsr3Resources.CreateAliasableResources(commandBuffer, _contextDescription, dispatchParams); - - SetupConstants(dispatchParams, resetAccumulation); - - // Reactive mask bias - const int threadGroupWorkRegionDim = 8; - int dispatchSrcX = (Constants.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchSrcY = (Constants.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchDstX = (_contextDescription.DisplaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchDstY = (_contextDescription.DisplaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - - // Clear reconstructed depth for max depth store - if(resetAccumulation) { - commandBuffer.SetRenderTarget(_resources.LockStatus[frameIndex ^ 1]); - commandBuffer.ClearRenderTarget(false, true, Color.clear); - - commandBuffer.SetRenderTarget(_resources.InternalUpscaled[frameIndex ^ 1]); - commandBuffer.ClearRenderTarget(false, true, Color.clear); - - commandBuffer.SetRenderTarget(_resources.SceneLuminance); - commandBuffer.ClearRenderTarget(false, true, Color.clear); - - // Auto exposure always used to track luma changes in locking logic - commandBuffer.SetRenderTarget(_resources.AutoExposure); - commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); - // Reset atomic counter to 0 - commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); - commandBuffer.ClearRenderTarget(false, true, Color.clear); - } - - // FSR3: need to clear here since we need the content of this surface for frame interpolation, so clearing in the lock pass is not an option - bool depthInverted = (_contextDescription.Flags & Fsr3.InitializationFlags.EnableDepthInverted) == Fsr3.InitializationFlags.EnableDepthInverted; - commandBuffer.SetRenderTarget(Fsr3ShaderIDs.UavReconstructedPrevNearestDepth); - commandBuffer.ClearRenderTarget(false, true, depthInverted ? Color.clear : Color.white); - - // Auto exposure - SetupSpdConstants(dispatchParams, out var dispatchThreadGroupCount); - - // Initialize constant buffers data -#if UNITY_2021_1_OR_NEWER - commandBuffer.SetBufferData(_fsr3ConstantsBuffer, _fsr3ConstantsArray); - commandBuffer.SetBufferData(_spdConstantsBuffer, _spdConstantsArray); -#else - _fsr3ConstantsBuffer.SetData(_fsr3ConstantsArray); - _spdConstantsBuffer.SetData(_spdConstantsArray); -#endif - - // Auto reactive - if(dispatchParams.EnableAutoReactive) { - GenerateTransparencyCompositionReactive(dispatchParams, commandBuffer, frameIndex); - dispatchParams.Reactive = _resources.AutoReactive; - dispatchParams.TransparencyAndComposition = _resources.AutoComposition; - } - - - - // Compute luminance pyramid - _computeLuminancePyramidPipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchThreadGroupCount.x, dispatchThreadGroupCount.y); - - // Reconstruct previous depth - _reconstructPreviousDepthPipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); - - // Depth clip - _depthClipPipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); - - // Create locks - _lockPipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); - - // Accumulate - _accumulatePipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY); - - - if(dispatchParams.EnableSharpening) { - // Compute the constants - SetupRcasConstants(dispatchParams); -#if UNITY_2021_1_OR_NEWER - commandBuffer.SetBufferData(_rcasConstantsBuffer, _rcasConstantsArray); -#else - _rcasConstantsBuffer.SetData(_rcasConstantsArray); -#endif - // Dispatch RCAS - const int threadGroupWorkRegionDimRcas = 16; - int threadGroupsX = (Screen.width + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; - int threadGroupsY = (Screen.height + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; - _rcasPipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, threadGroupsX, threadGroupsY); - } - - _resourceFrameIndex = (_resourceFrameIndex + 1) % MaxQueuedFrames; - - Fsr3Resources.DestroyAliasableResources(commandBuffer); - } - - public void GenerateReactiveMask(Fsr3.GenerateReactiveDescription dispatchParams) { - _commandBuffer.Clear(); - GenerateReactiveMask(dispatchParams, _commandBuffer); - Graphics.ExecuteCommandBuffer(_commandBuffer); - } - - public void GenerateReactiveMask(Fsr3.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer) { - const int threadGroupWorkRegionDim = 8; - int dispatchSrcX = (dispatchParams.RenderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchSrcY = (dispatchParams.RenderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - - GenReactiveConsts.scale = dispatchParams.Scale; - GenReactiveConsts.threshold = dispatchParams.CutoffThreshold; - GenReactiveConsts.binaryValue = dispatchParams.BinaryValue; - GenReactiveConsts.flags = (uint)dispatchParams.Flags; -#if UNITY_2021_1_OR_NEWER - commandBuffer.SetBufferData(_generateReactiveConstantsBuffer, _generateReactiveConstantsArray); -#else - _generateReactiveConstantsBuffer.SetData(_generateReactiveConstantsArray); -#endif - ((Fsr3GenerateReactivePass)_generateReactivePipeline).ScheduleDispatch(commandBuffer, dispatchParams, dispatchSrcX, dispatchSrcY); - } - - private void GenerateTransparencyCompositionReactive(Fsr3.DispatchDescription dispatchParams, CommandBuffer commandBuffer, int frameIndex) { - const int threadGroupWorkRegionDim = 8; - int dispatchSrcX = (dispatchParams.RenderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchSrcY = (dispatchParams.RenderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - - TcrAutoGenConsts.autoTcThreshold = dispatchParams.AutoTcThreshold; - TcrAutoGenConsts.autoTcScale = dispatchParams.AutoTcScale; - TcrAutoGenConsts.autoReactiveScale = dispatchParams.AutoReactiveScale; - TcrAutoGenConsts.autoReactiveMax = dispatchParams.AutoReactiveMax; -#if UNITY_2021_1_OR_NEWER - commandBuffer.SetBufferData(_tcrAutogenerateConstantsBuffer, _tcrAutogenerateConstantsArray); -#else - _tcrAutogenerateConstantsBuffer.SetData(_tcrAutogenerateConstantsArray); -#endif - _tcrAutogeneratePipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); - } - - private void SetupConstants(Fsr3.DispatchDescription dispatchParams, bool resetAccumulation) { - - - ref Fsr3.Fsr3Constants constants = ref Constants; - - constants.jitterOffset = dispatchParams.JitterOffset; - constants.renderSize = dispatchParams.RenderSize; - constants.maxRenderSize = _contextDescription.MaxRenderSize; - constants.inputColorResourceDimensions = dispatchParams.InputResourceSize; - - // Compute the horizontal FOV for the shader from the vertical one - float aspectRatio = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y; - float cameraAngleHorizontal = Mathf.Atan(Mathf.Tan(dispatchParams.CameraFovAngleVertical / 2.0f) * aspectRatio) * 2.0f; - constants.tanHalfFOV = Mathf.Tan(cameraAngleHorizontal * 0.5f); - constants.viewSpaceToMetersFactor = (dispatchParams.ViewSpaceToMetersFactor > 0.0f) ? dispatchParams.ViewSpaceToMetersFactor : 1.0f; - - // Compute params to enable device depth to view space depth computation in shader - constants.deviceToViewDepth = SetupDeviceDepthToViewSpaceDepthParams(dispatchParams); - - // To be updated if resource is larger than the actual image size - constants.downscaleFactor = new Vector2((float)constants.renderSize.x / _contextDescription.DisplaySize.x, (float)constants.renderSize.y / _contextDescription.DisplaySize.y); - constants.previousFramePreExposure = constants.preExposure; - constants.preExposure = (dispatchParams.PreExposure != 0) ? dispatchParams.PreExposure : 1.0f; - - // Motion vector data - Vector2Int motionVectorsTargetSize = (_contextDescription.Flags & Fsr3.InitializationFlags.EnableDisplayResolutionMotionVectors) != 0 ? constants.displaySize : constants.renderSize; - constants.motionVectorScale = dispatchParams.MotionVectorScale / motionVectorsTargetSize; - - // Compute jitter cancellation - if((_contextDescription.Flags & Fsr3.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) { - constants.motionVectorJitterCancellation = (_previousJitterOffset - constants.jitterOffset) / motionVectorsTargetSize; - _previousJitterOffset = constants.jitterOffset; - } - - int jitterPhaseCount = Fsr3.GetJitterPhaseCount(dispatchParams.RenderSize.x, _contextDescription.DisplaySize.x); - if(resetAccumulation || constants.jitterPhaseCount == 0) { - constants.jitterPhaseCount = jitterPhaseCount; - } else { - int jitterPhaseCountDelta = (int)(jitterPhaseCount - constants.jitterPhaseCount); - if(jitterPhaseCountDelta > 0) - constants.jitterPhaseCount++; - else if(jitterPhaseCountDelta < 0) - constants.jitterPhaseCount--; - } - - // Convert delta time to seconds and clamp to [0, 1] - constants.deltaTime = Mathf.Clamp01(dispatchParams.FrameTimeDelta); - - if(resetAccumulation) - constants.frameIndex = 0; - else - constants.frameIndex++; - - // Shading change usage of the SPD mip levels - constants.lumaMipLevelToUse = Fsr3Pipeline.ShadingChangeMipLevel; - - float mipDiv = 2 << constants.lumaMipLevelToUse; - constants.lumaMipDimensions.x = (int)(constants.maxRenderSize.x / mipDiv); - constants.lumaMipDimensions.y = (int)(constants.maxRenderSize.y / mipDiv); - } - - private Vector4 SetupDeviceDepthToViewSpaceDepthParams(Fsr3.DispatchDescription dispatchParams) { - bool inverted = (_contextDescription.Flags & Fsr3.InitializationFlags.EnableDepthInverted) != 0; - bool infinite = (_contextDescription.Flags & Fsr3.InitializationFlags.EnableDepthInfinite) != 0; - - // make sure it has no impact if near and far plane values are swapped in dispatch params - // the flags "inverted" and "infinite" will decide what transform to use - float min = Mathf.Min(dispatchParams.CameraNear, dispatchParams.CameraFar); - float max = Mathf.Max(dispatchParams.CameraNear, dispatchParams.CameraFar); - - if(inverted) { - (min, max) = (max, min); - } - - float q = max / (min - max); - float d = -1.0f; - - Vector4 matrixElemC = new Vector4(q, -1.0f - Mathf.Epsilon, q, 0.0f + Mathf.Epsilon); - Vector4 matrixElemE = new Vector4(q * min, -min - Mathf.Epsilon, q * min, max); - - // Revert x and y coords - float aspect = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y; - float cotHalfFovY = Mathf.Cos(0.5f * dispatchParams.CameraFovAngleVertical) / Mathf.Sin(0.5f * dispatchParams.CameraFovAngleVertical); - - int matrixIndex = (inverted ? 2 : 0) + (infinite ? 1 : 0); - return new Vector4( - d * matrixElemC[matrixIndex], - matrixElemE[matrixIndex], - aspect / cotHalfFovY, - 1.0f / cotHalfFovY); - } - - private void SetupRcasConstants(Fsr3.DispatchDescription dispatchParams) { - int sharpnessIndex = Mathf.RoundToInt(Mathf.Clamp01(dispatchParams.Sharpness) * (RcasConfigs.Count - 1)); - RcasConsts = RcasConfigs[sharpnessIndex]; - } - - private void SetupSpdConstants(Fsr3.DispatchDescription dispatchParams, out Vector2Int dispatchThreadGroupCount) { - RectInt rectInfo = new RectInt(0, 0, dispatchParams.RenderSize.x, dispatchParams.RenderSize.y); - SpdSetup(rectInfo, out dispatchThreadGroupCount, out var workGroupOffset, out var numWorkGroupsAndMips); - - // Downsample - ref Fsr3.SpdConstants spdConstants = ref SpdConsts; - spdConstants.numWorkGroups = (uint)numWorkGroupsAndMips.x; - spdConstants.mips = (uint)numWorkGroupsAndMips.y; - spdConstants.workGroupOffsetX = (uint)workGroupOffset.x; - spdConstants.workGroupOffsetY = (uint)workGroupOffset.y; - spdConstants.renderSizeX = (uint)dispatchParams.RenderSize.x; - spdConstants.renderSizeY = (uint)dispatchParams.RenderSize.y; - } - - private static void SpdSetup(RectInt rectInfo, out Vector2Int dispatchThreadGroupCount, out Vector2Int workGroupOffset, out Vector2Int numWorkGroupsAndMips, int mips = -1) { - workGroupOffset = new Vector2Int(rectInfo.x / 64, rectInfo.y / 64); - - int endIndexX = (rectInfo.x + rectInfo.width - 1) / 64; - int endIndexY = (rectInfo.y + rectInfo.height - 1) / 64; - - dispatchThreadGroupCount = new Vector2Int(endIndexX + 1 - workGroupOffset.x, endIndexY + 1 - workGroupOffset.y); - - numWorkGroupsAndMips = new Vector2Int(dispatchThreadGroupCount.x * dispatchThreadGroupCount.y, mips); - if(mips < 0) { - float resolution = Math.Max(rectInfo.width, rectInfo.height); - numWorkGroupsAndMips.y = Math.Min(Mathf.FloorToInt(Mathf.Log(resolution, 2.0f)), 12); - } - } - - private void DebugCheckDispatch(Fsr3.DispatchDescription dispatchParams) { - // Global texture binding may be queued as part of the command list, which is why we check these after running the process at least once - if(!_firstExecution && !dispatchParams.Reset) { - if(!dispatchParams.Color.HasValue && Shader.GetGlobalTexture(Fsr3ShaderIDs.SrvInputColor) == null) { - Debug.LogError("Color resource is null"); - } - - if(!dispatchParams.Depth.HasValue && Shader.GetGlobalTexture(Fsr3ShaderIDs.SrvInputDepth) == null) { - Debug.LogError("Depth resource is null"); - } - - if(!dispatchParams.MotionVectors.HasValue && Shader.GetGlobalTexture(Fsr3ShaderIDs.SrvInputMotionVectors) == null) { - Debug.LogError("MotionVectors resource is null"); - } - - if(!dispatchParams.Output.HasValue && Shader.GetGlobalTexture(Fsr3ShaderIDs.UavUpscaledOutput) == null) { - Debug.LogError("Output resource is null"); - } - } - - if(dispatchParams.Exposure.HasValue && (_contextDescription.Flags & Fsr3.InitializationFlags.EnableAutoExposure) != 0) { - Debug.LogWarning("Exposure resource provided, however auto exposure flag is present"); - } - - if(Mathf.Abs(dispatchParams.JitterOffset.x) > 1.0f || Mathf.Abs(dispatchParams.JitterOffset.y) > 1.0f) { - Debug.LogWarning("JitterOffset contains value outside of expected range [-1.0, 1.0]"); - } - - if(dispatchParams.MotionVectorScale.x > _contextDescription.MaxRenderSize.x || dispatchParams.MotionVectorScale.y > _contextDescription.MaxRenderSize.y) { - Debug.LogWarning("MotionVectorScale contains scale value greater than MaxRenderSize"); - } - - if(dispatchParams.MotionVectorScale.x == 0.0f || dispatchParams.MotionVectorScale.y == 0.0f) { - Debug.LogWarning("MotionVectorScale contains zero scale value"); - } - - if(dispatchParams.RenderSize.x > _contextDescription.MaxRenderSize.x || dispatchParams.RenderSize.y > _contextDescription.MaxRenderSize.y) { - Debug.LogWarning("RenderSize is greater than context MaxRenderSize"); - } - - if(dispatchParams.RenderSize.x == 0 || dispatchParams.RenderSize.y == 0) { - Debug.LogWarning("RenderSize contains zero dimension"); - } - - if(dispatchParams.FrameTimeDelta > 1.0f) { - Debug.LogWarning("FrameTimeDelta is greater than 1.0f - this value should be seconds (~0.0166 for 60fps)"); - } - - if(dispatchParams.PreExposure == 0.0f) { - Debug.LogError("PreExposure provided as 0.0f which is invalid"); - } - - bool infiniteDepth = (_contextDescription.Flags & Fsr3.InitializationFlags.EnableDepthInfinite) != 0; - bool inverseDepth = (_contextDescription.Flags & Fsr3.InitializationFlags.EnableDepthInverted) != 0; - - if(inverseDepth) { - if(dispatchParams.CameraNear < dispatchParams.CameraFar) { - Debug.LogWarning("EnableDepthInverted flag is present yet CameraNear is less than CameraFar"); - } - - if(infiniteDepth) { - if(dispatchParams.CameraNear < float.MaxValue) { - Debug.LogWarning("EnableDepthInfinite and EnableDepthInverted present, yet CameraNear != float.MaxValue"); - } - } - - if(dispatchParams.CameraFar < 0.075f) { - Debug.LogWarning("EnableDepthInverted present, CameraFar value is very low which may result in depth separation artefacting"); - } - } else { - if(dispatchParams.CameraNear > dispatchParams.CameraFar) { - Debug.LogWarning("CameraNear is greater than CameraFar in non-inverted-depth context"); - } - - if(infiniteDepth) { - if(dispatchParams.CameraFar < float.MaxValue) { - Debug.LogWarning("EnableDepthInfinite present, yet CameraFar != float.MaxValue"); - } - } - - if(dispatchParams.CameraNear < 0.075f) { - Debug.LogWarning("CameraNear value is very low which may result in depth separation artefacting"); - } - } - - if(dispatchParams.CameraFovAngleVertical <= 0.0f) { - Debug.LogError("CameraFovAngleVertical is 0.0f - this value should be > 0.0f"); - } - - if(dispatchParams.CameraFovAngleVertical > Mathf.PI) { - Debug.LogError("CameraFovAngleVertical is greater than 180 degrees/PI"); - } - } - - /// - /// The FSR3 C++ codebase uses floats bitwise converted to ints to pass sharpness parameters to the RCAS shader. - /// This is not possible in C# without enabling unsafe code compilation, so to avoid that we instead use a table of precomputed values. - /// - private static readonly List RcasConfigs = new List() - { - new Fsr3.RcasConstants(1048576000u, 872428544u), - new Fsr3.RcasConstants(1049178080u, 877212745u), - new Fsr3.RcasConstants(1049823372u, 882390168u), - new Fsr3.RcasConstants(1050514979u, 887895276u), - new Fsr3.RcasConstants(1051256227u, 893859143u), - new Fsr3.RcasConstants(1052050675u, 900216232u), - new Fsr3.RcasConstants(1052902144u, 907032080u), - new Fsr3.RcasConstants(1053814727u, 914306687u), - new Fsr3.RcasConstants(1054792807u, 922105590u), - new Fsr3.RcasConstants(1055841087u, 930494326u), - new Fsr3.RcasConstants(1056964608u, 939538432u), - new Fsr3.RcasConstants(1057566688u, 944322633u), - new Fsr3.RcasConstants(1058211980u, 949500056u), - new Fsr3.RcasConstants(1058903587u, 955005164u), - new Fsr3.RcasConstants(1059644835u, 960969031u), - new Fsr3.RcasConstants(1060439283u, 967326120u), - new Fsr3.RcasConstants(1061290752u, 974141968u), - new Fsr3.RcasConstants(1062203335u, 981416575u), - new Fsr3.RcasConstants(1063181415u, 989215478u), - new Fsr3.RcasConstants(1064229695u, 997604214u), - new Fsr3.RcasConstants(1065353216u, 1006648320), - }; - - private static ComputeBuffer CreateConstantBuffer() where TConstants : struct { - return new ComputeBuffer(1, Marshal.SizeOf(), ComputeBufferType.Constant); - } - - private static void DestroyConstantBuffer(ref ComputeBuffer bufferRef) { - if(bufferRef == null) - return; - - bufferRef.Release(); - bufferRef = null; - } - - private static void DestroyPipeline(ref Fsr3Pipeline pipeline) { - if(pipeline == null) - return; - - pipeline.Dispose(); - pipeline = null; - } - } -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Context.cs.meta b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Context.cs.meta deleted file mode 100644 index ac824f7..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Context.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: b513426f140cf384b88215489eb70655 -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Pipeline.cs b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Pipeline.cs deleted file mode 100644 index 959dd68..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Pipeline.cs +++ /dev/null @@ -1,350 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using System; -using System.Runtime.InteropServices; -using UnityEngine; -using UnityEngine.Rendering; - -namespace FidelityFX -{ - /// - /// Base class for all of the compute passes that make up the FSR3 process. - /// This loosely matches the FfxPipelineState struct from the original FSR3 codebase, wrapped in an object-oriented blanket. - /// These classes are responsible for loading compute shaders, managing temporary resources, binding resources to shader kernels and dispatching said shaders. - /// - internal abstract class Fsr3Pipeline : IDisposable - { - internal const int ShadingChangeMipLevel = 4; // This matches the FFX_FSR3_SHADING_CHANGE_MIP_LEVEL define - - protected readonly Fsr3.ContextDescription ContextDescription; - protected readonly Fsr3Resources Resources; - protected readonly ComputeBuffer Constants; - - protected ComputeShader ComputeShader; - protected int KernelIndex; - - protected virtual bool AllowFP16 => true; - - protected Fsr3Pipeline(Fsr3.ContextDescription contextDescription, Fsr3Resources resources, ComputeBuffer constants) { - ContextDescription = contextDescription; - Resources = resources; - Constants = constants; - } - - public virtual void Dispose() { - UnloadComputeShader(); - } - - public abstract void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); - - protected void LoadComputeShader(string name) { - LoadComputeShader(name, ContextDescription.Flags, ref ComputeShader, out KernelIndex); - } - - private void LoadComputeShader(string name, Fsr3.InitializationFlags flags, ref ComputeShader shaderRef, out int kernelIndex) { - - if(shaderRef == null) { - shaderRef = ContextDescription.Callbacks.LoadComputeShader(name); - if(shaderRef == null) - throw new MissingReferenceException($"Shader '{name}' could not be loaded! Please ensure it is included in the project correctly."); - } - - kernelIndex = shaderRef.FindKernel("CS"); - - bool useLut = false; -#if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ - if(SystemInfo.computeSubGroupSize == 64) { - useLut = true; - } -#endif - - // Allow 16-bit floating point as a configuration option, except on passes that explicitly disable it - bool supportedFP16 = ((flags & Fsr3.InitializationFlags.EnableFP16Usage) != 0 && AllowFP16); - - // This matches the permutation rules from the CreatePass* functions - if((flags & Fsr3.InitializationFlags.EnableHighDynamicRange) != 0) - shaderRef.EnableKeyword("FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT"); - if((flags & Fsr3.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) - shaderRef.EnableKeyword("FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS"); - if((flags & Fsr3.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) - shaderRef.EnableKeyword("FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS"); - if((flags & Fsr3.InitializationFlags.EnableDepthInverted) != 0) - shaderRef.EnableKeyword("FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH"); - if(useLut) - shaderRef.EnableKeyword("FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE"); - if(supportedFP16) - shaderRef.EnableKeyword("FFX_HALF"); - -#if UNITY_HDRP - shaderRef.EnableKeyword("UNITY_FSR3UPSCALER_HDRP"); -#endif - } - - private void UnloadComputeShader() { - UnloadComputeShader(ref ComputeShader); - } - - private void UnloadComputeShader(ref ComputeShader shaderRef) { - if(shaderRef == null) - return; - - ContextDescription.Callbacks.UnloadComputeShader(shaderRef); - shaderRef = null; - } - } - - internal class Fsr3ComputeLuminancePyramidPass : Fsr3Pipeline - { - private readonly ComputeBuffer _spdConstants; - - public Fsr3ComputeLuminancePyramidPass(Fsr3.ContextDescription contextDescription, Fsr3Resources resources, ComputeBuffer constants, ComputeBuffer spdConstants) - : base(contextDescription, resources, constants) { - _spdConstants = spdConstants; - - LoadComputeShader("FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass"); - } - - public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - if(dispatchParams.Color.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputColor, dispatchParams.Color.Value, 0, RenderTextureSubElement.Color); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavExposureMipLumaChange, Resources.SceneLuminance, ShadingChangeMipLevel); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavExposureMip5, Resources.SceneLuminance, 5); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavAutoExposure, Resources.AutoExposure); - - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3, Constants, 0, Marshal.SizeOf()); - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbSpd, _spdConstants, 0, Marshal.SizeOf()); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - } - } - - internal class Fsr3ReconstructPreviousDepthPass : Fsr3Pipeline - { - public Fsr3ReconstructPreviousDepthPass(Fsr3.ContextDescription contextDescription, Fsr3Resources resources, ComputeBuffer constants) - : base(contextDescription, resources, constants) { - LoadComputeShader("FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass"); - } - - public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - if(dispatchParams.Color.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputColor, dispatchParams.Color.Value, 0, RenderTextureSubElement.Color); - - if(dispatchParams.Depth.HasValue) { - if(dispatchParams.DepthFormat) { - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputDepth, dispatchParams.Depth.Value, 0, RenderTextureSubElement.Depth); - } else { - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputDepth, dispatchParams.Depth.Value, 0, RenderTextureSubElement.Color); - } - } - - if(dispatchParams.MotionVectors.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors.Value); - - if(dispatchParams.Exposure.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, dispatchParams.Exposure.Value); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); - - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3, Constants, 0, Marshal.SizeOf()); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - } - } - - internal class Fsr3DepthClipPass : Fsr3Pipeline - { - public Fsr3DepthClipPass(Fsr3.ContextDescription contextDescription, Fsr3Resources resources, ComputeBuffer constants) - : base(contextDescription, resources, constants) { - LoadComputeShader("FSR3/ffx_fsr3upscaler_depth_clip_pass"); - } - - public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - if(dispatchParams.Color.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputColor, dispatchParams.Color.Value, 0, RenderTextureSubElement.Color); - - if(dispatchParams.Depth.HasValue) { - if(dispatchParams.DepthFormat) { - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputDepth, dispatchParams.Depth.Value, 0, RenderTextureSubElement.Depth); - } else { - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputDepth, dispatchParams.Depth.Value, 0, RenderTextureSubElement.Color); - } - } - - if(dispatchParams.MotionVectors.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors.Value); - - if(dispatchParams.Exposure.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, dispatchParams.Exposure.Value); - - if(dispatchParams.Reactive.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvReactiveMask, dispatchParams.Reactive.Value); - - if(dispatchParams.TransparencyAndComposition.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvTransparencyAndCompositionMask, dispatchParams.TransparencyAndComposition.Value); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvReconstructedPrevNearestDepth, Fsr3ShaderIDs.UavReconstructedPrevNearestDepth); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedDepth, Fsr3ShaderIDs.UavDilatedDepth); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]); - - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3, Constants, 0, Marshal.SizeOf()); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - } - } - - internal class Fsr3LockPass : Fsr3Pipeline - { - public Fsr3LockPass(Fsr3.ContextDescription contextDescription, Fsr3Resources resources, ComputeBuffer constants) - : base(contextDescription, resources, constants) { - LoadComputeShader("FSR3/ffx_fsr3upscaler_lock_pass"); - } - - public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvLockInputLuma, Fsr3ShaderIDs.UavLockInputLuma); - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3, Constants, 0, Marshal.SizeOf()); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - } - } - - internal class Fsr3AccumulatePass : Fsr3Pipeline - { - private const string SharpeningKeyword = "FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING"; - - // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. - protected override bool AllowFP16 => SystemInfo.graphicsDeviceVendorID != 0x10DE; - -#if UNITY_2021_2_OR_NEWER - private readonly LocalKeyword _sharpeningKeyword; -#endif - - public Fsr3AccumulatePass(Fsr3.ContextDescription contextDescription, Fsr3Resources resources, ComputeBuffer constants) - : base(contextDescription, resources, constants) { - LoadComputeShader("FSR3/ffx_fsr3upscaler_accumulate_pass"); -#if UNITY_2021_2_OR_NEWER - _sharpeningKeyword = new LocalKeyword(ComputeShader, SharpeningKeyword); -#endif - } - - public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { -#if UNITY_2021_2_OR_NEWER - if(dispatchParams.EnableSharpening) - commandBuffer.EnableKeyword(ComputeShader, _sharpeningKeyword); - else - commandBuffer.DisableKeyword(ComputeShader, _sharpeningKeyword); -#else - if(dispatchParams.EnableSharpening) - commandBuffer.EnableShaderKeyword(SharpeningKeyword); - else - commandBuffer.DisableShaderKeyword(SharpeningKeyword); -#endif - - if((ContextDescription.Flags & Fsr3.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); - else if(dispatchParams.MotionVectors.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors.Value); - - if(dispatchParams.Exposure.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, dispatchParams.Exposure.Value); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedReactiveMasks, Fsr3ShaderIDs.UavDilatedReactiveMasks); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[frameIndex ^ 1]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvLockStatus, Resources.LockStatus[frameIndex ^ 1]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvPreparedInputColor, Fsr3ShaderIDs.UavPreparedInputColor); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvLanczosLut, Resources.LanczosLut); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvAutoExposure, Resources.AutoExposure); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavInternalUpscaled, Resources.InternalUpscaled[frameIndex]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavLockStatus, Resources.LockStatus[frameIndex]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavLumaHistory, Resources.LumaHistory[frameIndex]); - - if(dispatchParams.Output.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavUpscaledOutput, dispatchParams.Output.Value); - - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3, Constants, 0, Marshal.SizeOf()); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - } - } - - internal class Fsr3RcasPass : Fsr3Pipeline - { - private readonly ComputeBuffer _rcasConstants; - - public Fsr3RcasPass(Fsr3.ContextDescription contextDescription, Fsr3Resources resources, ComputeBuffer constants, ComputeBuffer rcasConstants) - : base(contextDescription, resources, constants) { - _rcasConstants = rcasConstants; - - LoadComputeShader("FSR3/ffx_fsr3upscaler_rcas_pass"); - } - - public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - if(dispatchParams.Exposure.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, dispatchParams.Exposure.Value); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]); - - if(dispatchParams.Output.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavUpscaledOutput, dispatchParams.Output.Value); - - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3, Constants, 0, Marshal.SizeOf()); - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbRcas, _rcasConstants, 0, Marshal.SizeOf()); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - } - } - - internal class Fsr3GenerateReactivePass : Fsr3Pipeline - { - private readonly ComputeBuffer _generateReactiveConstants; - - public Fsr3GenerateReactivePass(Fsr3.ContextDescription contextDescription, Fsr3Resources resources, ComputeBuffer generateReactiveConstants) - : base(contextDescription, resources, null) { - _generateReactiveConstants = generateReactiveConstants; - - LoadComputeShader("FSR3/ffx_fsr3upscaler_autogen_reactive_pass"); - } - - public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - } - - public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY) { - if(dispatchParams.ColorOpaqueOnly.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvOpaqueOnly, dispatchParams.ColorOpaqueOnly.Value, 0, RenderTextureSubElement.Color); - - if(dispatchParams.ColorPreUpscale.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputColor, dispatchParams.ColorPreUpscale.Value, 0, RenderTextureSubElement.Color); - - if(dispatchParams.OutReactive.HasValue) - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavAutoReactive, dispatchParams.OutReactive.Value); - - commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbGenReactive, _generateReactiveConstants, 0, Marshal.SizeOf()); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - } - } -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Pipeline.cs.meta b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Pipeline.cs.meta deleted file mode 100644 index fbf3dde..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Pipeline.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 0152893517f232641a46bbf7b6d7bcd1 -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Resources.cs b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Resources.cs deleted file mode 100644 index f302254..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Resources.cs +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using System; -using UnityEngine; -using UnityEngine.Experimental.Rendering; -using UnityEngine.Rendering; - -namespace FidelityFX -{ - /// - /// Helper class for bundling and managing persistent resources required by the FSR3 process. - /// This includes lookup tables, default fallback resources and double-buffered resources that get swapped between frames. - /// - internal class Fsr3Resources - { - public Texture2D DefaultExposure; - public Texture2D DefaultReactive; - public Texture2D LanczosLut; - public Texture2D MaximumBiasLut; - public RenderTexture SpdAtomicCounter; - public RenderTexture AutoExposure; - public RenderTexture SceneLuminance; - public RenderTexture AutoReactive; - public RenderTexture AutoComposition; - public readonly RenderTexture[] DilatedMotionVectors = new RenderTexture[2]; - public readonly RenderTexture[] LockStatus = new RenderTexture[2]; - public readonly RenderTexture[] InternalUpscaled = new RenderTexture[2]; - public readonly RenderTexture[] LumaHistory = new RenderTexture[2]; - public readonly RenderTexture[] PrevPreAlpha = new RenderTexture[2]; - public readonly RenderTexture[] PrevPostAlpha = new RenderTexture[2]; - - public void Create(Fsr3.ContextDescription contextDescription) - { - // Generate the data for the LUT - const int lanczos2LutWidth = 128; - float[] lanczos2Weights = new float[lanczos2LutWidth]; - for (int currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; ++currentLanczosWidthIndex) - { - float x = 2.0f * currentLanczosWidthIndex / (lanczos2LutWidth - 1); - float y = Fsr3.Lanczos2(x); - lanczos2Weights[currentLanczosWidthIndex] = y; - } - - float[] maximumBias = new float[MaximumBiasTextureWidth * MaximumBiasTextureHeight]; - for (int i = 0; i < maximumBias.Length; ++i) - { - maximumBias[i] = MaximumBias[i] / 2.0f; - } - - // Resource FSR3_LanczosLutData: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE - // R16_SNorm textures are not supported by Unity on most platforms, strangely enough. So instead we use R32_SFloat and upload pre-normalized float data. - LanczosLut = new Texture2D(lanczos2LutWidth, 1, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "FSR3_LanczosLutData" }; - LanczosLut.SetPixelData(lanczos2Weights, 0); - LanczosLut.Apply(); - - // Resource FSR3_MaximumUpsampleBias: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE - MaximumBiasLut = new Texture2D(MaximumBiasTextureWidth, MaximumBiasTextureHeight, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "FSR3_MaximumUpsampleBias" }; - MaximumBiasLut.SetPixelData(maximumBias, 0); - MaximumBiasLut.Apply(); - - // Resource FSR3_DefaultExposure: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE - DefaultExposure = new Texture2D(1, 1, GraphicsFormat.R32G32_SFloat, TextureCreationFlags.None) { name = "FSR3_DefaultExposure" }; - DefaultExposure.SetPixel(0, 0, Color.clear); - DefaultExposure.Apply(); - - // Resource FSR3_DefaultReactivityMask: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE - DefaultReactive = new Texture2D(1, 1, GraphicsFormat.R8_UNorm, TextureCreationFlags.None) { name = "FSR3_DefaultReactivityMask" }; - DefaultReactive.SetPixel(0, 0, Color.clear); - DefaultReactive.Apply(); - - // Resource FSR3_SpdAtomicCounter: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE - // Despite what the original FSR3 codebase says, this resource really isn't aliasable. Resetting this counter to 0 every frame breaks auto-exposure on MacOS Metal. - SpdAtomicCounter = new RenderTexture(1, 1, 0, GraphicsFormat.R32_UInt) { name = "FSR3_SpdAtomicCounter", enableRandomWrite = true }; - SpdAtomicCounter.Create(); - - // Resource FSR3_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE - AutoExposure = new RenderTexture(1, 1, 0, GraphicsFormat.R32G32_SFloat) { name = "FSR3_AutoExposure", enableRandomWrite = true }; - AutoExposure.Create(); - - // Resource FSR3_ExposureMips: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE - // This is a rather special case: it's an aliasable resource, but because we require a mipmap chain and bind specific mip levels per shader, we can't easily use temporary RTs for this. - int w = contextDescription.MaxRenderSize.x / 2, h = contextDescription.MaxRenderSize.y / 2; - int mipCount = 1 + Mathf.FloorToInt(Mathf.Log(Math.Max(w, h), 2.0f)); - SceneLuminance = new RenderTexture(w, h, 0, GraphicsFormat.R16_SFloat, mipCount) { name = "FSR3_ExposureMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false }; - SceneLuminance.Create(); - - // Resources FSR3_InternalDilatedVelocity1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(DilatedMotionVectors, "FSR3_InternalDilatedVelocity", contextDescription.MaxRenderSize, GraphicsFormat.R16G16_SFloat); - - // Resources FSR3_LockStatus1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(LockStatus, "FSR3_LockStatus", contextDescription.DisplaySize, GraphicsFormat.R16G16_SFloat); - - // Resources FSR3_InternalUpscaled1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(InternalUpscaled, "FSR3_InternalUpscaled", contextDescription.DisplaySize, GraphicsFormat.R16G16B16A16_SFloat); - - // Resources FSR3_LumaHistory1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(LumaHistory, "FSR3_LumaHistory", contextDescription.DisplaySize, GraphicsFormat.R8G8B8A8_UNorm); - } - - public void CreateTcrAutogenResources(Fsr3.ContextDescription contextDescription) - { - // Resource FSR3_AutoReactive: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE - AutoReactive = new RenderTexture(contextDescription.MaxRenderSize.x, contextDescription.MaxRenderSize.y, 0, GraphicsFormat.R8_UNorm) { name = "FSR3_AutoReactive", enableRandomWrite = true }; - AutoReactive.Create(); - - // Resource FSR3_AutoComposition: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE - AutoComposition = new RenderTexture(contextDescription.MaxRenderSize.x, contextDescription.MaxRenderSize.y, 0, GraphicsFormat.R8_UNorm) { name = "FSR3_AutoComposition", enableRandomWrite = true }; - AutoComposition.Create(); - - // Resources FSR3_PrevPreAlpha0/1: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R11G11B10_FLOAT, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(PrevPreAlpha, "FSR3_PrevPreAlpha", contextDescription.MaxRenderSize, GraphicsFormat.B10G11R11_UFloatPack32); - - // Resources FSR3_PrevPostAlpha0/1: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R11G11B10_FLOAT, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(PrevPostAlpha, "FSR3_PrevPostAlpha", contextDescription.MaxRenderSize, GraphicsFormat.B10G11R11_UFloatPack32); - } - - // Set up shared aliasable resources, i.e. temporary render textures - // These do not need to persist between frames, but they do need to be available between passes - public static void CreateAliasableResources(CommandBuffer commandBuffer, Fsr3.ContextDescription contextDescription, Fsr3.DispatchDescription dispatchParams) - { - Vector2Int displaySize = contextDescription.DisplaySize; - Vector2Int maxRenderSize = contextDescription.MaxRenderSize; - - // FSR3_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true); - - // FSR3_DilatedDepth: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavDilatedDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_SFloat, 1, true); - - // FSR3_LockInputLuma: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); - - // FSR3_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1, true); - - // FSR3_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16G16B16A16_SFloat, 1, true); - - // FSR3_NewLocks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavNewLocks, displaySize.x, displaySize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); - } - - public static void DestroyAliasableResources(CommandBuffer commandBuffer) - { - // Release all of the aliasable resources used this frame - commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavReconstructedPrevNearestDepth); - commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavDilatedDepth); - commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavLockInputLuma); - commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavDilatedReactiveMasks); - commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavPreparedInputColor); - commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavNewLocks); - } - - private static void CreateDoubleBufferedResource(RenderTexture[] resource, string name, Vector2Int size, GraphicsFormat format) - { - for (int i = 0; i < 2; ++i) - { - resource[i] = new RenderTexture(size.x, size.y, 0, format) { name = name + (i + 1), enableRandomWrite = true }; - resource[i].Create(); - } - } - - public void Destroy() - { - DestroyTcrAutogenResources(); - - DestroyResource(LumaHistory); - DestroyResource(InternalUpscaled); - DestroyResource(LockStatus); - DestroyResource(DilatedMotionVectors); - DestroyResource(ref SceneLuminance); - DestroyResource(ref AutoExposure); - DestroyResource(ref DefaultReactive); - DestroyResource(ref SpdAtomicCounter); - DestroyResource(ref DefaultExposure); - DestroyResource(ref MaximumBiasLut); - DestroyResource(ref LanczosLut); - } - - public void DestroyTcrAutogenResources() - { - DestroyResource(PrevPostAlpha); - DestroyResource(PrevPreAlpha); - DestroyResource(ref AutoComposition); - DestroyResource(ref AutoReactive); - } - - private static void DestroyResource(ref Texture2D resource) - { - if (resource == null) - return; - - UnityEngine.Object.Destroy(resource); - resource = null; - } - - private static void DestroyResource(ref RenderTexture resource) - { - if (resource == null) - return; - - resource.Release(); - UnityEngine.Object.Destroy(resource); - resource = null; - } - - private static void DestroyResource(RenderTexture[] resource) - { - for (int i = 0; i < resource.Length; ++i) - DestroyResource(ref resource[i]); - } - - private const int MaximumBiasTextureWidth = 16; - private const int MaximumBiasTextureHeight = 16; - private static readonly float[] MaximumBias = - { - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.876f, 1.809f, 1.772f, 1.753f, 1.748f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.869f, 1.801f, 1.764f, 1.745f, 1.739f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.976f, 1.841f, 1.774f, 1.737f, 1.716f, 1.71f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.914f, 1.784f, 1.716f, 1.673f, 1.649f, 1.641f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.793f, 1.676f, 1.604f, 1.562f, 1.54f, 1.533f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.619f, 1.536f, 1.492f, 1.467f, 1.454f, 1.449f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.575f, 1.496f, 1.456f, 1.432f, 1.416f, 1.408f, 1.405f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.555f, 1.479f, 1.438f, 1.413f, 1.398f, 1.387f, 1.381f, 1.379f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.555f, 1.474f, 1.43f, 1.404f, 1.387f, 1.376f, 1.368f, 1.363f, 1.362f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.575f, 1.479f, 1.43f, 1.401f, 1.382f, 1.369f, 1.36f, 1.354f, 1.351f, 1.35f, - 2.0f, 2.0f, 1.976f, 1.914f, 1.793f, 1.619f, 1.496f, 1.438f, 1.404f, 1.382f, 1.367f, 1.357f, 1.349f, 1.344f, 1.341f, 1.34f, - 1.876f, 1.869f, 1.841f, 1.784f, 1.676f, 1.536f, 1.456f, 1.413f, 1.387f, 1.369f, 1.357f, 1.347f, 1.341f, 1.336f, 1.333f, 1.332f, - 1.809f, 1.801f, 1.774f, 1.716f, 1.604f, 1.492f, 1.432f, 1.398f, 1.376f, 1.36f, 1.349f, 1.341f, 1.335f, 1.33f, 1.328f, 1.327f, - 1.772f, 1.764f, 1.737f, 1.673f, 1.562f, 1.467f, 1.416f, 1.387f, 1.368f, 1.354f, 1.344f, 1.336f, 1.33f, 1.326f, 1.323f, 1.323f, - 1.753f, 1.745f, 1.716f, 1.649f, 1.54f, 1.454f, 1.408f, 1.381f, 1.363f, 1.351f, 1.341f, 1.333f, 1.328f, 1.323f, 1.321f, 1.32f, - 1.748f, 1.739f, 1.71f, 1.641f, 1.533f, 1.449f, 1.405f, 1.379f, 1.362f, 1.35f, 1.34f, 1.332f, 1.327f, 1.323f, 1.32f, 1.319f, - }; - } -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Resources.cs.meta b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Resources.cs.meta deleted file mode 100644 index b309703..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3Resources.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 0e9facfbb0f9e3f4d83aecf76695076a -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3ShaderIDs.cs b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3ShaderIDs.cs deleted file mode 100644 index 072fe47..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3ShaderIDs.cs +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using UnityEngine; - -namespace FidelityFX -{ - public static class Fsr3ShaderIDs - { - // Shader resource views, i.e. read-only bindings - public static readonly int SrvInputColor = Shader.PropertyToID("r_input_color_jittered"); - internal static readonly int SrvOpaqueOnly = Shader.PropertyToID("r_input_opaque_only"); - public static readonly int SrvInputMotionVectors = Shader.PropertyToID("r_input_motion_vectors"); - public static readonly int SrvInputDepth = Shader.PropertyToID("r_input_depth"); - internal static readonly int SrvInputExposure = Shader.PropertyToID("r_input_exposure"); - internal static readonly int SrvAutoExposure = Shader.PropertyToID("r_auto_exposure"); - internal static readonly int SrvReactiveMask = Shader.PropertyToID("r_reactive_mask"); - internal static readonly int SrvTransparencyAndCompositionMask = Shader.PropertyToID("r_transparency_and_composition_mask"); - internal static readonly int SrvReconstructedPrevNearestDepth = Shader.PropertyToID("r_reconstructed_previous_nearest_depth"); - internal static readonly int SrvDilatedMotionVectors = Shader.PropertyToID("r_dilated_motion_vectors"); - internal static readonly int SrvPrevDilatedMotionVectors = Shader.PropertyToID("r_previous_dilated_motion_vectors"); - internal static readonly int SrvDilatedDepth = Shader.PropertyToID("r_dilated_depth"); - internal static readonly int SrvInternalUpscaled = Shader.PropertyToID("r_internal_upscaled_color"); - internal static readonly int SrvLockStatus = Shader.PropertyToID("r_lock_status"); - internal static readonly int SrvLockInputLuma = Shader.PropertyToID("r_lock_input_luma"); - internal static readonly int SrvPreparedInputColor = Shader.PropertyToID("r_prepared_input_color"); - internal static readonly int SrvLumaHistory = Shader.PropertyToID("r_luma_history"); - internal static readonly int SrvRcasInput = Shader.PropertyToID("r_rcas_input"); - internal static readonly int SrvLanczosLut = Shader.PropertyToID("r_lanczos_lut"); - internal static readonly int SrvSceneLuminanceMips = Shader.PropertyToID("r_imgMips"); - internal static readonly int SrvUpscaleMaximumBiasLut = Shader.PropertyToID("r_upsample_maximum_bias_lut"); - internal static readonly int SrvDilatedReactiveMasks = Shader.PropertyToID("r_dilated_reactive_masks"); - internal static readonly int SrvPrevColorPreAlpha = Shader.PropertyToID("r_input_prev_color_pre_alpha"); - internal static readonly int SrvPrevColorPostAlpha = Shader.PropertyToID("r_input_prev_color_post_alpha"); - - // Unordered access views, i.e. random read/write bindings - internal static readonly int UavReconstructedPrevNearestDepth = Shader.PropertyToID("rw_reconstructed_previous_nearest_depth"); - internal static readonly int UavDilatedMotionVectors = Shader.PropertyToID("rw_dilated_motion_vectors"); - internal static readonly int UavDilatedDepth = Shader.PropertyToID("rw_dilated_depth"); - internal static readonly int UavInternalUpscaled = Shader.PropertyToID("rw_internal_upscaled_color"); - internal static readonly int UavLockStatus = Shader.PropertyToID("rw_lock_status"); - internal static readonly int UavLockInputLuma = Shader.PropertyToID("rw_lock_input_luma"); - internal static readonly int UavNewLocks = Shader.PropertyToID("rw_new_locks"); - internal static readonly int UavPreparedInputColor = Shader.PropertyToID("rw_prepared_input_color"); - internal static readonly int UavLumaHistory = Shader.PropertyToID("rw_luma_history"); - internal static readonly int UavUpscaledOutput = Shader.PropertyToID("rw_upscaled_output"); - internal static readonly int UavExposureMipLumaChange = Shader.PropertyToID("rw_img_mip_shading_change"); - internal static readonly int UavExposureMip5 = Shader.PropertyToID("rw_img_mip_5"); - internal static readonly int UavDilatedReactiveMasks = Shader.PropertyToID("rw_dilated_reactive_masks"); - internal static readonly int UavAutoExposure = Shader.PropertyToID("rw_auto_exposure"); - internal static readonly int UavSpdAtomicCount = Shader.PropertyToID("rw_spd_global_atomic"); - public static readonly int UavAutoReactive = Shader.PropertyToID("rw_output_autoreactive"); - internal static readonly int UavAutoComposition = Shader.PropertyToID("rw_output_autocomposition"); - internal static readonly int UavPrevColorPreAlpha = Shader.PropertyToID("rw_output_prev_color_pre_alpha"); - internal static readonly int UavPrevColorPostAlpha = Shader.PropertyToID("rw_output_prev_color_post_alpha"); - - // Constant buffer bindings - internal static readonly int CbFsr3 = Shader.PropertyToID("cbFSR3Upscaler"); - internal static readonly int CbSpd = Shader.PropertyToID("cbSPD"); - internal static readonly int CbRcas = Shader.PropertyToID("cbRCAS"); - internal static readonly int CbGenReactive = Shader.PropertyToID("cbGenerateReactive"); - } -} diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3ShaderIDs.cs.meta b/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3ShaderIDs.cs.meta deleted file mode 100644 index b8436c7..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/Fsr3ShaderIDs.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 8737d8a870d88b045902805356eedc3f -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/LICENSE.txt b/Assets/AEG FSR/Runtime/Plugins/Scripts/LICENSE.txt deleted file mode 100644 index 0251ef6..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/LICENSE.txt +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2023 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. \ No newline at end of file diff --git a/Assets/AEG FSR/Runtime/Plugins/Scripts/LICENSE.txt.meta b/Assets/AEG FSR/Runtime/Plugins/Scripts/LICENSE.txt.meta deleted file mode 100644 index cfa5f74..0000000 --- a/Assets/AEG FSR/Runtime/Plugins/Scripts/LICENSE.txt.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 5b0355b580fee1b4c90a081da804fd3e -TextScriptImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/URP.meta b/Assets/AEG FSR/Runtime/URP.meta deleted file mode 100644 index 68cb433..0000000 --- a/Assets/AEG FSR/Runtime/URP.meta +++ /dev/null @@ -1,8 +0,0 @@ -fileFormatVersion: 2 -guid: 98dc57e5147b0924fad33b9aa5ca6c7b -folderAsset: yes -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/URP/FSR3_URP.cs b/Assets/AEG FSR/Runtime/URP/FSR3_URP.cs deleted file mode 100644 index 0389773..0000000 --- a/Assets/AEG FSR/Runtime/URP/FSR3_URP.cs +++ /dev/null @@ -1,416 +0,0 @@ -using System; -using System.Reflection; -using UnityEngine; -using UnityEngine.Rendering; -using UnityEngine.Rendering.Universal; -using UnityEngine.Experimental.Rendering; -using System.Collections.Generic; -using System.Runtime.InteropServices; -using FidelityFX; - -namespace AEG.FSR -{ - [RequireComponent(typeof(Camera))] - public class FSR3_URP : FSR3_BASE - { - //Rendertextures - private RTHandleSystem RTHandleS; - public RTHandle m_opaqueOnlyColorBuffer; - public RTHandle m_afterOpaqueOnlyColorBuffer; - public RTHandle m_reactiveMaskOutput; - //public RTHandle m_colorBuffer; - public RTHandle m_fsrOutput; - public bool m_autoHDR; - - - private List fsrScriptableRenderFeature; - private bool containsRenderFeature = false; - - private Matrix4x4 m_jitterMatrix; - private Matrix4x4 m_projectionMatrix; - - //UniversalRenderPipelineAsset - private UniversalRenderPipelineAsset UniversalRenderPipelineAsset; - private UniversalAdditionalCameraData m_cameraData; - - private GraphicsFormat m_graphicsFormat = GraphicsFormat.R16G16B16A16_SFloat; - public GraphicsFormat m_prevGraphicsFormat; - - public readonly Fsr3.DispatchDescription m_dispatchDescription = new Fsr3.DispatchDescription(); - public readonly Fsr3.GenerateReactiveDescription m_genReactiveDescription = new Fsr3.GenerateReactiveDescription(); - private IFsr3Callbacks Callbacks { get; set; } = new Fsr3CallbacksBase(); - public Fsr3Context m_context; - - public bool m_cameraStacking = false; - public Camera m_topCamera; - private int m_prevCameraStackCount; - private bool m_isBaseCamera; - private List m_prevCameraStack = new List(); - private FSR_Quality m_prevStackQuality = (FSR_Quality)(-1); - - protected override void InitializeFSR() { - base.InitializeFSR(); - m_mainCamera.depthTextureMode = DepthTextureMode.Depth | DepthTextureMode.MotionVectors; - - SetupResolution(); - - if(!m_fsrInitialized) { - RenderPipelineManager.beginCameraRendering += PreRenderCamera; - RenderPipelineManager.endCameraRendering += PostRenderCamera; - } - - if(m_cameraData == null) { - m_cameraData = m_mainCamera.GetUniversalAdditionalCameraData(); - if(m_cameraData != null) { - if(m_cameraData.renderType == CameraRenderType.Base) { - m_isBaseCamera = true; - // delete by james - //SetupCameraStacking(); - } - } - } - } - - /// - /// Sets up the buffers, initializes the fsr context, and sets up the command buffer - /// Must be recalled whenever the display resolution changes - /// - private void SetupCommandBuffer() { - if(m_fsrOutput != null) { - m_fsrOutput.Release(); - if(m_opaqueOnlyColorBuffer != null) { - m_opaqueOnlyColorBuffer.Release(); - m_afterOpaqueOnlyColorBuffer.Release(); - m_reactiveMaskOutput.Release(); - } - - if(m_genReactiveDescription.ColorOpaqueOnly != null) { - m_genReactiveDescription.ColorOpaqueOnly = null; - m_genReactiveDescription.ColorPreUpscale = null; - m_genReactiveDescription.OutReactive = null; - m_dispatchDescription.Reactive = null; - } - } - - if(fsrScriptableRenderFeature != null) { - for(int i = 0; i < fsrScriptableRenderFeature.Count; i++) { - fsrScriptableRenderFeature[i].OnDispose(); - } - } else { - containsRenderFeature = GetRenderFeature(); - } - SetDynamicResolution(m_scaleFactor); - - m_renderWidth = (int)(m_mainCamera.pixelWidth / m_scaleFactor); - m_renderHeight = (int)(m_mainCamera.pixelHeight / m_scaleFactor); - - m_fsrOutput = RTHandleS.Alloc(m_mainCamera.pixelWidth, m_mainCamera.pixelHeight, enableRandomWrite: true, colorFormat: m_graphicsFormat, msaaSamples: MSAASamples.None, name: "FSR OUTPUT"); - - m_dispatchDescription.InputResourceSize = new Vector2Int(m_renderWidth, m_renderHeight); - m_dispatchDescription.Output = m_fsrOutput; - - if(generateReactiveMask) { - m_opaqueOnlyColorBuffer = RTHandleS.Alloc(m_renderWidth, m_renderHeight, enableRandomWrite: false, colorFormat: m_graphicsFormat, msaaSamples: MSAASamples.None, name: "OPAQUE ONLY BUFFER"); - m_afterOpaqueOnlyColorBuffer = RTHandleS.Alloc(m_renderWidth, m_renderHeight, enableRandomWrite: false, colorFormat: m_graphicsFormat, msaaSamples: MSAASamples.None, name: "AFTER OPAQUE ONLY BUFFER"); - m_reactiveMaskOutput = RTHandleS.Alloc(m_renderWidth, m_renderHeight, enableRandomWrite: true, colorFormat: m_graphicsFormat, msaaSamples: MSAASamples.None, name: "FSR REACTIVE MASK OUTPUT"); - - m_genReactiveDescription.ColorOpaqueOnly = m_opaqueOnlyColorBuffer; - m_genReactiveDescription.ColorPreUpscale = m_afterOpaqueOnlyColorBuffer; - m_genReactiveDescription.OutReactive = m_reactiveMaskOutput; - m_dispatchDescription.Reactive = m_genReactiveDescription.OutReactive; - } - - if(!containsRenderFeature) { - Debug.LogError("Current Universal Render Data is missing the 'FSR Scriptable Render Pass URP' Rendering Feature"); - } else { - for(int i = 0; i < fsrScriptableRenderFeature.Count; i++) { - fsrScriptableRenderFeature[i].OnSetReference(this); - } - } - for(int i = 0; i < fsrScriptableRenderFeature.Count; i++) { - fsrScriptableRenderFeature[i].IsEnabled = true; - } - } - - - private bool GetRenderFeature() { - fsrScriptableRenderFeature = new List(); - - UniversalRenderPipelineAsset = GraphicsSettings.currentRenderPipeline as UniversalRenderPipelineAsset; - bool fsrScriptableRenderFeatureFound = false; - if(UniversalRenderPipelineAsset != null) { - UniversalRenderPipelineAsset.upscalingFilter = UpscalingFilterSelection.Linear; - UniversalRenderPipelineAsset.msaaSampleCount = (int)MsaaQuality.Disabled; - - var type = UniversalRenderPipelineAsset.GetType(); - var propertyInfo = type.GetField("m_RendererDataList", BindingFlags.Instance | BindingFlags.NonPublic); - - if(propertyInfo != null) { - var scriptableRenderData = (ScriptableRendererData[])propertyInfo.GetValue(UniversalRenderPipelineAsset); - - - - if(scriptableRenderData != null && scriptableRenderData.Length > 0) { - foreach(var renderData in scriptableRenderData) { - - foreach(var rendererFeature in renderData.rendererFeatures) { - - FSRScriptableRenderFeature _renderFeature = rendererFeature as FSRScriptableRenderFeature; - if(_renderFeature != null) { - fsrScriptableRenderFeature.Add(rendererFeature as FSRScriptableRenderFeature); - fsrScriptableRenderFeatureFound = true; - } - } - } - } - } - } else { - Debug.LogError("FSR 2: Can't find UniversalRenderPipelineAsset"); - } - - return fsrScriptableRenderFeatureFound; - } - - void PreRenderCamera(ScriptableRenderContext context, Camera cameras) { - - if(cameras != m_mainCamera) { - return; - } - - // Set up the parameters to auto-generate a reactive mask - if(generateReactiveMask) { - m_genReactiveDescription.RenderSize = new Vector2Int(m_renderWidth, m_renderHeight); - m_genReactiveDescription.Scale = autoReactiveScale; - m_genReactiveDescription.CutoffThreshold = autoTcThreshold; - m_genReactiveDescription.BinaryValue = autoReactiveBinaryValue; - - m_genReactiveDescription.Flags = reactiveFlags; - } - - m_dispatchDescription.Exposure = null; - m_dispatchDescription.PreExposure = 1; - m_dispatchDescription.EnableSharpening = sharpening; - m_dispatchDescription.Sharpness = sharpness; - m_dispatchDescription.MotionVectorScale.x = -m_renderWidth; - m_dispatchDescription.MotionVectorScale.y = -m_renderHeight; - m_dispatchDescription.RenderSize = new Vector2Int(m_renderWidth, m_renderHeight); - m_dispatchDescription.FrameTimeDelta = Time.deltaTime; - m_dispatchDescription.CameraNear = m_mainCamera.nearClipPlane; - m_dispatchDescription.CameraFar = m_mainCamera.farClipPlane; - m_dispatchDescription.CameraFovAngleVertical = m_mainCamera.fieldOfView * Mathf.Deg2Rad; - m_dispatchDescription.ViewSpaceToMetersFactor = 1.0f; - m_dispatchDescription.Reset = m_resetCamera; - m_resetCamera = false; - - if(SystemInfo.usesReversedZBuffer) { - // Swap the near and far clip plane distances as FSR2 expects this when using inverted depth - (m_dispatchDescription.CameraNear, m_dispatchDescription.CameraFar) = (m_dispatchDescription.CameraFar, m_dispatchDescription.CameraNear); - } - - JitterCameraMatrix(); - - if(UniversalRenderPipelineAsset != GraphicsSettings.currentRenderPipeline as UniversalRenderPipelineAsset) { - for(int i = 0; i < fsrScriptableRenderFeature.Count; i++) { - fsrScriptableRenderFeature[i].OnDispose(); - } - fsrScriptableRenderFeature = null; - OnSetQuality(FSRQuality); - SetupCommandBuffer(); - } - - //Check if display resolution has changed - if(m_displayWidth != m_mainCamera.pixelWidth || m_displayHeight != m_mainCamera.pixelHeight || m_previousHDR != m_autoHDR || m_prevGraphicsFormat != m_graphicsFormat) { - SetupResolution(); - } - - if(m_previousScaleFactor != m_scaleFactor || m_previousReactiveMask != generateReactiveMask || m_previousTCMask != generateTCMask || m_previousRenderingPath != m_mainCamera.actualRenderingPath) { - SetupFrameBuffers(); - } - - //Camera Stacking - // delete by james - //if (m_isBaseCamera) - //{ - // if (m_cameraData != null) - // { - // if (m_cameraData.cameraStack.Count > 0) - // { - // try - // { - // if (m_topCamera != m_cameraData.cameraStack[m_cameraData.cameraStack.Count - 1] || m_prevCameraStackCount != m_cameraData.cameraStack.Count || m_prevStackQuality != FSRQuality) - // { - // SetupCameraStacking(); - // } - // } - // catch { } - // } - // } - //} - } - - void PostRenderCamera(ScriptableRenderContext context, Camera cameras) { - if(cameras != m_mainCamera) { - return; - } - - m_mainCamera.ResetProjectionMatrix(); - } - - /// - /// FSR TAA Jitter - /// - private void JitterCameraMatrix() { - if(fsrScriptableRenderFeature == null) { - return; - } else if(!fsrScriptableRenderFeature[0].IsEnabled) { - return; - } - - - int jitterPhaseCount = Fsr3.GetJitterPhaseCount(m_renderWidth, (int)(m_renderWidth * m_scaleFactor)); - Fsr3.GetJitterOffset(out float jitterX, out float jitterY, Time.frameCount, jitterPhaseCount); - m_dispatchDescription.JitterOffset = new Vector2(jitterX, jitterY); - - jitterX = 2.0f * jitterX / (float)m_renderWidth; - jitterY = 2.0f * jitterY / (float)m_renderHeight; - - jitterX += UnityEngine.Random.Range(-0.0001f * antiGhosting, 0.0001f * antiGhosting); - jitterY += UnityEngine.Random.Range(-0.0001f * antiGhosting, 0.0001f * antiGhosting); - - m_jitterMatrix = Matrix4x4.Translate(new Vector2(jitterX, jitterY)); - m_projectionMatrix = m_mainCamera.projectionMatrix; - m_mainCamera.nonJitteredProjectionMatrix = m_projectionMatrix; - m_mainCamera.projectionMatrix = m_jitterMatrix * m_projectionMatrix; - m_mainCamera.useJitteredProjectionMatrixForTransparentRendering = true; - } - - /// - /// Handle Dynamic Scaling - /// - /// - public void SetDynamicResolution(float _value) { - if(UniversalRenderPipelineAsset != null) { - UniversalRenderPipelineAsset.renderScale = 1 / _value; - } - } - - /// - /// Creates new buffers and sends them to the plugin - /// - private void SetupFrameBuffers() { - m_previousScaleFactor = m_scaleFactor; - m_previousReactiveMask = generateReactiveMask; - - SetupCommandBuffer(); - - m_previousRenderingPath = m_mainCamera.actualRenderingPath; - } - - /// - /// Creates new buffers, sends them to the plugin, and reintilized FSR to adjust the display size - /// - private void SetupResolution() { - m_displayWidth = m_mainCamera.pixelWidth; - m_displayHeight = m_mainCamera.pixelHeight; - m_previousHDR = m_autoHDR; - - m_prevGraphicsFormat = m_graphicsFormat; - - RTHandleS = new RTHandleSystem(); - RTHandleS.Initialize(m_mainCamera.pixelWidth, m_mainCamera.pixelHeight); - - Fsr3.InitializationFlags flags = Fsr3.InitializationFlags.EnableMotionVectorsJitterCancellation - | Fsr3.InitializationFlags.EnableHighDynamicRange - | Fsr3.InitializationFlags.EnableAutoExposure; - if(enableF16) - flags |= Fsr3.InitializationFlags.EnableFP16Usage; - - if(m_context != null) { - m_context.Destroy(); - m_context = null; - } - m_context = Fsr3.CreateContext(new Vector2Int(m_displayWidth, m_displayHeight), new Vector2Int((int)(m_displayWidth), (int)(m_displayHeight)), Callbacks, flags); - - SetupFrameBuffers(); - } - - /// - /// Automatically Setup camera stacking - /// - private void SetupCameraStacking() { - m_prevCameraStackCount = m_cameraData.cameraStack.Count; - if(m_cameraData.renderType == CameraRenderType.Base) { - m_isBaseCamera = true; - - m_cameraStacking = m_cameraData.cameraStack.Count > 0; - if(m_cameraStacking) { - CleanupOverlayCameras(); - m_prevStackQuality = FSRQuality; - - m_topCamera = m_cameraData.cameraStack[m_cameraData.cameraStack.Count - 1]; - - for(int i = 0; i < m_cameraData.cameraStack.Count; i++) { - FSR3_URP stackedCamera = m_cameraData.cameraStack[i].gameObject.GetComponent(); - if(stackedCamera == null) { - stackedCamera = m_cameraData.cameraStack[i].gameObject.AddComponent(); - } - m_prevCameraStack.Add(m_cameraData.cameraStack[i].gameObject.GetComponent()); - - //stackedCamera.hideFlags = HideFlags.HideInInspector; - stackedCamera.m_cameraStacking = true; - stackedCamera.m_topCamera = m_topCamera; - - stackedCamera.OnSetQuality(FSRQuality); - - stackedCamera.sharpening = sharpening; - stackedCamera.sharpness = sharpness; - stackedCamera.generateReactiveMask = generateReactiveMask; - stackedCamera.autoReactiveScale = autoReactiveScale; - stackedCamera.autoReactiveThreshold = autoReactiveThreshold; - stackedCamera.autoReactiveBinaryValue = autoReactiveBinaryValue; - } - } - } - } - - private void CleanupOverlayCameras() { - for(int i = 0; i < m_prevCameraStack.Count; i++) { - if(!m_prevCameraStack[i].m_isBaseCamera) - DestroyImmediate(m_prevCameraStack[i]); - } - m_prevCameraStack = new List(); - } - - protected override void DisableFSR() { - base.DisableFSR(); - - RenderPipelineManager.beginCameraRendering -= PreRenderCamera; - RenderPipelineManager.endCameraRendering -= PostRenderCamera; - - SetDynamicResolution(1); - if(fsrScriptableRenderFeature != null) { - for(int i = 0; i < fsrScriptableRenderFeature.Count; i++) { - fsrScriptableRenderFeature[i].IsEnabled = false; - } - } - CleanupOverlayCameras(); - m_previousScaleFactor = -1; - m_prevStackQuality = (FSR_Quality)(-1); - - if(m_fsrOutput != null) { - m_fsrOutput.Release(); - - if(m_opaqueOnlyColorBuffer != null) { - m_opaqueOnlyColorBuffer.Release(); - m_afterOpaqueOnlyColorBuffer.Release(); - m_reactiveMaskOutput.Release(); - } - } - - if(m_context != null) { - m_context.Destroy(); - m_context = null; - } - } - } -} diff --git a/Assets/AEG FSR/Runtime/URP/FSR3_URP.cs.meta b/Assets/AEG FSR/Runtime/URP/FSR3_URP.cs.meta deleted file mode 100644 index 91dcdab..0000000 --- a/Assets/AEG FSR/Runtime/URP/FSR3_URP.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 4d37d054e457113459accf4b905624c6 -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/URP/FSRRenderPass.cs b/Assets/AEG FSR/Runtime/URP/FSRRenderPass.cs deleted file mode 100644 index f79bff2..0000000 --- a/Assets/AEG FSR/Runtime/URP/FSRRenderPass.cs +++ /dev/null @@ -1,159 +0,0 @@ -using System; -using UnityEngine; -using UnityEngine.Rendering; -using UnityEngine.Rendering.Universal; - -#if UNITY_EDITOR -using UnityEditor; -#endif - -namespace AEG.FSR -{ - public class FSRRenderPass : ScriptableRenderPass - { - private CommandBuffer cmd; - - private FSR3_URP m_fsrURP; - private readonly Vector4 flipVector = new Vector4(1, -1, 0, 1); - - public FSRRenderPass(FSR3_URP _fsrURP) { - renderPassEvent = RenderPassEvent.AfterRendering + 5; - m_fsrURP = _fsrURP; - } - - public void OnSetReference(FSR3_URP _fsrURP) { - m_fsrURP = _fsrURP; - } - - // The actual execution of the pass. This is where custom rendering occurs. - public override void Execute(ScriptableRenderContext context, ref RenderingData renderingData) { - try { - cmd = CommandBufferPool.Get("FSRRender"); - - if(m_fsrURP.generateReactiveMask) { - m_fsrURP.m_context.GenerateReactiveMask(m_fsrURP.m_genReactiveDescription, cmd); - } - m_fsrURP.m_context.Dispatch(m_fsrURP.m_dispatchDescription, cmd); - - -#if UNITY_2022_1_OR_NEWER - Blitter.BlitCameraTexture(cmd, m_fsrURP.m_fsrOutput, renderingData.cameraData.renderer.cameraColorTargetHandle, flipVector, 0, false); -#else - Blit(cmd, m_fsrURP.m_fsrOutput, renderingData.cameraData.renderer.cameraColorTarget); -#endif - context.ExecuteCommandBuffer(cmd); - CommandBufferPool.Release(cmd); - } - catch { } - } - } - - public class FSRBufferPass : ScriptableRenderPass - { - private FSR3_URP m_fsrURP; - -#if !UNITY_2022_1_OR_NEWER - private readonly int depthTexturePropertyID = Shader.PropertyToID("_CameraDepthTexture"); -#endif - private readonly int motionTexturePropertyID = Shader.PropertyToID("_MotionVectorTexture"); - - public FSRBufferPass(FSR3_URP _fsrURP) { - renderPassEvent = RenderPassEvent.AfterRenderingPostProcessing; - ConfigureInput(ScriptableRenderPassInput.Depth); - m_fsrURP = _fsrURP; - } - - //2022 and up - public void Setup(RenderTargetIdentifier color, RenderTargetIdentifier depth) { - if(!Application.isPlaying) { - return; - } - if(m_fsrURP == null) { - return; - } - - m_fsrURP.m_dispatchDescription.Color = color; - m_fsrURP.m_dispatchDescription.Depth = depth; - m_fsrURP.m_dispatchDescription.MotionVectors = Shader.GetGlobalTexture(motionTexturePropertyID); - } - - public void OnSetReference(FSR3_URP _fsrURP) { - m_fsrURP = _fsrURP; - } - - // The actual execution of the pass. This is where custom rendering occurs. - public override void Execute(ScriptableRenderContext context, ref RenderingData renderingData) { -#if UNITY_2022_1_OR_NEWER - m_fsrURP.m_dispatchDescription.Color = renderingData.cameraData.renderer.cameraColorTargetHandle; -#else - - m_fsrURP.m_dispatchDescription.Color = renderingData.cameraData.renderer.cameraColorTarget; - m_fsrURP.m_dispatchDescription.Depth = Shader.GetGlobalTexture(depthTexturePropertyID); - m_fsrURP.m_dispatchDescription.MotionVectors = Shader.GetGlobalTexture(motionTexturePropertyID); - - try { - m_fsrURP.m_dispatchDescription.DepthFormat = Shader.GetGlobalTexture(depthTexturePropertyID).graphicsFormat == UnityEngine.Experimental.Rendering.GraphicsFormat.None; - } - catch { - m_fsrURP.m_dispatchDescription.DepthFormat = true; - } -#endif - } - } - - public class FSROpaqueOnlyPass : ScriptableRenderPass - { - private CommandBuffer cmd; - private FSR3_URP m_fsrURP; - - public FSROpaqueOnlyPass(FSR3_URP _fsrURP) { - renderPassEvent = RenderPassEvent.BeforeRenderingTransparents; - m_fsrURP = _fsrURP; - } - - public void OnSetReference(FSR3_URP _fsrURP) { - m_fsrURP = _fsrURP; - } - - // The actual execution of the pass. This is where custom rendering occurs. - public override void Execute(ScriptableRenderContext context, ref RenderingData renderingData) { - cmd = CommandBufferPool.Get("FSROpaqueOnly"); - -#if UNITY_2022_1_OR_NEWER - Blit(cmd, renderingData.cameraData.renderer.cameraColorTargetHandle, m_fsrURP.m_opaqueOnlyColorBuffer); -#else - Blit(cmd, renderingData.cameraData.renderer.cameraColorTarget, m_fsrURP.m_opaqueOnlyColorBuffer); -#endif - context.ExecuteCommandBuffer(cmd); - CommandBufferPool.Release(cmd); - } - } - - public class FSRTransparentPass : ScriptableRenderPass - { - private CommandBuffer cmd; - private FSR3_URP m_fsrURP; - - public FSRTransparentPass(FSR3_URP _fsrURP) { - renderPassEvent = RenderPassEvent.AfterRenderingTransparents; - m_fsrURP = _fsrURP; - } - - public void OnSetReference(FSR3_URP _fsrURP) { - m_fsrURP = _fsrURP; - } - - // The actual execution of the pass. This is where custom rendering occurs. - public override void Execute(ScriptableRenderContext context, ref RenderingData renderingData) { - cmd = CommandBufferPool.Get("FSRTransparent"); - -#if UNITY_2022_1_OR_NEWER - Blit(cmd, renderingData.cameraData.renderer.cameraColorTargetHandle, m_fsrURP.m_afterOpaqueOnlyColorBuffer); -#else - Blit(cmd, renderingData.cameraData.renderer.cameraColorTarget, m_fsrURP.m_afterOpaqueOnlyColorBuffer); -#endif - context.ExecuteCommandBuffer(cmd); - CommandBufferPool.Release(cmd); - } - } -} diff --git a/Assets/AEG FSR/Runtime/URP/FSRRenderPass.cs.meta b/Assets/AEG FSR/Runtime/URP/FSRRenderPass.cs.meta deleted file mode 100644 index 6612642..0000000 --- a/Assets/AEG FSR/Runtime/URP/FSRRenderPass.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: 3c48cfc3e547c374fb0cbbdd820b2f5d -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/URP/FSRScriptableRenderFeature.cs b/Assets/AEG FSR/Runtime/URP/FSRScriptableRenderFeature.cs deleted file mode 100644 index d86c81b..0000000 --- a/Assets/AEG FSR/Runtime/URP/FSRScriptableRenderFeature.cs +++ /dev/null @@ -1,89 +0,0 @@ -using UnityEngine.Rendering; -using UnityEngine.Rendering.Universal; -using System; -using UnityEngine; - -namespace AEG.FSR -{ - //Not allowed to be in a namespace - public class FSRScriptableRenderFeature : ScriptableRendererFeature - { - [HideInInspector] - public bool IsEnabled = false; - - private FSR3_URP m_fsrURP; - - private FSRBufferPass fsrBufferPass; - private FSRRenderPass fsrRenderPass; - private FSROpaqueOnlyPass fsrReactiveMaskPass; - private FSRTransparentPass fsrReactiveMaskTransparentPass; - - private CameraData cameraData; - - public void OnSetReference(FSR3_URP _fsrURP) { - m_fsrURP = _fsrURP; - fsrBufferPass.OnSetReference(m_fsrURP); - fsrRenderPass.OnSetReference(m_fsrURP); - fsrReactiveMaskPass.OnSetReference(m_fsrURP); - fsrReactiveMaskTransparentPass.OnSetReference(m_fsrURP); - } - - public override void Create() { - name = "FSRScriptableRenderFeature"; - - // Pass the settings as a parameter to the constructor of the pass. - fsrBufferPass = new FSRBufferPass(m_fsrURP); - fsrRenderPass = new FSRRenderPass(m_fsrURP); - fsrReactiveMaskPass = new FSROpaqueOnlyPass(m_fsrURP); - fsrReactiveMaskTransparentPass = new FSRTransparentPass(m_fsrURP); - - fsrBufferPass.ConfigureInput(ScriptableRenderPassInput.Depth | ScriptableRenderPassInput.Motion); - } - - public void OnDispose() { - } - -#if UNITY_2022_1_OR_NEWER - public override void SetupRenderPasses(ScriptableRenderer renderer, in RenderingData renderingData) { - fsrBufferPass.Setup(renderer.cameraColorTargetHandle, renderer.cameraDepthTargetHandle); - } -#endif - - public override void AddRenderPasses(ScriptableRenderer renderer, ref RenderingData renderingData) { - if(!IsEnabled) { - return; - } - if(!Application.isPlaying) { - return; - } - - if(m_fsrURP == null) { - return; - } - if(m_fsrURP.m_context == null) { - return; - } - - cameraData = renderingData.cameraData; - if(cameraData.cameraType != CameraType.Game) { - return; - } - if(cameraData.camera.GetComponent() == null) { - return; - } - if(!cameraData.resolveFinalTarget) { - return; - } - - m_fsrURP.m_autoHDR = cameraData.isHdrEnabled; - - // Here you can queue up multiple passes after each other. - renderer.EnqueuePass(fsrBufferPass); - renderer.EnqueuePass(fsrRenderPass); - if(m_fsrURP.generateReactiveMask) { - renderer.EnqueuePass(fsrReactiveMaskPass); - renderer.EnqueuePass(fsrReactiveMaskTransparentPass); - } - } - } -} diff --git a/Assets/AEG FSR/Runtime/URP/FSRScriptableRenderFeature.cs.meta b/Assets/AEG FSR/Runtime/URP/FSRScriptableRenderFeature.cs.meta deleted file mode 100644 index 2d4df29..0000000 --- a/Assets/AEG FSR/Runtime/URP/FSRScriptableRenderFeature.cs.meta +++ /dev/null @@ -1,11 +0,0 @@ -fileFormatVersion: 2 -guid: cde85230acf669d43a012f11b632f852 -MonoImporter: - externalObjects: {} - serializedVersion: 2 - defaultReferences: [] - executionOrder: 0 - icon: {instanceID: 0} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/URP/com.alteregogames.aeg-fsr.Runtime.URP.asmdef b/Assets/AEG FSR/Runtime/URP/com.alteregogames.aeg-fsr.Runtime.URP.asmdef deleted file mode 100644 index aab38f9..0000000 --- a/Assets/AEG FSR/Runtime/URP/com.alteregogames.aeg-fsr.Runtime.URP.asmdef +++ /dev/null @@ -1,18 +0,0 @@ -{ - "name": "com.alteregogames.aeg-fsr.Runtime.URP", - "rootNamespace": "AEG.FSR", - "references": [ - "com.alteregogames.aeg-fsr.Runtime", - "Unity.RenderPipelines.Core.Runtime", - "Unity.RenderPipelines.Universal.Runtime" - ], - "includePlatforms": [], - "excludePlatforms": [], - "allowUnsafeCode": false, - "overrideReferences": false, - "precompiledReferences": [], - "autoReferenced": true, - "defineConstraints": [], - "versionDefines": [], - "noEngineReferences": false -} \ No newline at end of file diff --git a/Assets/AEG FSR/Runtime/URP/com.alteregogames.aeg-fsr.Runtime.URP.asmdef.meta b/Assets/AEG FSR/Runtime/URP/com.alteregogames.aeg-fsr.Runtime.URP.asmdef.meta deleted file mode 100644 index 89fc6de..0000000 --- a/Assets/AEG FSR/Runtime/URP/com.alteregogames.aeg-fsr.Runtime.URP.asmdef.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 80c864bd3dbd1e148ac2a70c8ac30162 -AssemblyDefinitionImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Runtime/com.alteregogames.aeg-fsr.Runtime.asmdef b/Assets/AEG FSR/Runtime/com.alteregogames.aeg-fsr.Runtime.asmdef deleted file mode 100644 index 90285f8..0000000 --- a/Assets/AEG FSR/Runtime/com.alteregogames.aeg-fsr.Runtime.asmdef +++ /dev/null @@ -1,14 +0,0 @@ -{ - "name": "com.alteregogames.aeg-fsr.Runtime", - "rootNamespace": "AEG.FSR", - "references": [], - "includePlatforms": [], - "excludePlatforms": [], - "allowUnsafeCode": false, - "overrideReferences": false, - "precompiledReferences": [], - "autoReferenced": true, - "defineConstraints": [], - "versionDefines": [], - "noEngineReferences": false -} \ No newline at end of file diff --git a/Assets/AEG FSR/Runtime/com.alteregogames.aeg-fsr.Runtime.asmdef.meta b/Assets/AEG FSR/Runtime/com.alteregogames.aeg-fsr.Runtime.asmdef.meta deleted file mode 100644 index 338728c..0000000 --- a/Assets/AEG FSR/Runtime/com.alteregogames.aeg-fsr.Runtime.asmdef.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: ad4c948ae9bc1834fbf6b10e2f8c8b16 -AssemblyDefinitionImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/AEG FSR/Third-Party Notices.txt b/Assets/AEG FSR/Third-Party Notices.txt deleted file mode 100644 index 402d747..0000000 --- a/Assets/AEG FSR/Third-Party Notices.txt +++ /dev/null @@ -1,20 +0,0 @@ -Assets\Alterego Games\AEG FSR\Runtime\Plugins\LICENSE.txt -https://github.com/ndepoel/FSR2Unity - -FSR 2.2 for Unity -This project aims to bring FSR2 to Unity, as an alternative to Unity's existing FXAA, SMAA and -TAA anti-aliasing solutions. Compared to Unity's TAA implementation, FSR2 offers a sharper and more stable image, -with a better sub-pixel detail resolve and better performance due to its reliance on upscaling. - -Assets\Alterego Games\AEG FSR\Runtime\Plugins\Resources\FSR2\shaders\LICENSE.txt -https://github.com/GPUOpen-Effects/FidelityFX-FSR2 - -FidelityFX Super Resolution 2.2 (FSR 2.2) -AMD FidelityFX Super Resolution 2 (FSR 2) is an open source, high-quality solution for producing high -resolution frames from lower resolution inputs. - -You can find the binaries for FidelityFX FSR in the release section on GitHub. - -FSR 1.02 for Unity -Assets\Alterego Games\AEG FSR\Runtime\Plugins\Resources\FSR1\LICENSE.txt -https://github.com/GPUOpen-Effects/FidelityFX-FSR \ No newline at end of file diff --git a/Assets/AEG FSR/Third-Party Notices.txt.meta b/Assets/AEG FSR/Third-Party Notices.txt.meta deleted file mode 100644 index 94e7f0c..0000000 --- a/Assets/AEG FSR/Third-Party Notices.txt.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 392f9ea23c2870645b9498efb0bb15a9 -TextScriptImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/Scenes/Garden/Art/Vegetation/Materials/JM_WuJian_TianChuang003.mat b/Assets/Scenes/Garden/Art/Vegetation/Materials/JM_WuJian_TianChuang003.mat index eac5bee..c27804d 100644 --- a/Assets/Scenes/Garden/Art/Vegetation/Materials/JM_WuJian_TianChuang003.mat +++ b/Assets/Scenes/Garden/Art/Vegetation/Materials/JM_WuJian_TianChuang003.mat @@ -11,8 +11,7 @@ Material: m_Shader: {fileID: 4800000, guid: 933532a4fcc9baf4fa0491de14d08ed7, type: 3} m_Parent: {fileID: 0} m_ModifiedSerializedProperties: 0 - m_ValidKeywords: - - _SPECULAR_SETUP + m_ValidKeywords: [] m_InvalidKeywords: [] m_LightmapFlags: 4 m_EnableInstancingVariants: 0 @@ -111,11 +110,11 @@ Material: - _SrcBlend: 1 - _SrcBlendAlpha: 1 - _Surface: 0 - - _WorkflowMode: 0 + - _WorkflowMode: 1 - _ZWrite: 1 m_Colors: - - _BaseColor: {r: 0.6509434, g: 0.6509434, b: 0.6509434, a: 1} - - _Color: {r: 0.6509434, g: 0.6509434, b: 0.6509434, a: 1} + - _BaseColor: {r: 1, g: 1, b: 1, a: 1} + - _Color: {r: 1, g: 1, b: 1, a: 1} - _EmissionColor: {r: 0, g: 0, b: 0, a: 1} - _SpecColor: {r: 0.19999996, g: 0.19999996, b: 0.19999996, a: 1} m_BuildTextureStacks: [] diff --git a/Assets/Scenes/Garden/GardenScene.unity b/Assets/Scenes/Garden/GardenScene.unity index 8cca49c..b30e084 100644 --- a/Assets/Scenes/Garden/GardenScene.unity +++ b/Assets/Scenes/Garden/GardenScene.unity @@ -28039,11 +28039,6 @@ Transform: m_CorrespondingSourceObject: {fileID: 1080931527096372456, guid: 152c91511bb38db4ab7a3c4dce746976, type: 3} m_PrefabInstance: {fileID: 503244848} m_PrefabAsset: {fileID: 0} ---- !u!1 &503266359 stripped -GameObject: - m_CorrespondingSourceObject: {fileID: 7019485897165914679, guid: 8e5bed6b8dcfed240a38affefd9c58f6, type: 3} - m_PrefabInstance: {fileID: 875701726} - m_PrefabAsset: {fileID: 0} --- !u!1001 &503632451 PrefabInstance: m_ObjectHideFlags: 0 @@ -47525,9 +47520,6 @@ PrefabInstance: - {fileID: 2760477045267651031, guid: 8e5bed6b8dcfed240a38affefd9c58f6, type: 3} m_AddedGameObjects: [] m_AddedComponents: - - targetCorrespondingSourceObject: {fileID: 7019485897165914679, guid: 8e5bed6b8dcfed240a38affefd9c58f6, type: 3} - insertIndex: -1 - addedObject: {fileID: 875701728} - targetCorrespondingSourceObject: {fileID: 141850472070699230, guid: 8e5bed6b8dcfed240a38affefd9c58f6, type: 3} insertIndex: -1 addedObject: {fileID: 1834942216} @@ -47537,48 +47529,12 @@ MonoBehaviour: m_CorrespondingSourceObject: {fileID: 6968620290940108330, guid: 8e5bed6b8dcfed240a38affefd9c58f6, type: 3} m_PrefabInstance: {fileID: 875701726} m_PrefabAsset: {fileID: 0} - m_GameObject: {fileID: 503266359} + m_GameObject: {fileID: 0} m_Enabled: 1 m_EditorHideFlags: 0 m_Script: {fileID: 11500000, guid: 72ece51f2901e7445ab60da3685d6b5f, type: 3} m_Name: m_EditorClassIdentifier: ---- !u!114 &875701728 -MonoBehaviour: - m_ObjectHideFlags: 0 - m_CorrespondingSourceObject: {fileID: 0} - m_PrefabInstance: {fileID: 0} - m_PrefabAsset: {fileID: 0} - m_GameObject: {fileID: 503266359} - m_Enabled: 0 - m_EditorHideFlags: 0 - m_Script: {fileID: 11500000, guid: 4d37d054e457113459accf4b905624c6, type: 3} - m_Name: - m_EditorClassIdentifier: - FSRQuality: 0 - antiGhosting: 0 - sharpening: 1 - sharpness: 0.5 - enableF16: 0 - enableAutoExposure: 1 - generateReactiveMask: 1 - generateTCMask: 0 - autoReactiveScale: 0.9 - autoReactiveThreshold: 0.05 - autoReactiveBinaryValue: 0.5 - autoTcThreshold: 0.05 - autoTcScale: 1 - autoTcReactiveScale: 5 - autoTcReactiveMax: 0.9 - reactiveFlags: 13 - mipmapBiasOverride: 1 - autoTextureUpdate: 1 - mipMapUpdateFrequency: 2 - m_resetCamera: 0 - m_autoHDR: 0 - m_prevGraphicsFormat: 0 - m_cameraStacking: 0 - m_topCamera: {fileID: 0} --- !u!1001 &876443529 PrefabInstance: m_ObjectHideFlags: 0 diff --git a/Assets/Settings/Mobile/Mobile_High.asset b/Assets/Settings/Mobile/Mobile_High.asset index 7adbc00..5bbacbc 100644 --- a/Assets/Settings/Mobile/Mobile_High.asset +++ b/Assets/Settings/Mobile/Mobile_High.asset @@ -114,5 +114,5 @@ MonoBehaviour: m_PrefilterNativeRenderPass: 1 m_ShaderVariantLogLevel: 0 m_ShadowCascades: 0 - superResolution: 5 + superResolution: 3 vrsRate: 0 diff --git a/Assets/Settings/Mobile/Mobile_High_Renderer.asset b/Assets/Settings/Mobile/Mobile_High_Renderer.asset index 32f6c45..030232f 100644 --- a/Assets/Settings/Mobile/Mobile_High_Renderer.asset +++ b/Assets/Settings/Mobile/Mobile_High_Renderer.asset @@ -43,8 +43,8 @@ MonoBehaviour: m_Script: {fileID: 11500000, guid: 386e439eeef2849448d91896c74ff1d5, type: 3} m_Name: FSR m_EditorClassIdentifier: - m_Active: 0 - Quality: 1 + m_Active: 1 + Quality: 0 v1setting: EasuCompute: {fileID: 7200000, guid: 787b0c165dad9074e9489817de945916, type: 3} RacsCompute: {fileID: 7200000, guid: a50f730ab549f794cbe91f005703e208, type: 3} @@ -90,11 +90,11 @@ MonoBehaviour: autoGenReactivePass: {fileID: 7200000, guid: 3b0d05a92dc6af24fb6f30d6606b37cf, type: 3} tcrAutoGenPass: {fileID: 7200000, guid: 78865acef22baa24f9ce68fb50877fe8, type: 3} debugViewPass: {fileID: 7200000, guid: cb24a71d54164c54eb5e86839acd48c5, type: 3} - AutoExposureSource: 1 + AutoExposureSource: 0 TransparencyAndCompositionMask: {fileID: 0} PreExposure: 1 - PerformSharpenPass: 1 - Sharpness: 0.8 + PerformSharpenPass: 0 + Sharpness: 1 VelocityFactor: 1 AutoGenerateTransparencyAndComposition: 0 GenerateTransparencyAndCompositionParameters: @@ -137,7 +137,7 @@ MonoBehaviour: m_Script: {fileID: 11500000, guid: a00dddc5b3ea7fe45953ccbd49b58b94, type: 3} m_Name: GSR m_EditorClassIdentifier: - m_Active: 1 + m_Active: 0 quality: 1 v1settings: EnableEdgeDirection: 1 @@ -250,9 +250,8 @@ MonoBehaviour: - {fileID: 4395980931634258890} - {fileID: -8043853166483734676} - {fileID: 4071882023117421450} - - {fileID: 5468006144702062375} - {fileID: -7284859345190182597} - m_RendererFeatureMap: bc3f630842f2e70dd6a559c442a94bfd4529d15534f2d3de228858dca8d12222716523fbf3439fdb7a327b7bff4bdd446ac59dfa966ffa88ca6373cd5da9013d6cff55ca297e5e908a7b3653203b8238279f9e2c9e42e24b3b2141bb05fbe69a + m_RendererFeatureMap: bc3f630842f2e70dd6a559c442a94bfd4529d15534f2d3de228858dca8d12222716523fbf3439fdb7a327b7bff4bdd446ac59dfa966ffa88ca6373cd5da9013d6cff55ca297e5e908a7b3653203b82383b2141bb05fbe69a m_UseNativeRenderPass: 0 postProcessData: {fileID: 11400000, guid: 41439944d30ece34e96484bdb6645b55, type: 2} shaders: @@ -466,6 +465,7 @@ MonoBehaviour: m_EditorClassIdentifier: m_Active: 1 IsEnabled: 0 + m_fsrURP: {fileID: 0} --- !u!114 &6334271670068977784 MonoBehaviour: m_ObjectHideFlags: 0 diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/GSR2.shader b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/GSR2.shader index b376701..09c3026 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/GSR2.shader +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/SuperRendering/SR/Shader/GSR2.shader @@ -261,6 +261,7 @@ Shader "MRP/SR/GSR2" float baseoffset_dot = dot(baseoffset, baseoffset); float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); float boxweight = exp(baseoffset_dot * curvebias); rectboxmin = samplecolor;