// Single-pass depth pyramid downsampler built on AMD FidelityFX SPD (ffx_spd.h).
// This file supplies the callbacks that ffx_spd.h expects (load/store/reduce/
// atomic counter) and a single entry point, CSMain.

// Each #kernel tells which function to compile; you can have many kernels
// NOTE(review): both kernel lines name CSMain; the second one additionally
// defines _SKIP_3_MIP. Confirm the C# side selects the intended variant.
#pragma kernel CSMain
#pragma kernel CSMain _SKIP_3_MIP

// #define FFX_SPD_NO_WAVE_OPERATIONS 1
#pragma multi_compile _ FFX_SPD_NO_WAVE_OPERATIONS

// NOTE(review): debug symbols are enabled unconditionally; confirm this is
// intended outside of debugging sessions.
#pragma enable_d3d11_debug_symbols
#pragma target 6.0
#pragma require wavebasic
#pragma require quadshuffle

#define FFX_GPU 1
#define FFX_HLSL 1
#define FFX_HALF 1
#define FFXM_GPU 1
#define FFXM_HALF 1
#define FFXM_HLSL 1

#include "ffx/ffx_core.h"
#include "ffx/ffx_common_types.h"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"

// ---------------------------------------------------------------------------
// Resources and constants
// ---------------------------------------------------------------------------

// Source depth, read at integer texel coordinates.
Texture2D _InputDepth;

// Destination for every mip. Each mip lives at an offset taken from
// _MipOffsetAndSizeArray, and only a scalar (outValue.x) is ever written.
// Element type restored as <float> based on that usage - TODO confirm against
// the texture format bound from C#.
// NOTE(review): FidelityFX SPD documentation asks for the image that is re-read
// by the last workgroup to be declared globallycoherent; this texture is both
// written and re-read (see SpdLoad). Verify whether that qualifier is needed.
RWTexture2D<float> _OutDepth;

// Per-slot placement data: .xy is used as a texel offset into _OutDepth and
// .zw as a size (see SpdLoadSourceImageH). Slot 0 holds a copy of the depth
// buffer, so SPD mip index N maps to slot N + 1.
float4 _MipOffsetAndSizeArray[16];

FfxUInt32 mips;
FfxUInt32x2 inputTextureSize;
FfxUInt32 numWorkGroups;

// Slot read back by SpdLoad: "5 -> 6 as we store a copy of the depth buffer
// at index 0".
static const FfxUInt32 kSpdReloadSlot = 6;

// Receives the previous value of the global counter from InterlockedAdd.
FFX_GROUPSHARED FfxUInt32 spdCounter;

// Global counter stored at texel (0, 0). Element type restored as <uint>
// because InterlockedAdd below returns the old value into an FfxUInt32.
RWTexture2D<uint> rw_internal_global_atomic;

// ---------------------------------------------------------------------------
// Intermediate storage (16x16 scalar tiles)
// ---------------------------------------------------------------------------

FFX_GROUPSHARED FfxFloat32 spdIntermediate[16][16];

// Stores only the x component of value.
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
    spdIntermediate[x][y] = value.x;
}

// Returns the stored scalar replicated to all four components.
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
    FfxFloat32 f = spdIntermediate[x][y];
    return FfxFloat32x4(f, f, f, f);
}

FFX_GROUPSHARED FfxFloat32 spdIntermediateH[16][16];

// Stores only the x component of value.
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
    spdIntermediateH[x][y] = value.x;
}

// Returns the stored scalar replicated to all four components.
FfxFloat32x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
{
    FfxFloat32 f = spdIntermediateH[x][y];
    return FfxFloat32x4(f, f, f, f);
}

// ---------------------------------------------------------------------------
// Reduction
// ---------------------------------------------------------------------------

// Component-wise reduction of four samples: minimum when UNITY_REVERSED_Z is
// set, maximum otherwise.
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
#if UNITY_REVERSED_Z
    return min(min(v0, v1), min(v2, v3));
#else
    return max(max(v0, v1), max(v2, v3));
#endif
}

// Same reduction as SpdReduce4 on FfxFloat16x4 operands.
// NOTE(review): the other *H callbacks in this file use FfxFloat32x4; confirm
// the 16-bit type here is intended for depth values.
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
#if UNITY_REVERSED_Z
    return min(min(v0, v1), min(v2, v3));
#else
    return max(max(v0, v1), max(v2, v3));
#endif
}

// ---------------------------------------------------------------------------
// Destination load / store
// ---------------------------------------------------------------------------

// Reads back a texel from the region of _OutDepth described by slot
// kSpdReloadSlot. slice is unused.
FfxFloat32x4 SpdLoad(FfxInt32x2 coordinate, FfxUInt32 slice)
{
    uint4 cur = _MipOffsetAndSizeArray[kSpdReloadSlot];
    return _OutDepth[coordinate + cur.xy].xxxx;
}

// Writes outValue.x at pix inside the region for the given mip index.
// The parameter named `coordinate` is used as the mip index here; slice is
// unused. With _SKIP_3_MIP, indices 0..2 are not written.
void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice)
{
#if _SKIP_3_MIP
    if (coordinate < 3)
    {
        return;
    }
#endif
    // + 1 as we store a copy of the depth buffer at index 0
    uint4 cur = _MipOffsetAndSizeArray[coordinate + 1];
    _OutDepth[pix + cur.xy] = outValue.x;
}

// Identical behaviour to SpdLoad; kept as a separate entry point for the
// *H code path of ffx_spd.h.
FfxFloat32x4 SpdLoadH(FfxInt32x2 coordinate, FfxUInt32 slice)
{
    return SpdLoad(coordinate, slice);
}

// Identical behaviour to SpdStore; kept as a separate entry point for the
// *H code path of ffx_spd.h.
void SpdStoreH(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice)
{
    SpdStore(pix, outValue, coordinate, slice);
}

// ---------------------------------------------------------------------------
// Source load
// ---------------------------------------------------------------------------

// Reads _InputDepth directly at coordinate. slice is unused.
FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 coordinate, FfxUInt32 slice)
{
    return _InputDepth[coordinate].xxxx;
}

// Reads _InputDepth after rescaling coordinate from the size stored in slot 0
// (.zw) to inputTextureSize. The result is truncated to an integer texel.
// slice is unused.
FfxFloat32x4 SpdLoadSourceImageH(FfxInt32x2 coordinate, FfxUInt32 slice)
{
    uint4 cur = _MipOffsetAndSizeArray[0];
    float2 uv = coordinate / float2(cur.zw);
    FfxInt32x2 uv2 = FfxInt32x2(uv * inputTextureSize);
    return _InputDepth[uv2].xxxx;
}

// ---------------------------------------------------------------------------
// Global atomic counter
// ---------------------------------------------------------------------------

// Atomically adds 1 to the counter at texel (0, 0); counter receives the value
// held before the add. slice is unused.
void IncreaseAtomicCounter(FFX_PARAMETER_IN FfxUInt32 slice, FFX_PARAMETER_INOUT FfxUInt32 counter)
{
    InterlockedAdd(rw_internal_global_atomic[FfxInt32x2(0, 0)], 1, counter);
}

// Writes 0 to the counter at texel (0, 0). slice is unused.
void ResetAtomicCounter(FFX_PARAMETER_IN FfxUInt32 slice)
{
    rw_internal_global_atomic[FfxInt32x2(0, 0)] = 0;
}

// Returns the value captured by the last SpdIncreaseAtomicCounter call.
FfxUInt32 SpdGetAtomicCounter()
{
    return spdCounter;
}

void SpdResetAtomicCounter(FfxUInt32 slice)
{
    ResetAtomicCounter(slice);
}

void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
    IncreaseAtomicCounter(slice, spdCounter);
}

// Must come after the callbacks above, which it references.
#include "ffx/ffx_spd.h"

// Entry point: 256 threads per group. Forwards the group id, flat thread index
// and the mips / numWorkGroups constants to SpdDownsampleH; WorkGroupId.z is
// passed as the slice and the final argument is 0.
[numthreads(256, 1, 1)]
void CSMain(uint LocalThreadIndex : SV_GroupIndex, uint3 WorkGroupId : SV_GroupID)
{
    SpdDownsampleH(WorkGroupId.xy, LocalThreadIndex, mips, numWorkGroups, WorkGroupId.z, 0);
}