152 lines
4.1 KiB
Plaintext
152 lines
4.1 KiB
Plaintext
// Each #kernel tells which function to compile; you can have many kernels
|
|
#pragma kernel CSMain
|
|
#pragma kernel CSMain _SKIP_3_MIP
|
|
// #define FFX_SPD_NO_WAVE_OPERATIONS 1
|
|
#pragma multi_compile _ FFX_SPD_NO_WAVE_OPERATIONS
|
|
#pragma enable_d3d11_debug_symbols
|
|
#pragma target 6.0
|
|
#pragma require wavebasic
|
|
#pragma require quadshuffle
|
|
|
|
#define FFX_GPU 1
|
|
#define FFX_HLSL 1
|
|
#define FFX_HALF 1
|
|
|
|
#define FFXM_GPU 1
|
|
#define FFXM_HALF 1
|
|
#define FFXM_HLSL 1
|
|
|
|
#include "ffx/ffx_core.h"
|
|
#include "ffx/ffx_common_types.h"
|
|
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
|
|
|
|
// Source depth texture; read by SpdLoadSourceImage / SpdLoadSourceImageH.
Texture2D<FfxFloat32> _InputDepth;
|
|
// Output texture; SpdStore/SpdStoreH write into it and SpdLoad/SpdLoadH read
// back from it, always at an offset taken from _MipOffsetAndSizeArray.
RWTexture2D<FfxFloat32> _OutDepth;
|
|
// Per-slot placement data: .xy is used as a texel offset into _OutDepth and
// .zw of entry 0 is used as a size when rescaling source coordinates.
// Per the comments in this file, entry 0 holds a copy of the depth buffer and
// SPD mip i lives at entry i + 1.
float4 _MipOffsetAndSizeArray[16];
|
|
|
|
// Forwarded unchanged to SpdDownsampleH as its `mips` argument.
FfxUInt32 mips;
|
|
// Multiplied with normalized coordinates in SpdLoadSourceImageH to address _InputDepth.
FfxUInt32x2 inputTextureSize;
|
|
// Forwarded unchanged to SpdDownsampleH as its `numWorkGroups` argument.
FfxUInt32 numWorkGroups;
|
|
// Group-shared slot that receives the value returned by the InterlockedAdd in
// IncreaseAtomicCounter; exposed through SpdGetAtomicCounter.
FFX_GROUPSHARED FfxUInt32 spdCounter;
|
|
// Texel (0, 0) of this texture is the global counter that is atomically
// incremented and reset by the counter helpers below.
RWTexture2D<FfxUInt32> rw_internal_global_atomic;
|
|
|
|
// Group-shared 16x16 scratch tile used by the full-precision SPD callbacks.
FFX_GROUPSHARED FfxFloat32 spdIntermediate[16][16];

// Stores one intermediate sample into the group-shared tile.
// Only the first component of `value` is kept; the remaining components are discarded.
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
    const FfxFloat32 depth = value.x;
    spdIntermediate[x][y] = depth;
}
|
|
|
|
// Reads one intermediate sample from the group-shared tile and replicates the
// scalar into all four components of the returned vector.
FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
{
    const FfxFloat32 depth = spdIntermediate[x][y];
    return FfxFloat32x4(depth, depth, depth, depth);
}
|
|
|
|
// Group-shared 16x16 scratch tile used by the "H" SPD callbacks.
// Note that the element type is FfxFloat32, same as spdIntermediate.
FFX_GROUPSHARED FfxFloat32 spdIntermediateH[16][16];

// Stores one intermediate sample into the "H" group-shared tile.
// Only the first component of `value` is kept; the remaining components are discarded.
void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
{
    const FfxFloat32 depth = value.x;
    spdIntermediateH[x][y] = depth;
}
|
|
|
|
// Reads one intermediate sample from the "H" group-shared tile and replicates
// the scalar into all four components of the returned vector.
FfxFloat32x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
{
    const FfxFloat32 depth = spdIntermediateH[x][y];
    return FfxFloat32x4(depth, depth, depth, depth);
}
|
|
|
|
// Reduces four samples to one, component-wise.
// Uses min when UNITY_REVERSED_Z is set and max otherwise.
// NOTE(review): presumably this keeps the farthest depth in both conventions — confirm.
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
{
#if UNITY_REVERSED_Z
    const FfxFloat32x4 firstPair = min(v0, v1);
    const FfxFloat32x4 secondPair = min(v2, v3);
    return min(firstPair, secondPair);
#else
    const FfxFloat32x4 firstPair = max(v0, v1);
    const FfxFloat32x4 secondPair = max(v2, v3);
    return max(firstPair, secondPair);
#endif
}
|
|
|
|
// 16-bit variant of the four-sample reduction, component-wise.
// Uses min when UNITY_REVERSED_Z is set and max otherwise.
// NOTE(review): presumably this keeps the farthest depth in both conventions — confirm.
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
{
#if UNITY_REVERSED_Z
    const FfxFloat16x4 firstPair = min(v0, v1);
    const FfxFloat16x4 secondPair = min(v2, v3);
    return min(firstPair, secondPair);
#else
    const FfxFloat16x4 firstPair = max(v0, v1);
    const FfxFloat16x4 secondPair = max(v2, v3);
    return max(firstPair, secondPair);
#endif
}
|
|
|
|
// Reads back a previously written texel from _OutDepth and replicates it into
// all four components.
// The texel is addressed relative to the offset stored in slot 6 of
// _MipOffsetAndSizeArray (5 -> 6 as we store a copy of the depth buffer at index 0).
// `slice` is accepted but not used.
FfxFloat32x4 SpdLoad(FfxInt32x2 coordinate, FfxUInt32 slice)
{
    const uint4 slotInfo = _MipOffsetAndSizeArray[6];
    const FfxFloat32 depth = _OutDepth[coordinate + slotInfo.xy];
    return depth.xxxx;
}
|
|
|
|
// Writes the first component of `outValue` into _OutDepth for the mip identified
// by `coordinate` (despite its name, this parameter is used as a mip index).
// The destination offset comes from slot `coordinate + 1` of _MipOffsetAndSizeArray
// (+ 1 as we store a copy of the depth buffer at index 0).
// When _SKIP_3_MIP is enabled, mips 0..2 are not written.
// `slice` is accepted but not used.
void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice)
{
#if _SKIP_3_MIP
    const bool writeThisMip = (coordinate >= 3);
#else
    const bool writeThisMip = true;
#endif

    if (writeThisMip)
    {
        const uint4 slotInfo = _MipOffsetAndSizeArray[coordinate + 1];
        _OutDepth[pix + slotInfo.xy] = outValue.x;
    }
}
|
|
|
|
// "H" counterpart of SpdLoad: reads back a previously written texel from
// _OutDepth and replicates it into all four components.
// The texel is addressed relative to the offset stored in slot 6 of
// _MipOffsetAndSizeArray (5 -> 6 as we store a copy of the depth buffer at index 0).
// `slice` is accepted but not used.
FfxFloat32x4 SpdLoadH(FfxInt32x2 coordinate, FfxUInt32 slice)
{
    const uint4 slotInfo = _MipOffsetAndSizeArray[6];
    const FfxFloat32 depth = _OutDepth[coordinate + slotInfo.xy];
    return depth.xxxx;
}
|
|
|
|
// "H" counterpart of SpdStore: writes the first component of `outValue` into
// _OutDepth for the mip identified by `coordinate` (used here as a mip index).
// The destination offset comes from slot `coordinate + 1` of _MipOffsetAndSizeArray
// (+ 1 as we store a copy of the depth buffer at index 0).
// When _SKIP_3_MIP is enabled, mips 0..2 are not written.
// `slice` is accepted but not used.
void SpdStoreH(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice)
{
#if _SKIP_3_MIP
    const bool writeThisMip = (coordinate >= 3);
#else
    const bool writeThisMip = true;
#endif

    if (writeThisMip)
    {
        const uint4 slotInfo = _MipOffsetAndSizeArray[coordinate + 1];
        _OutDepth[pix + slotInfo.xy] = outValue.x;
    }
}
|
|
|
|
// Fetches the source depth texel at `coordinate` from _InputDepth, with no
// rescaling, and replicates it into all four components.
// `slice` is accepted but not used.
FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 coordinate, FfxUInt32 slice)
{
    const FfxFloat32 depth = _InputDepth[coordinate];
    return depth.xxxx;
}
|
|
|
|
// "H" source fetch: remaps `coordinate` before reading _InputDepth.
// The coordinate is divided by the size stored in .zw of slot 0 of
// _MipOffsetAndSizeArray, scaled by inputTextureSize, and truncated to an
// integer texel address. The fetched value is replicated into all four components.
// NOTE(review): unlike SpdLoadSourceImage, this variant rescales — confirm the
// difference is intentional.
// `slice` is accepted but not used.
FfxFloat32x4 SpdLoadSourceImageH(FfxInt32x2 coordinate, FfxUInt32 slice)
{
    const uint4 slotInfo = _MipOffsetAndSizeArray[0];
    const float2 normalized = coordinate / float2(slotInfo.zw);
    const FfxInt32x2 sourceTexel = FfxInt32x2(normalized * inputTextureSize);

    const FfxFloat32 depth = _InputDepth[sourceTexel];
    return depth.xxxx;
}
|
|
|
|
// Atomically adds 1 to texel (0, 0) of rw_internal_global_atomic and returns the
// value it held before the addition through `counter`.
// `slice` is accepted but not used: every slice shares the same counter texel.
void IncreaseAtomicCounter(FFX_PARAMETER_IN FfxUInt32 slice, FFX_PARAMETER_INOUT FfxUInt32 counter)
{
    const FfxInt32x2 counterTexel = FfxInt32x2(0, 0);
    InterlockedAdd(rw_internal_global_atomic[counterTexel], 1, counter);
}
|
|
|
|
// Writes 0 to texel (0, 0) of rw_internal_global_atomic.
// `slice` is accepted but not used.
void ResetAtomicCounter(FFX_PARAMETER_IN FfxUInt32 slice)
{
    const FfxInt32x2 counterTexel = FfxInt32x2(0, 0);
    rw_internal_global_atomic[counterTexel] = 0;
}
|
|
|
|
// Returns the group-shared spdCounter, i.e. the value last written into it by
// SpdIncreaseAtomicCounter.
FfxUInt32 SpdGetAtomicCounter()
{
    const FfxUInt32 current = spdCounter;
    return current;
}
|
|
|
|
// SPD callback: resets the global counter by forwarding to ResetAtomicCounter.
void SpdResetAtomicCounter(FfxUInt32 slice)
{
    ResetAtomicCounter(slice);
}
|
|
|
|
// SPD callback: bumps the global counter via IncreaseAtomicCounter and stores
// the pre-increment value in the group-shared spdCounter.
void SpdIncreaseAtomicCounter(FfxUInt32 slice)
{
    IncreaseAtomicCounter(slice, spdCounter);
}
|
|
#include "ffx/ffx_spd.h"
|
|
|
|
// Kernel entry point, 256 threads per group.
// Forwards the group id (xy), the flattened thread index, the `mips` and
// `numWorkGroups` constants, the group id's z component as the slice, and a
// trailing 0 to SpdDownsampleH.
// NOTE(review): SpdDownsampleH is not defined in this file; presumably it comes
// from ffx/ffx_spd.h — confirm the meaning of the final argument there.
[numthreads(256, 1, 1)]
void CSMain(uint LocalThreadIndex : SV_GroupIndex, uint3 WorkGroupId : SV_GroupID)
{
    const uint2 groupXY = WorkGroupId.xy;
    const uint sliceIndex = WorkGroupId.z;

    SpdDownsampleH(groupXY, LocalThreadIndex, mips, numWorkGroups, sliceIndex, 0);
}
|
|
|
|
|
|
|