Fix SPD (Single Pass Downsampler) error.

This commit is contained in:
StarBeats 2025-06-24 21:26:55 +08:00
parent 3a68dfd783
commit 3e340fe793
7 changed files with 64 additions and 41 deletions

View File

@ -351,6 +351,7 @@ namespace X.Rendering.Feature
{ {
var hizIndex = GetHizIndex(); var hizIndex = GetHizIndex();
RTHandle hizBuffer = depthPyramidTexs[hizIndex]; RTHandle hizBuffer = depthPyramidTexs[hizIndex];
cmd.BeginSample("Depth-Downsample");
RenderingUtils.ReAllocateIfNeeded(ref hizBuffer, new RenderTextureDescriptor() RenderingUtils.ReAllocateIfNeeded(ref hizBuffer, new RenderTextureDescriptor()
{ {
@ -360,6 +361,7 @@ namespace X.Rendering.Feature
colorFormat = RenderTextureFormat.RFloat, colorFormat = RenderTextureFormat.RFloat,
msaaSamples = 1, msaaSamples = 1,
enableRandomWrite = true, enableRandomWrite = true,
sRGB = false,
}, filterMode: FilterMode.Point, name: depthPyramidNames[hizIndex]); }, filterMode: FilterMode.Point, name: depthPyramidNames[hizIndex]);
depthPyramidTexs[hizIndex] = hizBuffer; depthPyramidTexs[hizIndex] = hizBuffer;
@ -418,7 +420,10 @@ namespace X.Rendering.Feature
// cmd.SetExecutionFlags(CommandBufferExecutionFlags.AsyncCompute); // cmd.SetExecutionFlags(CommandBufferExecutionFlags.AsyncCompute);
cmd.DispatchCompute(settings.ComputeShader, kernelId, Mathf.CeilToInt(outputMipSize.x / 8f), Mathf.CeilToInt(outputMipSize.y / 8f), 1); cmd.DispatchCompute(settings.ComputeShader, kernelId, Mathf.CeilToInt(outputMipSize.x / 8f), Mathf.CeilToInt(outputMipSize.y / 8f), 1);
mipCnt = mipCnt - 4; mipCnt = mipCnt - 4;
break;
} }
cmd.EndSample("Depth-Downsample");
cmd.SetGlobalTexture(HizShaderIds.DepthPyramidTexId, hizBuffer); cmd.SetGlobalTexture(HizShaderIds.DepthPyramidTexId, hizBuffer);
} }
@ -441,7 +446,7 @@ namespace X.Rendering.Feature
var dispatchX = Mathf.CeilToInt(mip0SizeNOP.x / 64f); var dispatchX = Mathf.CeilToInt(mip0SizeNOP.x / 64f);
var dispatchY = Mathf.CeilToInt(mip0SizeNOP.y / 64f); var dispatchY = Mathf.CeilToInt(mip0SizeNOP.y / 64f);
cmd.SetComputeIntParam(settings.Spd, "mips", mipLevelCount); cmd.SetComputeIntParam(settings.Spd, "mips", mipLevelCount);
cmd.SetComputeIntParam(settings.Spd, "numWorkGroups", dispatchX); cmd.SetComputeIntParam(settings.Spd, "numWorkGroups", dispatchX * dispatchY);
cmd.SetComputeVectorArrayParam(settings.Spd, "_MipOffsetAndSizeArray", mipOffsetAndSizes); cmd.SetComputeVectorArrayParam(settings.Spd, "_MipOffsetAndSizeArray", mipOffsetAndSizes);
@ -469,6 +474,7 @@ namespace X.Rendering.Feature
else else
{ {
cmd.SetGlobalMatrix(HizShaderIds.GPUCullingVPId, world2Project); cmd.SetGlobalMatrix(HizShaderIds.GPUCullingVPId, world2Project);
cmd.SetViewProjectionMatrices(Matrix4x4.identity, Matrix4x4.identity);
cmd.SetRenderTarget(cullResult.ResultTex, RenderBufferLoadAction.DontCare, RenderBufferStoreAction.Store); cmd.SetRenderTarget(cullResult.ResultTex, RenderBufferLoadAction.DontCare, RenderBufferStoreAction.Store);
cmd.DrawMesh(RenderingUtils.fullscreenMesh, Matrix4x4.identity, settings.CullMat, 0, 0); cmd.DrawMesh(RenderingUtils.fullscreenMesh, Matrix4x4.identity, settings.CullMat, 0, 0);

View File

@ -163,7 +163,7 @@ namespace X.Rendering.Feature
for (var i = 0; i < renderers.Count; i++) for (var i = 0; i < renderers.Count; i++)
{ {
var rdr = renderers[i]; var rdr = renderers[i];
if (rdr) if (rdr && cullResult.ResultArray.IsCreated)
{ {
bool needCull = cullResult.ResultArray[i] > 0; bool needCull = cullResult.ResultArray[i] > 0;
#if UNITY_EDITOR #if UNITY_EDITOR

View File

@ -10,7 +10,7 @@ Shader "Hidden/CopyDepth"
HLSLPROGRAM HLSLPROGRAM
#pragma vertex Vertex #pragma vertex Vertex
#pragma fragment DownsampleDepthFrag #pragma fragment DownsampleDepthFrag
#pragma enable_d3d11_debug_symbols
#include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl" #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
struct Attributes struct Attributes

View File

@ -2,15 +2,18 @@
#pragma kernel CSMainMipmapGroup0 #pragma kernel CSMainMipmapGroup0
#pragma kernel CSMainMipmapGroup0 _INPUTE_DEPTH_TEXTURE #pragma kernel CSMainMipmapGroup0 _INPUTE_DEPTH_TEXTURE
#pragma kernel CSMainMipmapGroup0 _INPUTE_DEPTH_TEXTURE _SKIP_3_MIP #pragma kernel CSMainMipmapGroup0 _INPUTE_DEPTH_TEXTURE _SKIP_3_MIP
#pragma enable_d3d11_debug_symbols
#pragma target 5.0
Texture2D<float> _InputDepth; Texture2D<float> _InputDepth;
RWTexture2D<float> _DepthMipChain; RWTexture2D<float> _DepthMipChain;
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#if UNITY_REVERSED_Z #if UNITY_REVERSED_Z
#define MIN_DEPTH(l, r) min(l, r) # define MIN_DEPTH(l, r) min(l, r)
#else #else
#define MIN_DEPTH(l, r) max(l, r) # define MIN_DEPTH(l, r) max(l, r)
#endif #endif
#define GROUP_TILE_SIZE 8 #define GROUP_TILE_SIZE 8
@ -23,27 +26,27 @@ groupshared float _LDSDepths[GROUP_TILE_SIZE * GROUP_TILE_SIZE];
void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
{ {
uint2 curMipSize = _MipOffsetAndSizeArray[0].zw; uint2 curMipSize = _MipOffsetAndSizeArray[0].zw;
if (all(dispatchThreadId.xy < curMipSize.xy))
{
//Mip0从InputeTexture贴图下采样 //Mip0从InputeTexture贴图下采样
float2 uv = dispatchThreadId.xy / float2(curMipSize); float2 uv = dispatchThreadId.xy / float2(curMipSize);
uv = clamp(uv, 0, 1); uv = clamp(uv, 0, 1);
int2 texCrood = _InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw); int3 texCrood = int3(_InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw), 0);
uint2 maxIndex = _InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1; uint2 maxIndex = int3(_InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1,0);
#ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture #ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture
float p00 = _InputDepth[min(texCrood + uint2(0, 0), maxIndex)]; float p00 = _InputDepth[min(texCrood + uint3(0, 0, 0), maxIndex)];
float p01 = _InputDepth[min(texCrood + uint2(1, 0), maxIndex)]; float p01 = _InputDepth[min(texCrood + uint3(1, 0, 0), maxIndex)];
float p10 = _InputDepth[min(texCrood + uint2(0, 1), maxIndex)]; float p10 = _InputDepth[min(texCrood + uint3(0, 1, 0), maxIndex)];
float p11 = _InputDepth[min(texCrood + uint2(1, 1), maxIndex)]; float p11 = _InputDepth[min(texCrood + uint3(1, 1, 0), maxIndex)];
#else #else
float p00 = _DepthMipChain[min(texCrood + uint2(0, 0), maxIndex)]; float p00 = _DepthMipChain[min(texCrood + uint3(0, 0, 0), maxIndex)];
float p01 = _DepthMipChain[min(texCrood + uint2(1, 0), maxIndex)]; float p01 = _DepthMipChain[min(texCrood + uint3(1, 0, 0), maxIndex)];
float p10 = _DepthMipChain[min(texCrood + uint2(0, 1), maxIndex)]; float p10 = _DepthMipChain[min(texCrood + uint3(0, 1, 0), maxIndex)];
float p11 = _DepthMipChain[min(texCrood + uint2(1, 1), maxIndex)]; float p11 = _DepthMipChain[min(texCrood + uint3(1, 1, 0), maxIndex)];
#endif #endif
float4 depths = float4(p00, p10, p01, p11); float4 depths = float4(p00, p10, p01, p11);
float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w)); float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w));
if (all(dispatchThreadId.xy < curMipSize.xy))
{
#ifndef _SKIP_3_MIP //跳过前3级,不输出到RT #ifndef _SKIP_3_MIP //跳过前3级,不输出到RT
_DepthMipChain[_MipOffsetAndSizeArray[0].xy + dispatchThreadId.xy] = minDepth; _DepthMipChain[_MipOffsetAndSizeArray[0].xy + dispatchThreadId.xy] = minDepth;
#endif #endif
@ -80,6 +83,11 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
int threadIndex = xy.x + xy.y * TileSize; int threadIndex = xy.x + xy.y * TileSize;
_LDSDepths[threadIndex] = furthestDeviceZ; _LDSDepths[threadIndex] = furthestDeviceZ;
} }
else
{
int threadIndex = xy.x + xy.y * TileSize;
_LDSDepths[threadIndex] = 0;
}
//Mip2 //Mip2
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
@ -113,7 +121,11 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
int threadIndex = xy.x + xy.y * TileSize; int threadIndex = xy.x + xy.y * TileSize;
_LDSDepths[threadIndex] = furthestDeviceZ; _LDSDepths[threadIndex] = furthestDeviceZ;
} }
else
{
int threadIndex = xy.x + xy.y * TileSize;
_LDSDepths[threadIndex] = 0;
}
//Mip3 //Mip3
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
@ -132,7 +144,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
parentFurthestDeviceZ.z = _LDSDepths[index2]; parentFurthestDeviceZ.z = _LDSDepths[index2];
parentFurthestDeviceZ.w = _LDSDepths[index3]; parentFurthestDeviceZ.w = _LDSDepths[index3];
GroupMemoryBarrierWithGroupSync(); // GroupMemoryBarrierWithGroupSync();
furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w)); furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w));
localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上

View File

@ -11,6 +11,7 @@
#pragma vertex Vertex #pragma vertex Vertex
#pragma fragment CullFrag #pragma fragment CullFrag
#pragma enable_d3d11_debug_symbols #pragma enable_d3d11_debug_symbols
#pragma target 5.0
#include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl" #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
@ -116,7 +117,7 @@
float2 size = (boxUVs.zw - boxUVs.xy) * _Mip0Size.xy; float2 size = (boxUVs.zw - boxUVs.xy) * _Mip0Size.xy;
float mip = (log2(max(size.x, size.y))); float mip = (log2(max(size.x, size.y)));
mip = ceil(mip) ; mip = ceil(mip);
mip = clamp(mip, _MipmapLevelMinMaxIndex.x, _MipmapLevelMinMaxIndex.y); mip = clamp(mip, _MipmapLevelMinMaxIndex.x, _MipmapLevelMinMaxIndex.y);
// float level_lower = max(mip - 1, 0); // float level_lower = max(mip - 1, 0);
@ -131,7 +132,6 @@
float4 offsetAndSize = _MipOffsetAndSize[mip]; float4 offsetAndSize = _MipOffsetAndSize[mip];
int4 pxMinMax = boxUVs * offsetAndSize.zwzw + offsetAndSize.xyxy; int4 pxMinMax = boxUVs * offsetAndSize.zwzw + offsetAndSize.xyxy;
float4 depth = float4(LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.xy, 0).r, float4 depth = float4(LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.xy, 0).r,
LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.zy, 0).r, LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.zy, 0).r,
LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.xw, 0).r, LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.xw, 0).r,

View File

@ -2,6 +2,7 @@
#pragma kernel CSMain #pragma kernel CSMain
#define FFX_SPD_NO_WAVE_OPERATIONS 1 #define FFX_SPD_NO_WAVE_OPERATIONS 1
// #pragma multi_compile FFX_SPD_NO_WAVE_OPERATIONS _ // #pragma multi_compile FFX_SPD_NO_WAVE_OPERATIONS _
#pragma enable_d3d11_debug_symbols
#define FFX_GPU 1 #define FFX_GPU 1
#define FFX_HLSL 1 #define FFX_HLSL 1
@ -13,6 +14,8 @@
#include "ffx/ffx_core.h" #include "ffx/ffx_core.h"
#include "ffx/ffx_common_types.h" #include "ffx/ffx_common_types.h"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
FFX_GROUPSHARED FfxFloat32 spdIntermediate[16][16]; FFX_GROUPSHARED FfxFloat32 spdIntermediate[16][16];
void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) { void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) {
@ -35,18 +38,18 @@ FfxFloat32x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) {
} }
FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) { FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) {
#if FFX_SSSR_OPTION_INVERTED_DEPTH #if UNITY_REVERSED_Z
return max(max(v0, v1), max(v2, v3));
#else
return min(min(v0, v1), min(v2, v3)); return min(min(v0, v1), min(v2, v3));
#else
return max(max(v0, v1), max(v2, v3));
#endif #endif
} }
FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) { FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) {
#if FFX_SSSR_OPTION_INVERTED_DEPTH #if UNITY_REVERSED_Z
return max(max(v0, v1), max(v2, v3));
#else
return min(min(v0, v1), min(v2, v3)); return min(min(v0, v1), min(v2, v3));
#else
return max(max(v0, v1), max(v2, v3));
#endif #endif
} }
Texture2D<FfxFloat32> _InputDepth; Texture2D<FfxFloat32> _InputDepth;
@ -55,7 +58,8 @@ float4 _MipOffsetAndSizeArray[16];
FfxFloat32x4 SpdLoad(FfxInt32x2 coordinate, FfxUInt32 slice) FfxFloat32x4 SpdLoad(FfxInt32x2 coordinate, FfxUInt32 slice)
{ {
return _InputDepth[coordinate].xxxx; // 5 -> 6 as we store a copy of the depth buffer at index 0 uint4 cur = _MipOffsetAndSizeArray[6];
return _OutDepth[coordinate + cur.xy].xxxx; // 5 -> 6 as we store a copy of the depth buffer at index 0
} }
void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice) void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice)
@ -66,7 +70,8 @@ void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUI
FfxFloat32x4 SpdLoadH(FfxInt32x2 coordinate, FfxUInt32 slice) FfxFloat32x4 SpdLoadH(FfxInt32x2 coordinate, FfxUInt32 slice)
{ {
return _InputDepth[coordinate].xxxx; // 5 -> 6 as we store a copy of the depth buffer at index 0 uint4 cur = _MipOffsetAndSizeArray[6];
return _OutDepth[coordinate + cur.xy].xxxx; // 5 -> 6 as we store a copy of the depth buffer at index 0
} }
void SpdStoreH(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice) void SpdStoreH(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice)

View File

@ -971,9 +971,9 @@ void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, Ffx
// compute MIP level 0 and 1 // compute MIP level 0 and 1
SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice); SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice);
// compute MIP level 2, 3, 4, 5 // // compute MIP level 2, 3, 4, 5
SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice); SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
//
if (mips < 7) if (mips < 7)
return; return;