From 3e340fe79309f38f15ccf143c7e4f9daea400fb2 Mon Sep 17 00:00:00 2001 From: StarBeats <977663818@qq.com> Date: Tue, 24 Jun 2025 21:26:55 +0800 Subject: [PATCH] fix spd err. --- .../HierarchicalZOcclusionCullFeature.cs | 8 ++- .../OcclusionCulling/HizObjectsManager.cs | 2 +- .../OcclusionCulling/Shaders/CopyDepth.shader | 2 +- .../Shaders/DepthPyramid.compute | 58 +++++++++++-------- .../OcclusionCulling/Shaders/HizCull.shader | 6 +- .../OcclusionCulling/Shaders/SPD.compute | 21 ++++--- .../OcclusionCulling/Shaders/ffx/ffx_spd.h | 8 +-- 7 files changed, 64 insertions(+), 41 deletions(-) diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs index 38f06c7..71e930a 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs @@ -351,6 +351,7 @@ namespace X.Rendering.Feature { var hizIndex = GetHizIndex(); RTHandle hizBuffer = depthPyramidTexs[hizIndex]; + cmd.BeginSample("Depth-Downsample"); RenderingUtils.ReAllocateIfNeeded(ref hizBuffer, new RenderTextureDescriptor() { @@ -360,6 +361,7 @@ namespace X.Rendering.Feature colorFormat = RenderTextureFormat.RFloat, msaaSamples = 1, enableRandomWrite = true, + sRGB = false, }, filterMode: FilterMode.Point, name: depthPyramidNames[hizIndex]); depthPyramidTexs[hizIndex] = hizBuffer; @@ -418,7 +420,10 @@ namespace X.Rendering.Feature // cmd.SetExecutionFlags(CommandBufferExecutionFlags.AsyncCompute); cmd.DispatchCompute(settings.ComputeShader, kernelId, Mathf.CeilToInt(outputMipSize.x / 8f), Mathf.CeilToInt(outputMipSize.y / 8f), 1); mipCnt = mipCnt - 4; + break; } + cmd.EndSample("Depth-Downsample"); + cmd.SetGlobalTexture(HizShaderIds.DepthPyramidTexId, hizBuffer); } @@ -441,7 +446,7 @@ namespace X.Rendering.Feature var dispatchX = Mathf.CeilToInt(mip0SizeNOP.x / 64f); var dispatchY = Mathf.CeilToInt(mip0SizeNOP.y / 64f); cmd.SetComputeIntParam(settings.Spd, "mips", mipLevelCount); - cmd.SetComputeIntParam(settings.Spd, "numWorkGroups", dispatchX); + cmd.SetComputeIntParam(settings.Spd, "numWorkGroups", dispatchX * dispatchY); cmd.SetComputeVectorArrayParam(settings.Spd, "_MipOffsetAndSizeArray", mipOffsetAndSizes); @@ -469,6 +474,7 @@ namespace X.Rendering.Feature else { cmd.SetGlobalMatrix(HizShaderIds.GPUCullingVPId, world2Project); + cmd.SetViewProjectionMatrices(Matrix4x4.identity, Matrix4x4.identity); cmd.SetRenderTarget(cullResult.ResultTex, RenderBufferLoadAction.DontCare, RenderBufferStoreAction.Store); cmd.DrawMesh(RenderingUtils.fullscreenMesh, Matrix4x4.identity, settings.CullMat, 0, 0); diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HizObjectsManager.cs b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HizObjectsManager.cs index c3f8156..f685636 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HizObjectsManager.cs +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HizObjectsManager.cs @@ -163,7 +163,7 @@ namespace X.Rendering.Feature for (var i = 0; i < renderers.Count; i++) { var rdr = renderers[i]; - if (rdr) + if (rdr && cullResult.ResultArray.IsCreated) { bool needCull = cullResult.ResultArray[i] > 0; #if UNITY_EDITOR diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/CopyDepth.shader b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/CopyDepth.shader index 783bd91..ee6e7fc 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/CopyDepth.shader +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/CopyDepth.shader @@ -10,7 +10,7 @@ Shader "Hidden/CopyDepth" HLSLPROGRAM #pragma vertex Vertex #pragma fragment DownsampleDepthFrag - +#pragma enable_d3d11_debug_symbols #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl" struct Attributes diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute index e554b83..849738e 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute @@ -2,15 +2,18 @@ #pragma kernel CSMainMipmapGroup0 #pragma kernel CSMainMipmapGroup0 _INPUTE_DEPTH_TEXTURE #pragma kernel CSMainMipmapGroup0 _INPUTE_DEPTH_TEXTURE _SKIP_3_MIP +#pragma enable_d3d11_debug_symbols +#pragma target 5.0 Texture2D _InputDepth; RWTexture2D _DepthMipChain; +#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl" #if UNITY_REVERSED_Z -#define MIN_DEPTH(l, r) min(l, r) +# define MIN_DEPTH(l, r) min(l, r) #else -#define MIN_DEPTH(l, r) max(l, r) +# define MIN_DEPTH(l, r) max(l, r) #endif #define GROUP_TILE_SIZE 8 @@ -23,27 +26,27 @@ groupshared float _LDSDepths[GROUP_TILE_SIZE * GROUP_TILE_SIZE]; void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) { uint2 curMipSize = _MipOffsetAndSizeArray[0].zw; - //Mip0从InputeTexture贴图下采样 - float2 uv = dispatchThreadId.xy / float2(curMipSize); - uv = clamp(uv, 0, 1); - int2 texCrood = _InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw); - uint2 maxIndex = _InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1; -#ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture - float p00 = _InputDepth[min(texCrood + uint2(0, 0), maxIndex)]; - float p01 = _InputDepth[min(texCrood + uint2(1, 0), maxIndex)]; - float p10 = _InputDepth[min(texCrood + uint2(0, 1), maxIndex)]; - float p11 = _InputDepth[min(texCrood + uint2(1, 1), maxIndex)]; -#else - float p00 = _DepthMipChain[min(texCrood + uint2(0, 0), maxIndex)]; - float p01 = _DepthMipChain[min(texCrood + uint2(1, 0), maxIndex)]; - float p10 = _DepthMipChain[min(texCrood + uint2(0, 1), maxIndex)]; - float p11 = _DepthMipChain[min(texCrood + uint2(1, 1), maxIndex)]; -#endif - float4 depths = float4(p00, p10, p01, p11); - float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w)); - if (all(dispatchThreadId.xy < curMipSize.xy)) { + //Mip0从InputeTexture贴图下采样 + float2 uv = dispatchThreadId.xy / float2(curMipSize); + uv = clamp(uv, 0, 1); + int3 texCrood = int3(_InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw), 0); + uint2 maxIndex = int3(_InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1,0); + #ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture + float p00 = _InputDepth[min(texCrood + uint3(0, 0, 0), maxIndex)]; + float p01 = _InputDepth[min(texCrood + uint3(1, 0, 0), maxIndex)]; + float p10 = _InputDepth[min(texCrood + uint3(0, 1, 0), maxIndex)]; + float p11 = _InputDepth[min(texCrood + uint3(1, 1, 0), maxIndex)]; + #else + float p00 = _DepthMipChain[min(texCrood + uint3(0, 0, 0), maxIndex)]; + float p01 = _DepthMipChain[min(texCrood + uint3(1, 0, 0), maxIndex)]; + float p10 = _DepthMipChain[min(texCrood + uint3(0, 1, 0), maxIndex)]; + float p11 = _DepthMipChain[min(texCrood + uint3(1, 1, 0), maxIndex)]; + #endif + float4 depths = float4(p00, p10, p01, p11); + float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w)); + #ifndef _SKIP_3_MIP //跳过前3级,不输出到RT _DepthMipChain[_MipOffsetAndSizeArray[0].xy + dispatchThreadId.xy] = minDepth; #endif @@ -80,6 +83,11 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group int threadIndex = xy.x + xy.y * TileSize; _LDSDepths[threadIndex] = furthestDeviceZ; } + else + { + int threadIndex = xy.x + xy.y * TileSize; + _LDSDepths[threadIndex] = 0; + } //Mip2 GroupMemoryBarrierWithGroupSync(); @@ -113,7 +121,11 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group int threadIndex = xy.x + xy.y * TileSize; _LDSDepths[threadIndex] = furthestDeviceZ; } - + else + { + int threadIndex = xy.x + xy.y * TileSize; + _LDSDepths[threadIndex] = 0; + } //Mip3 GroupMemoryBarrierWithGroupSync(); @@ -132,7 +144,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group parentFurthestDeviceZ.z = _LDSDepths[index2]; parentFurthestDeviceZ.w = _LDSDepths[index3]; - GroupMemoryBarrierWithGroupSync(); + // GroupMemoryBarrierWithGroupSync(); furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w)); localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/HizCull.shader b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/HizCull.shader index 68438cb..24f611b 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/HizCull.shader +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/HizCull.shader @@ -11,6 +11,7 @@ #pragma vertex Vertex #pragma fragment CullFrag #pragma enable_d3d11_debug_symbols + #pragma target 5.0 #include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl" @@ -116,7 +117,7 @@ float2 size = (boxUVs.zw - boxUVs.xy) * _Mip0Size.xy; float mip = (log2(max(size.x, size.y))); - mip = ceil(mip) ; + mip = ceil(mip); mip = clamp(mip, _MipmapLevelMinMaxIndex.x, _MipmapLevelMinMaxIndex.y); // float level_lower = max(mip - 1, 0); @@ -128,10 +129,9 @@ // // Use the lower level if we only touch <= 2 texels in both dimensions // if (dims.x <= 2 && dims.y <= 2) // mip = level_lower; - + float4 offsetAndSize = _MipOffsetAndSize[mip]; int4 pxMinMax = boxUVs * offsetAndSize.zwzw + offsetAndSize.xyxy; - float4 depth = float4(LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.xy, 0).r, LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.zy, 0).r, LOAD_TEXTURE2D_LOD(_DepthPyramidTexture, pxMinMax.xw, 0).r, diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/SPD.compute b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/SPD.compute index 1e792e0..66dfc43 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/SPD.compute +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/SPD.compute @@ -2,6 +2,7 @@ #pragma kernel CSMain #define FFX_SPD_NO_WAVE_OPERATIONS 1 // #pragma multi_compile FFX_SPD_NO_WAVE_OPERATIONS _ +#pragma enable_d3d11_debug_symbols #define FFX_GPU 1 #define FFX_HLSL 1 @@ -13,6 +14,8 @@ #include "ffx/ffx_core.h" #include "ffx/ffx_common_types.h" +#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl" + FFX_GROUPSHARED FfxFloat32 spdIntermediate[16][16]; void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) { @@ -35,18 +38,18 @@ FfxFloat32x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) { } FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) { - #if FFX_SSSR_OPTION_INVERTED_DEPTH - return max(max(v0, v1), max(v2, v3)); - #else + #if UNITY_REVERSED_Z return min(min(v0, v1), min(v2, v3)); + #else + return max(max(v0, v1), max(v2, v3)); #endif } FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) { - #if FFX_SSSR_OPTION_INVERTED_DEPTH - return max(max(v0, v1), max(v2, v3)); - #else + #if UNITY_REVERSED_Z return min(min(v0, v1), min(v2, v3)); + #else + return max(max(v0, v1), max(v2, v3)); #endif } Texture2D _InputDepth; @@ -55,7 +58,8 @@ float4 _MipOffsetAndSizeArray[16]; FfxFloat32x4 SpdLoad(FfxInt32x2 coordinate, FfxUInt32 slice) { - return _InputDepth[coordinate].xxxx; // 5 -> 6 as we store a copy of the depth buffer at index 0 + uint4 cur = _MipOffsetAndSizeArray[6]; + return _OutDepth[coordinate + cur.xy].xxxx; // 5 -> 6 as we store a copy of the depth buffer at index 0 } void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice) @@ -66,7 +70,8 @@ void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUI FfxFloat32x4 SpdLoadH(FfxInt32x2 coordinate, FfxUInt32 slice) { - return _InputDepth[coordinate].xxxx; // 5 -> 6 as we store a copy of the depth buffer at index 0 + uint4 cur = _MipOffsetAndSizeArray[6]; + return _OutDepth[coordinate + cur.xy].xxxx; // 5 -> 6 as we store a copy of the depth buffer at index 0 } void SpdStoreH(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 coordinate, FfxUInt32 slice) diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/ffx/ffx_spd.h b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/ffx/ffx_spd.h index c3ee50f..6226945 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/ffx/ffx_spd.h +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/ffx/ffx_spd.h @@ -971,12 +971,12 @@ void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, Ffx // compute MIP level 0 and 1 SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice); - // compute MIP level 2, 3, 4, 5 + // // compute MIP level 2, 3, 4, 5 SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice); - + // if (mips < 7) return; - + // increase the global atomic counter for the given slice and check if it's the last remaining thread group: // terminate if not, continue if yes. if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) @@ -990,7 +990,7 @@ void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, Ffx SpdDownsampleMips_6_7H(x, y, mips, slice); // compute MIP level 8, 9, 10, 11 - SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); + SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); } /// Downsamples a 64x64 tile based on the work group id and work group offset.