Fix depth-downsample compute kernel (CSMainMipmapGroup0 in DepthPyramid.compute)

2025-06-25 12:08:14 +08:00 · 2025-06-25 12:08:14 +08:00 · 3b95af03c8
commit 3b95af03c8
parent 4fa3eca91e
2 changed files with 30 additions and 39 deletions
--- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs
@ -420,7 +420,6 @@ namespace X.Rendering.Feature
                    // cmd.SetExecutionFlags(CommandBufferExecutionFlags.AsyncCompute);
                    cmd.DispatchCompute(settings.ComputeShader, kernelId, Mathf.CeilToInt(outputMipSize.x / 8f), Mathf.CeilToInt(outputMipSize.y / 8f), 1);
                    mipCnt = mipCnt - 4;
                    break;
                }
                cmd.EndSample("Depth-Downsample");
--- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute
+++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute
@ -22,31 +22,32 @@ float4 _InputMipOffsetAndSize;
 float _MipCount;
 groupshared float _LDSDepths[GROUP_TILE_SIZE * GROUP_TILE_SIZE];
 [numthreads(GROUP_TILE_SIZE, GROUP_TILE_SIZE, 1)]
-void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
+void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID,  uint3 groupThreadID : SV_GroupThreadID)
 {
    uint2 curMipSize = _MipOffsetAndSizeArray[0].zw;
    //Mip0从InputeTexture贴图下采样
    float2 uv = dispatchThreadId.xy / float2(curMipSize);
    uv = clamp(uv, 0, 1);
    int2 texCrood = _InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw);
    uint2 maxIndex = _InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1;
 #ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture
    float p00 = _InputDepth[min(texCrood + uint2(0, 0), maxIndex)];
    float p01 = _InputDepth[min(texCrood + uint2(1, 0), maxIndex)];
    float p10 = _InputDepth[min(texCrood + uint2(0, 1), maxIndex)];
    float p11 = _InputDepth[min(texCrood + uint2(1, 1), maxIndex)];
 #else
    float p00 = _DepthMipChain[min(texCrood + uint2(0, 0), maxIndex)];
    float p01 = _DepthMipChain[min(texCrood + uint2(1, 0), maxIndex)];
    float p10 = _DepthMipChain[min(texCrood + uint2(0, 1), maxIndex)];
    float p11 = _DepthMipChain[min(texCrood + uint2(1, 1), maxIndex)];
 #endif
    float4 depths = float4(p00, p10, p01, p11);
    float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w));
    if (all(dispatchThreadId.xy < curMipSize.xy))
    {
        //Mip0从InputeTexture贴图下采样
        float2 uv = dispatchThreadId.xy / float2(curMipSize);
        uv = clamp(uv, 0, 1);
        int3 texCrood = int3(_InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw), 0);
        uint2 maxIndex =  int3(_InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1,0);
    #ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture
        float p00 = _InputDepth[min(texCrood + uint3(0, 0, 0), maxIndex)];
        float p01 = _InputDepth[min(texCrood + uint3(1, 0, 0), maxIndex)];
        float p10 = _InputDepth[min(texCrood + uint3(0, 1, 0), maxIndex)];
        float p11 = _InputDepth[min(texCrood + uint3(1, 1, 0), maxIndex)];
    #else
        float p00 = _DepthMipChain[min(texCrood + uint3(0, 0, 0), maxIndex)];
        float p01 = _DepthMipChain[min(texCrood + uint3(1, 0, 0), maxIndex)];
        float p10 = _DepthMipChain[min(texCrood + uint3(0, 1, 0), maxIndex)];
        float p11 = _DepthMipChain[min(texCrood + uint3(1, 1, 0), maxIndex)];
    #endif
        float4 depths = float4(p00, p10, p01, p11);
        float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w));
 #ifndef _SKIP_3_MIP //跳过前3级,不输出到RT
        _DepthMipChain[_MipOffsetAndSizeArray[0].xy + dispatchThreadId.xy] = minDepth;
 #endif
@ -59,7 +60,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
    uint TileSize = GROUP_TILE_SIZE / 2; //线程数减半
    float4 parentFurthestDeviceZ;
    uint2 xy = min(groupThreadID.xy, TileSize - 1);
-    uint2 xy2 = xy * 2; //间隔索引:[0,2,4,6]
+    uint2 xy2 = xy * 2;//间隔索引:[0,2,4,6]
    uint index0 = xy2.x + xy2.y * preMipTileSize;
    uint index1 = (xy2.x + 1) + xy2.y * preMipTileSize;
    uint index2 = xy2.x + (xy2.y + 1) * preMipTileSize;
@ -71,7 +72,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
    parentFurthestDeviceZ.w = _LDSDepths[index3];
    float furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w));
-    uint2 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 
+    uint2 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上 
    if (all(localIndex < curMipSize.xy) //索引在贴图范围内的
        && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程
@ -83,11 +84,6 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
        int threadIndex = xy.x + xy.y * TileSize;
        _LDSDepths[threadIndex] = furthestDeviceZ;
    }
    else
    {
        int threadIndex = xy.x + xy.y * TileSize;
        _LDSDepths[threadIndex] = 0;
    }
    //Mip2
    GroupMemoryBarrierWithGroupSync();
@ -96,7 +92,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
    preMipTileSize = TileSize;
    TileSize = TileSize / 2; //线程数减半
    xy = min(groupThreadID.xy, TileSize - 1);
-    xy2 = xy * 2; //间隔索引:[0,2,4,6]
+    xy2 = xy * 2;//间隔索引:[0,2,4,6]
    index0 = xy2.x + xy2.y * preMipTileSize;
    index1 = (xy2.x + 1) + xy2.y * preMipTileSize;
    index2 = xy2.x + (xy2.y + 1) * preMipTileSize;
@ -109,7 +105,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
    GroupMemoryBarrierWithGroupSync();
    furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w));
-    localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 
+    localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上 
    if (all(localIndex < curMipSize.xy) //索引在贴图范围内的
        && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程
@ -121,11 +117,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
        int threadIndex = xy.x + xy.y * TileSize;
        _LDSDepths[threadIndex] = furthestDeviceZ;
    }
-    else
+
    {
        int threadIndex = xy.x + xy.y * TileSize;
        _LDSDepths[threadIndex] = 0;
    }
    //Mip3
    GroupMemoryBarrierWithGroupSync();
@ -133,7 +125,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
    preMipTileSize = TileSize;
    TileSize = TileSize / 2; //线程数减半
    xy = min(groupThreadID.xy, TileSize - 1);
-    xy2 = xy * 2; //间隔索引:[0,2,4,6]
+    xy2 = xy * 2;//间隔索引:[0,2,4,6]
    index0 = xy2.x + xy2.y * preMipTileSize;
    index1 = (xy2.x + 1) + xy2.y * preMipTileSize;
    index2 = xy2.x + (xy2.y + 1) * preMipTileSize;
@ -144,9 +136,9 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
    parentFurthestDeviceZ.z = _LDSDepths[index2];
    parentFurthestDeviceZ.w = _LDSDepths[index3];
-    // GroupMemoryBarrierWithGroupSync();
+    GroupMemoryBarrierWithGroupSync();
    furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w));
-    localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 
+    localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上 
    if (all(localIndex < curMipSize.xy) //索引在贴图范围内的
        && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程
@ -154,4 +146,4 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
    {
        _DepthMipChain[_MipOffsetAndSizeArray[3].xy + localIndex] = furthestDeviceZ;
    }
-}
+}