diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs index 10106ce..58b72df 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/HierarchicalZOcclusionCullFeature.cs @@ -420,7 +420,6 @@ namespace X.Rendering.Feature // cmd.SetExecutionFlags(CommandBufferExecutionFlags.AsyncCompute); cmd.DispatchCompute(settings.ComputeShader, kernelId, Mathf.CeilToInt(outputMipSize.x / 8f), Mathf.CeilToInt(outputMipSize.y / 8f), 1); mipCnt = mipCnt - 4; - break; } cmd.EndSample("Depth-Downsample"); diff --git a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute index 849738e..4b252f8 100644 --- a/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute +++ b/Packages/com.unity.render-pipelines.universal@14.0.11/Runtime/OcclusionCulling/Shaders/DepthPyramid.compute @@ -22,31 +22,32 @@ float4 _InputMipOffsetAndSize; float _MipCount; groupshared float _LDSDepths[GROUP_TILE_SIZE * GROUP_TILE_SIZE]; + [numthreads(GROUP_TILE_SIZE, GROUP_TILE_SIZE, 1)] -void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) +void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) { uint2 curMipSize = _MipOffsetAndSizeArray[0].zw; + //Mip0从InputeTexture贴图下采样 + float2 uv = dispatchThreadId.xy / float2(curMipSize); + uv = clamp(uv, 0, 1); + int2 texCrood = _InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw); + uint2 maxIndex = _InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1; +#ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture + float p00 = _InputDepth[min(texCrood + uint2(0, 0), maxIndex)]; + float p01 = _InputDepth[min(texCrood + uint2(1, 0), maxIndex)]; + float p10 = _InputDepth[min(texCrood + uint2(0, 1), maxIndex)]; + float p11 = _InputDepth[min(texCrood + uint2(1, 1), maxIndex)]; +#else + float p00 = _DepthMipChain[min(texCrood + uint2(0, 0), maxIndex)]; + float p01 = _DepthMipChain[min(texCrood + uint2(1, 0), maxIndex)]; + float p10 = _DepthMipChain[min(texCrood + uint2(0, 1), maxIndex)]; + float p11 = _DepthMipChain[min(texCrood + uint2(1, 1), maxIndex)]; +#endif + float4 depths = float4(p00, p10, p01, p11); + float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w)); + if (all(dispatchThreadId.xy < curMipSize.xy)) { - //Mip0从InputeTexture贴图下采样 - float2 uv = dispatchThreadId.xy / float2(curMipSize); - uv = clamp(uv, 0, 1); - int3 texCrood = int3(_InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw), 0); - uint2 maxIndex = int3(_InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1,0); - #ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture - float p00 = _InputDepth[min(texCrood + uint3(0, 0, 0), maxIndex)]; - float p01 = _InputDepth[min(texCrood + uint3(1, 0, 0), maxIndex)]; - float p10 = _InputDepth[min(texCrood + uint3(0, 1, 0), maxIndex)]; - float p11 = _InputDepth[min(texCrood + uint3(1, 1, 0), maxIndex)]; - #else - float p00 = _DepthMipChain[min(texCrood + uint3(0, 0, 0), maxIndex)]; - float p01 = _DepthMipChain[min(texCrood + uint3(1, 0, 0), maxIndex)]; - float p10 = _DepthMipChain[min(texCrood + uint3(0, 1, 0), maxIndex)]; - float p11 = _DepthMipChain[min(texCrood + uint3(1, 1, 0), maxIndex)]; - #endif - float4 depths = float4(p00, p10, p01, p11); - float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w)); - #ifndef _SKIP_3_MIP //跳过前3级,不输出到RT _DepthMipChain[_MipOffsetAndSizeArray[0].xy + dispatchThreadId.xy] = minDepth; #endif @@ -59,7 +60,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group uint TileSize = GROUP_TILE_SIZE / 2; //线程数减半 float4 parentFurthestDeviceZ; uint2 xy = min(groupThreadID.xy, TileSize - 1); - uint2 xy2 = xy * 2; //间隔索引:[0,2,4,6] + uint2 xy2 = xy * 2;//间隔索引:[0,2,4,6] uint index0 = xy2.x + xy2.y * preMipTileSize; uint index1 = (xy2.x + 1) + xy2.y * preMipTileSize; uint index2 = xy2.x + (xy2.y + 1) * preMipTileSize; @@ -71,7 +72,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group parentFurthestDeviceZ.w = _LDSDepths[index3]; float furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w)); - uint2 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 + uint2 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上 if (all(localIndex < curMipSize.xy) //索引在贴图范围内的 && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程 @@ -83,11 +84,6 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group int threadIndex = xy.x + xy.y * TileSize; _LDSDepths[threadIndex] = furthestDeviceZ; } - else - { - int threadIndex = xy.x + xy.y * TileSize; - _LDSDepths[threadIndex] = 0; - } //Mip2 GroupMemoryBarrierWithGroupSync(); @@ -96,7 +92,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group preMipTileSize = TileSize; TileSize = TileSize / 2; //线程数减半 xy = min(groupThreadID.xy, TileSize - 1); - xy2 = xy * 2; //间隔索引:[0,2,4,6] + xy2 = xy * 2;//间隔索引:[0,2,4,6] index0 = xy2.x + xy2.y * preMipTileSize; index1 = (xy2.x + 1) + xy2.y * preMipTileSize; index2 = xy2.x + (xy2.y + 1) * preMipTileSize; @@ -109,7 +105,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group GroupMemoryBarrierWithGroupSync(); furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w)); - localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 + localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上 if (all(localIndex < curMipSize.xy) //索引在贴图范围内的 && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程 @@ -121,11 +117,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group int threadIndex = xy.x + xy.y * TileSize; _LDSDepths[threadIndex] = furthestDeviceZ; } - else - { - int threadIndex = xy.x + xy.y * TileSize; - _LDSDepths[threadIndex] = 0; - } + //Mip3 GroupMemoryBarrierWithGroupSync(); @@ -133,7 +125,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group preMipTileSize = TileSize; TileSize = TileSize / 2; //线程数减半 xy = min(groupThreadID.xy, TileSize - 1); - xy2 = xy * 2; //间隔索引:[0,2,4,6] + xy2 = xy * 2;//间隔索引:[0,2,4,6] index0 = xy2.x + xy2.y * preMipTileSize; index1 = (xy2.x + 1) + xy2.y * preMipTileSize; index2 = xy2.x + (xy2.y + 1) * preMipTileSize; @@ -144,9 +136,9 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group parentFurthestDeviceZ.z = _LDSDepths[index2]; parentFurthestDeviceZ.w = _LDSDepths[index3]; - // GroupMemoryBarrierWithGroupSync(); + GroupMemoryBarrierWithGroupSync(); furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w)); - localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 + localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上 if (all(localIndex < curMipSize.xy) //索引在贴图范围内的 && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程 @@ -154,4 +146,4 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group { _DepthMipChain[_MipOffsetAndSizeArray[3].xy + localIndex] = furthestDeviceZ; } -} \ No newline at end of file +}