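Fix compute depth downsample: sample the input only for in-range threads, stop zero-filling LDS for filtered threads, and restore the Mip3 group barrier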

This commit is contained in:
StarBeats 2025-06-25 12:08:14 +08:00
parent 4fa3eca91e
commit 3b95af03c8
2 changed files with 30 additions and 39 deletions

View File

@ -420,7 +420,6 @@ namespace X.Rendering.Feature
// cmd.SetExecutionFlags(CommandBufferExecutionFlags.AsyncCompute); // cmd.SetExecutionFlags(CommandBufferExecutionFlags.AsyncCompute);
cmd.DispatchCompute(settings.ComputeShader, kernelId, Mathf.CeilToInt(outputMipSize.x / 8f), Mathf.CeilToInt(outputMipSize.y / 8f), 1); cmd.DispatchCompute(settings.ComputeShader, kernelId, Mathf.CeilToInt(outputMipSize.x / 8f), Mathf.CeilToInt(outputMipSize.y / 8f), 1);
mipCnt = mipCnt - 4; mipCnt = mipCnt - 4;
break;
} }
cmd.EndSample("Depth-Downsample"); cmd.EndSample("Depth-Downsample");

View File

@ -22,31 +22,32 @@ float4 _InputMipOffsetAndSize;
float _MipCount; float _MipCount;
groupshared float _LDSDepths[GROUP_TILE_SIZE * GROUP_TILE_SIZE]; groupshared float _LDSDepths[GROUP_TILE_SIZE * GROUP_TILE_SIZE];
[numthreads(GROUP_TILE_SIZE, GROUP_TILE_SIZE, 1)] [numthreads(GROUP_TILE_SIZE, GROUP_TILE_SIZE, 1)]
void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID) void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint groupThreadIndex : SV_GroupIndex, uint2 groupId : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID)
{ {
uint2 curMipSize = _MipOffsetAndSizeArray[0].zw; uint2 curMipSize = _MipOffsetAndSizeArray[0].zw;
//Mip0从InputeTexture贴图下采样
float2 uv = dispatchThreadId.xy / float2(curMipSize);
uv = clamp(uv, 0, 1);
int2 texCrood = _InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw);
uint2 maxIndex = _InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1;
#ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture
float p00 = _InputDepth[min(texCrood + uint2(0, 0), maxIndex)];
float p01 = _InputDepth[min(texCrood + uint2(1, 0), maxIndex)];
float p10 = _InputDepth[min(texCrood + uint2(0, 1), maxIndex)];
float p11 = _InputDepth[min(texCrood + uint2(1, 1), maxIndex)];
#else
float p00 = _DepthMipChain[min(texCrood + uint2(0, 0), maxIndex)];
float p01 = _DepthMipChain[min(texCrood + uint2(1, 0), maxIndex)];
float p10 = _DepthMipChain[min(texCrood + uint2(0, 1), maxIndex)];
float p11 = _DepthMipChain[min(texCrood + uint2(1, 1), maxIndex)];
#endif
float4 depths = float4(p00, p10, p01, p11);
float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w));
if (all(dispatchThreadId.xy < curMipSize.xy)) if (all(dispatchThreadId.xy < curMipSize.xy))
{ {
//Mip0从InputeTexture贴图下采样
float2 uv = dispatchThreadId.xy / float2(curMipSize);
uv = clamp(uv, 0, 1);
int3 texCrood = int3(_InputMipOffsetAndSize.xy + uv * (_InputMipOffsetAndSize.zw), 0);
uint2 maxIndex = int3(_InputMipOffsetAndSize.xy + _InputMipOffsetAndSize.zw - 1,0);
#ifdef _INPUTE_DEPTH_TEXTURE //第一次Dispath使用Depth Texture
float p00 = _InputDepth[min(texCrood + uint3(0, 0, 0), maxIndex)];
float p01 = _InputDepth[min(texCrood + uint3(1, 0, 0), maxIndex)];
float p10 = _InputDepth[min(texCrood + uint3(0, 1, 0), maxIndex)];
float p11 = _InputDepth[min(texCrood + uint3(1, 1, 0), maxIndex)];
#else
float p00 = _DepthMipChain[min(texCrood + uint3(0, 0, 0), maxIndex)];
float p01 = _DepthMipChain[min(texCrood + uint3(1, 0, 0), maxIndex)];
float p10 = _DepthMipChain[min(texCrood + uint3(0, 1, 0), maxIndex)];
float p11 = _DepthMipChain[min(texCrood + uint3(1, 1, 0), maxIndex)];
#endif
float4 depths = float4(p00, p10, p01, p11);
float minDepth = MIN_DEPTH(MIN_DEPTH(depths.x, depths.y), MIN_DEPTH(depths.z, depths.w));
#ifndef _SKIP_3_MIP //跳过前3级,不输出到RT #ifndef _SKIP_3_MIP //跳过前3级,不输出到RT
_DepthMipChain[_MipOffsetAndSizeArray[0].xy + dispatchThreadId.xy] = minDepth; _DepthMipChain[_MipOffsetAndSizeArray[0].xy + dispatchThreadId.xy] = minDepth;
#endif #endif
@ -59,7 +60,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
uint TileSize = GROUP_TILE_SIZE / 2; //线程数减半 uint TileSize = GROUP_TILE_SIZE / 2; //线程数减半
float4 parentFurthestDeviceZ; float4 parentFurthestDeviceZ;
uint2 xy = min(groupThreadID.xy, TileSize - 1); uint2 xy = min(groupThreadID.xy, TileSize - 1);
uint2 xy2 = xy * 2; //间隔索引:[0,2,4,6] uint2 xy2 = xy * 2;//间隔索引:[0,2,4,6]
uint index0 = xy2.x + xy2.y * preMipTileSize; uint index0 = xy2.x + xy2.y * preMipTileSize;
uint index1 = (xy2.x + 1) + xy2.y * preMipTileSize; uint index1 = (xy2.x + 1) + xy2.y * preMipTileSize;
uint index2 = xy2.x + (xy2.y + 1) * preMipTileSize; uint index2 = xy2.x + (xy2.y + 1) * preMipTileSize;
@ -71,7 +72,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
parentFurthestDeviceZ.w = _LDSDepths[index3]; parentFurthestDeviceZ.w = _LDSDepths[index3];
float furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w)); float furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w));
uint2 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 uint2 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上
if (all(localIndex < curMipSize.xy) //索引在贴图范围内的 if (all(localIndex < curMipSize.xy) //索引在贴图范围内的
&& all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程 && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程
@ -83,11 +84,6 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
int threadIndex = xy.x + xy.y * TileSize; int threadIndex = xy.x + xy.y * TileSize;
_LDSDepths[threadIndex] = furthestDeviceZ; _LDSDepths[threadIndex] = furthestDeviceZ;
} }
else
{
int threadIndex = xy.x + xy.y * TileSize;
_LDSDepths[threadIndex] = 0;
}
//Mip2 //Mip2
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
@ -96,7 +92,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
preMipTileSize = TileSize; preMipTileSize = TileSize;
TileSize = TileSize / 2; //线程数减半 TileSize = TileSize / 2; //线程数减半
xy = min(groupThreadID.xy, TileSize - 1); xy = min(groupThreadID.xy, TileSize - 1);
xy2 = xy * 2; //间隔索引:[0,2,4,6] xy2 = xy * 2;//间隔索引:[0,2,4,6]
index0 = xy2.x + xy2.y * preMipTileSize; index0 = xy2.x + xy2.y * preMipTileSize;
index1 = (xy2.x + 1) + xy2.y * preMipTileSize; index1 = (xy2.x + 1) + xy2.y * preMipTileSize;
index2 = xy2.x + (xy2.y + 1) * preMipTileSize; index2 = xy2.x + (xy2.y + 1) * preMipTileSize;
@ -109,7 +105,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w)); furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w));
localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上
if (all(localIndex < curMipSize.xy) //索引在贴图范围内的 if (all(localIndex < curMipSize.xy) //索引在贴图范围内的
&& all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程 && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程
@ -121,11 +117,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
int threadIndex = xy.x + xy.y * TileSize; int threadIndex = xy.x + xy.y * TileSize;
_LDSDepths[threadIndex] = furthestDeviceZ; _LDSDepths[threadIndex] = furthestDeviceZ;
} }
else
{
int threadIndex = xy.x + xy.y * TileSize;
_LDSDepths[threadIndex] = 0;
}
//Mip3 //Mip3
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
@ -133,7 +125,7 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
preMipTileSize = TileSize; preMipTileSize = TileSize;
TileSize = TileSize / 2; //线程数减半 TileSize = TileSize / 2; //线程数减半
xy = min(groupThreadID.xy, TileSize - 1); xy = min(groupThreadID.xy, TileSize - 1);
xy2 = xy * 2; //间隔索引:[0,2,4,6] xy2 = xy * 2;//间隔索引:[0,2,4,6]
index0 = xy2.x + xy2.y * preMipTileSize; index0 = xy2.x + xy2.y * preMipTileSize;
index1 = (xy2.x + 1) + xy2.y * preMipTileSize; index1 = (xy2.x + 1) + xy2.y * preMipTileSize;
index2 = xy2.x + (xy2.y + 1) * preMipTileSize; index2 = xy2.x + (xy2.y + 1) * preMipTileSize;
@ -144,9 +136,9 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
parentFurthestDeviceZ.z = _LDSDepths[index2]; parentFurthestDeviceZ.z = _LDSDepths[index2];
parentFurthestDeviceZ.w = _LDSDepths[index3]; parentFurthestDeviceZ.w = _LDSDepths[index3];
// GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w)); furthestDeviceZ = MIN_DEPTH(MIN_DEPTH(parentFurthestDeviceZ.x, parentFurthestDeviceZ.y), MIN_DEPTH(parentFurthestDeviceZ.z, parentFurthestDeviceZ.w));
localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize)); //因为过滤了3/4的线程组,所以线程索引需要往前补上 localIndex = dispatchThreadId.xy - (groupId.xy * (GROUP_TILE_SIZE - TileSize));//因为过滤了3/4的线程组,所以线程索引需要往前补上
if (all(localIndex < curMipSize.xy) //索引在贴图范围内的 if (all(localIndex < curMipSize.xy) //索引在贴图范围内的
&& all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程 && all(groupThreadID.xy < TileSize) //每次每个线程组保留1/4的线程
@ -154,4 +146,4 @@ void CSMainMipmapGroup0(uint3 dispatchThreadId : SV_DispatchThreadID, uint group
{ {
_DepthMipChain[_MipOffsetAndSizeArray[3].xy + localIndex] = furthestDeviceZ; _DepthMipChain[_MipOffsetAndSizeArray[3].xy + localIndex] = furthestDeviceZ;
} }
} }