Update GPU encoder to use DirectCompute 5 (#108)

This commit is contained in:
Chuck Walbourn
2023-03-20 10:49:19 -07:00
committed by GitHub
parent 8c47482183
commit 574745f290
4 changed files with 60 additions and 35 deletions

View File

@@ -982,6 +982,7 @@ void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
}
}
#ifdef EMULATE_F16C
uint float2half1(float f)
{
uint Result;
@@ -1014,23 +1015,15 @@ uint float2half1(float f)
}
return (Result|Sign);
}
#endif
// Converts each component of endPoint_f from a 32-bit float to a half-precision
// value, returned as one uint per component.
//   endPoint_f : the three float values to convert.
//   returns    : uint3 holding the converted x, y and z results.
// The conversion routine is chosen at compile time: with EMULATE_F16C defined,
// the float2half1 helper is called per component; otherwise the f32tof16
// intrinsic is used.
uint3 float2half(float3 endPoint_f)
{
// Earlier hand-written bit-manipulation conversion, left disabled for reference.
//uint3 sign = asuint(endPoint_f) & 0x80000000;
//uint3 expo = asuint(endPoint_f) & 0x7F800000;
//uint3 base = asuint(endPoint_f) & 0x007FFFFF;
//return ( expo < 0x33800000 ) ? 0
// //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present
// : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
// //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
// : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
// // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
// // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
// : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );
#ifdef EMULATE_F16C
// Emulation path: convert each component with the software helper float2half1.
return uint3(float2half1(endPoint_f.x), float2half1(endPoint_f.y), float2half1(endPoint_f.z));
#else
// Default path: convert each component with the f32tof16 intrinsic.
return uint3(f32tof16(endPoint_f.x), f32tof16(endPoint_f.y), f32tof16(endPoint_f.z));
#endif
}
int3 start_quantize(uint3 pixel_h)
{
@@ -1207,6 +1200,7 @@ void generate_palette_unquantized16(out uint3 palette, int3 low, int3 high, int
palette = finish_unquantize(tmp);
}
#ifdef EMULATE_F16C
float half2float1(uint Value)
{
uint Mantissa = (uint)(Value & 0x03FF);
@@ -1240,16 +1234,15 @@ float half2float1(uint Value)
return asfloat(Result);
}
#endif
// Converts each component of color_h from a half-precision value (held in a
// uint) to a 32-bit float.
//   color_h : the three half values to convert, one per component.
//   returns : float3 holding the converted x, y and z results.
// The conversion routine is chosen at compile time: with EMULATE_F16C defined,
// the half2float1 helper is called per component; otherwise the f16tof32
// intrinsic is used.
float3 half2float(uint3 color_h)
{
// Earlier hand-written bit-manipulation conversion, left disabled for reference.
//uint3 sign = color_h & 0x8000;
//uint3 expo = color_h & 0x7C00;
//uint3 base = color_h & 0x03FF;
//return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24
// : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00
#ifdef EMULATE_F16C
// Emulation path: convert each component with the software helper half2float1.
return float3(half2float1(color_h.x), half2float1(color_h.y), half2float1(color_h.z));
#else
// Default path: convert each component with the f16tof32 intrinsic.
return float3(f16tof32(color_h.x), f16tof32(color_h.y), f16tof32(color_h.z));
#endif
}
void block_package(inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index) // for mode 1 - 10

View File

@@ -46,8 +46,11 @@ endlocal
exit /b 0
:CompileShader
rem Subroutine: compiles one shader entry point twice with the compiler in PCFXC.
rem   Argument 1 is the HLSL file name without the .hlsl extension.
rem   Argument 2 is the entry point name passed to /E.
rem Outputs go to the CompileShadersOutput directory as a header (/Fh) and a PDB (/Fd).
rem Sets error to 1 if either compile fails.
rem NOTE(review): the cs_4_0 assignment to fxc below is overwritten by the cs_5_0
rem assignment on the following line before it is used. It looks like the
rem pre-change line of the diff. Confirm only one of the two belongs in the script.
set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
rem Primary build: cs_5_0 target profile.
set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_5_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
rem Fallback build: cs_4_0 target profile with EMULATE_F16C defined, written to files with a _cs40 suffix.
rem NOTE(review): /Vn uses the same variable name as the cs_5_0 build. Confirm the
rem consumer keeps the two generated headers in separate scopes.
set fxc4=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /DEMULATE_F16C /E%2 "/Fh%CompileShadersOutput%\%1_%2_cs40.inc" "/Fd%CompileShadersOutput%\%1_%2_cs40.pdb" /Vn%1_%2
echo.
rem Print each command line before running it. A failure is recorded in error and does not stop the subroutine.
echo %fxc%
%fxc% || set error=1
echo %fxc4%
%fxc4% || set error=1
rem Return to the caller.
exit /b