Update GPU encoder to use DirectCompute 5 (#108)

2026-02-10 14:46:14 +01:00 · 2023-03-20 10:49:19 -07:00
parent 8c47482183
commit 574745f290
4 changed files with 60 additions and 35 deletions
--- a/DirectXTex/Shaders/BC6HEncode.hlsl
+++ b/DirectXTex/Shaders/BC6HEncode.hlsl
@@ -982,6 +982,7 @@ void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
    }
 }

+#ifdef EMULATE_F16C
 uint float2half1(float f)
 {
    uint Result;
@@ -1014,23 +1015,15 @@ uint float2half1(float f)
    }
    return (Result|Sign);
 }
+#endif

 uint3 float2half(float3 endPoint_f)
 {
-    //uint3 sign = asuint(endPoint_f) & 0x80000000;
-    //uint3 expo = asuint(endPoint_f) & 0x7F800000;
-    //uint3 base = asuint(endPoint_f) & 0x007FFFFF;
-    //return ( expo < 0x33800000 ) ? 0
-    //                    //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present
-    //    : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
-    //                    //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
-    //    : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
-    //                    // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
-    //                    // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
-    //    : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );
-
-
+#ifdef EMULATE_F16C
    return uint3(float2half1(endPoint_f.x), float2half1(endPoint_f.y), float2half1(endPoint_f.z));
+#else
+    return uint3(f32tof16(endPoint_f.x), f32tof16(endPoint_f.y), f32tof16(endPoint_f.z));
+#endif
 }
 int3 start_quantize(uint3 pixel_h)
 {
@@ -1207,6 +1200,7 @@ void generate_palette_unquantized16(out uint3 palette, int3 low, int3 high, int
    palette = finish_unquantize(tmp);
 }

+#ifdef EMULATE_F16C
 float half2float1(uint Value)
 {
    uint Mantissa = (uint)(Value & 0x03FF);
@@ -1240,16 +1234,15 @@ float half2float1(uint Value)

    return asfloat(Result);
 }
+#endif

 float3 half2float(uint3 color_h)
 {
-    //uint3 sign = color_h & 0x8000;
-    //uint3 expo = color_h & 0x7C00;
-    //uint3 base = color_h & 0x03FF;
-    //return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24
-    //    : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00
-
+#ifdef EMULATE_F16C
    return float3(half2float1(color_h.x), half2float1(color_h.y), half2float1(color_h.z));
+#else
+    return float3(f16tof32(color_h.x), f16tof32(color_h.y), f16tof32(color_h.z));
+#endif
 }

 void block_package(inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index) // for mode 1 - 10
--- a/DirectXTex/Shaders/CompileShaders.cmd
+++ b/DirectXTex/Shaders/CompileShaders.cmd
@@ -46,8 +46,11 @@ endlocal
 exit /b 0

 :CompileShader
-set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
+set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_5_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
+set fxc4=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /DEMULATE_F16C /E%2 "/Fh%CompileShadersOutput%\%1_%2_cs40.inc" "/Fd%CompileShadersOutput%\%1_%2_cs40.pdb" /Vn%1_%2
 echo.
 echo %fxc%
 %fxc% || set error=1
+echo %fxc4%
+%fxc4% || set error=1
 exit /b