From 574745f2908dd3f1a2003e64a9b818f76fb4c589 Mon Sep 17 00:00:00 2001 From: Chuck Walbourn Date: Mon, 20 Mar 2023 10:49:19 -0700 Subject: [PATCH] Update GPU encoder to use DirectCompute 5 (#108) --- DirectXTex/BCDirectCompute.cpp | 56 ++++++++++++++++++++------- DirectXTex/Shaders/BC6HEncode.hlsl | 31 ++++++--------- DirectXTex/Shaders/CompileShaders.cmd | 5 ++- Texconv/texconv.cpp | 3 +- 4 files changed, 60 insertions(+), 35 deletions(-) diff --git a/DirectXTex/BCDirectCompute.cpp b/DirectXTex/BCDirectCompute.cpp index a98ef8b..2f3265d 100644 --- a/DirectXTex/BCDirectCompute.cpp +++ b/DirectXTex/BCDirectCompute.cpp @@ -20,13 +20,27 @@ using Microsoft::WRL::ComPtr; namespace { -#include "BC7Encode_EncodeBlockCS.inc" -#include "BC7Encode_TryMode02CS.inc" -#include "BC7Encode_TryMode137CS.inc" -#include "BC7Encode_TryMode456CS.inc" -#include "BC6HEncode_EncodeBlockCS.inc" -#include "BC6HEncode_TryModeG10CS.inc" -#include "BC6HEncode_TryModeLE10CS.inc" + namespace cs5 + { + #include "BC7Encode_EncodeBlockCS.inc" + #include "BC7Encode_TryMode02CS.inc" + #include "BC7Encode_TryMode137CS.inc" + #include "BC7Encode_TryMode456CS.inc" + #include "BC6HEncode_EncodeBlockCS.inc" + #include "BC6HEncode_TryModeG10CS.inc" + #include "BC6HEncode_TryModeLE10CS.inc" + } + + namespace cs4 + { + #include "BC7Encode_EncodeBlockCS_cs40.inc" + #include "BC7Encode_TryMode02CS_cs40.inc" + #include "BC7Encode_TryMode137CS_cs40.inc" + #include "BC7Encode_TryMode456CS_cs40.inc" + #include "BC6HEncode_EncodeBlockCS_cs40.inc" + #include "BC6HEncode_TryModeG10CS_cs40.inc" + #include "BC6HEncode_TryModeLE10CS_cs40.inc" + } struct BufferBC6HBC7 { @@ -132,39 +146,53 @@ HRESULT GPUCompressBC::Initialize(ID3D11Device* pDevice) //--- Create compute shader library: BC6H ----------------------------------------- // Modes 11-14 - HRESULT hr = pDevice->CreateComputeShader(BC6HEncode_TryModeG10CS, sizeof(BC6HEncode_TryModeG10CS), nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf()); + auto blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_TryModeG10CS : cs4::BC6HEncode_TryModeG10CS; + auto blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_TryModeG10CS) : sizeof(cs4::BC6HEncode_TryModeG10CS); + HRESULT hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf()); if (FAILED(hr)) return hr; // Modes 1-10 - hr = pDevice->CreateComputeShader(BC6HEncode_TryModeLE10CS, sizeof(BC6HEncode_TryModeLE10CS), nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf()); + blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_TryModeLE10CS : cs4::BC6HEncode_TryModeLE10CS; + blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_TryModeLE10CS) : sizeof(cs4::BC6HEncode_TryModeLE10CS); + hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf()); if (FAILED(hr)) return hr; // Encode - hr = pDevice->CreateComputeShader(BC6HEncode_EncodeBlockCS, sizeof(BC6HEncode_EncodeBlockCS), nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf()); + blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_EncodeBlockCS : cs4::BC6HEncode_EncodeBlockCS; + blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_EncodeBlockCS) : sizeof(cs4::BC6HEncode_EncodeBlockCS); + hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf()); if (FAILED(hr)) return hr; //--- Create compute shader library: BC7 ------------------------------------------ // Modes 4, 5, 6 - hr = pDevice->CreateComputeShader(BC7Encode_TryMode456CS, sizeof(BC7Encode_TryMode456CS), nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf()); + blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode456CS : cs4::BC7Encode_TryMode456CS; + blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode456CS) : sizeof(cs4::BC7Encode_TryMode456CS); + hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf()); if (FAILED(hr)) return hr; // Modes 1, 3, 7 - hr = pDevice->CreateComputeShader(BC7Encode_TryMode137CS, sizeof(BC7Encode_TryMode137CS), nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf()); + blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode137CS : cs4::BC7Encode_TryMode137CS; + blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode137CS) : sizeof(cs4::BC7Encode_TryMode137CS); + hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf()); if (FAILED(hr)) return hr; // Modes 0, 2 - hr = pDevice->CreateComputeShader(BC7Encode_TryMode02CS, sizeof(BC7Encode_TryMode02CS), nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf()); + blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode02CS : cs4::BC7Encode_TryMode02CS; + blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode02CS) : sizeof(cs4::BC7Encode_TryMode02CS); + hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf()); if (FAILED(hr)) return hr; // Encode - hr = pDevice->CreateComputeShader(BC7Encode_EncodeBlockCS, sizeof(BC7Encode_EncodeBlockCS), nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf()); + blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_EncodeBlockCS : cs4::BC7Encode_EncodeBlockCS; + blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_EncodeBlockCS) : sizeof(cs4::BC7Encode_EncodeBlockCS); + hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf()); if (FAILED(hr)) return hr; diff --git a/DirectXTex/Shaders/BC6HEncode.hlsl b/DirectXTex/Shaders/BC6HEncode.hlsl index c43de61..ecc919a 100644 --- a/DirectXTex/Shaders/BC6HEncode.hlsl +++ b/DirectXTex/Shaders/BC6HEncode.hlsl @@ -982,6 +982,7 @@ void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID) } } +#ifdef EMULATE_F16C uint float2half1(float f) { uint Result; @@ -1014,23 +1015,15 @@ uint float2half1(float f) } return (Result|Sign); } +#endif uint3 float2half(float3 endPoint_f) { - //uint3 sign = asuint(endPoint_f) & 0x80000000; - //uint3 expo = asuint(endPoint_f) & 0x7F800000; - //uint3 base = asuint(endPoint_f) & 0x007FFFFF; - //return ( expo < 0x33800000 ) ? 0 - // //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present - // : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2 - // //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation - // : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff ) - // // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present - // // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number - // : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) ); - - +#ifdef EMULATE_F16C return uint3(float2half1(endPoint_f.x), float2half1(endPoint_f.y), float2half1(endPoint_f.z)); +#else + return uint3(f32tof16(endPoint_f.x), f32tof16(endPoint_f.y), f32tof16(endPoint_f.z)); +#endif } int3 start_quantize(uint3 pixel_h) { @@ -1207,6 +1200,7 @@ void generate_palette_unquantized16(out uint3 palette, int3 low, int3 high, int palette = finish_unquantize(tmp); } +#ifdef EMULATE_F16C float half2float1(uint Value) { uint Mantissa = (uint)(Value & 0x03FF); @@ -1240,16 +1234,15 @@ float half2float1(uint Value) return asfloat(Result); } +#endif float3 half2float(uint3 color_h) { - //uint3 sign = color_h & 0x8000; - //uint3 expo = color_h & 0x7C00; - //uint3 base = color_h & 0x03FF; - //return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24 - // : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00 - +#ifdef EMULATE_F16C return float3(half2float1(color_h.x), half2float1(color_h.y), half2float1(color_h.z)); +#else + return float3(f16tof32(color_h.x), f16tof32(color_h.y), f16tof32(color_h.z)); +#endif } void block_package(inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index) // for mode 1 - 10 diff --git a/DirectXTex/Shaders/CompileShaders.cmd b/DirectXTex/Shaders/CompileShaders.cmd index 3427e73..14a599b 100644 --- a/DirectXTex/Shaders/CompileShaders.cmd +++ b/DirectXTex/Shaders/CompileShaders.cmd @@ -46,8 +46,11 @@ endlocal exit /b 0 :CompileShader -set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2 +set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_5_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2 +set fxc4=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /DEMULATE_F16C /E%2 "/Fh%CompileShadersOutput%\%1_%2_cs40.inc" "/Fd%CompileShadersOutput%\%1_%2_cs40.pdb" /Vn%1_%2 echo. echo %fxc% %fxc% || set error=1 +echo %fxc4% +%fxc4% || set error=1 exit /b diff --git a/Texconv/texconv.cpp b/Texconv/texconv.cpp index f72b0a5..fc57a20 100644 --- a/Texconv/texconv.cpp +++ b/Texconv/texconv.cpp @@ -1117,7 +1117,8 @@ namespace hr = pAdapter->GetDesc(&desc); if (SUCCEEDED(hr)) { - wprintf(L"\n[Using DirectCompute on \"%ls\"]\n", desc.Description); + wprintf(L"\n[Using DirectCompute %ls on \"%ls\"]\n", + (fl >= D3D_FEATURE_LEVEL_11_0) ? L"5.0" : L"4.0", desc.Description); } } }