mirror of
https://github.com/microsoft/DirectXTex.git
synced 2025-07-09 11:40:14 +02:00
Update GPU encoder to use DirectCompute 5 (#108)
This commit is contained in:
parent
8c47482183
commit
574745f290
@ -20,13 +20,27 @@ using Microsoft::WRL::ComPtr;
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
#include "BC7Encode_EncodeBlockCS.inc"
|
namespace cs5
|
||||||
#include "BC7Encode_TryMode02CS.inc"
|
{
|
||||||
#include "BC7Encode_TryMode137CS.inc"
|
#include "BC7Encode_EncodeBlockCS.inc"
|
||||||
#include "BC7Encode_TryMode456CS.inc"
|
#include "BC7Encode_TryMode02CS.inc"
|
||||||
#include "BC6HEncode_EncodeBlockCS.inc"
|
#include "BC7Encode_TryMode137CS.inc"
|
||||||
#include "BC6HEncode_TryModeG10CS.inc"
|
#include "BC7Encode_TryMode456CS.inc"
|
||||||
#include "BC6HEncode_TryModeLE10CS.inc"
|
#include "BC6HEncode_EncodeBlockCS.inc"
|
||||||
|
#include "BC6HEncode_TryModeG10CS.inc"
|
||||||
|
#include "BC6HEncode_TryModeLE10CS.inc"
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace cs4
|
||||||
|
{
|
||||||
|
#include "BC7Encode_EncodeBlockCS_cs40.inc"
|
||||||
|
#include "BC7Encode_TryMode02CS_cs40.inc"
|
||||||
|
#include "BC7Encode_TryMode137CS_cs40.inc"
|
||||||
|
#include "BC7Encode_TryMode456CS_cs40.inc"
|
||||||
|
#include "BC6HEncode_EncodeBlockCS_cs40.inc"
|
||||||
|
#include "BC6HEncode_TryModeG10CS_cs40.inc"
|
||||||
|
#include "BC6HEncode_TryModeLE10CS_cs40.inc"
|
||||||
|
}
|
||||||
|
|
||||||
struct BufferBC6HBC7
|
struct BufferBC6HBC7
|
||||||
{
|
{
|
||||||
@ -132,39 +146,53 @@ HRESULT GPUCompressBC::Initialize(ID3D11Device* pDevice)
|
|||||||
//--- Create compute shader library: BC6H -----------------------------------------
|
//--- Create compute shader library: BC6H -----------------------------------------
|
||||||
|
|
||||||
// Modes 11-14
|
// Modes 11-14
|
||||||
HRESULT hr = pDevice->CreateComputeShader(BC6HEncode_TryModeG10CS, sizeof(BC6HEncode_TryModeG10CS), nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf());
|
auto blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_TryModeG10CS : cs4::BC6HEncode_TryModeG10CS;
|
||||||
|
auto blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_TryModeG10CS) : sizeof(cs4::BC6HEncode_TryModeG10CS);
|
||||||
|
HRESULT hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf());
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return hr;
|
return hr;
|
||||||
|
|
||||||
// Modes 1-10
|
// Modes 1-10
|
||||||
hr = pDevice->CreateComputeShader(BC6HEncode_TryModeLE10CS, sizeof(BC6HEncode_TryModeLE10CS), nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf());
|
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_TryModeLE10CS : cs4::BC6HEncode_TryModeLE10CS;
|
||||||
|
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_TryModeLE10CS) : sizeof(cs4::BC6HEncode_TryModeLE10CS);
|
||||||
|
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf());
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return hr;
|
return hr;
|
||||||
|
|
||||||
// Encode
|
// Encode
|
||||||
hr = pDevice->CreateComputeShader(BC6HEncode_EncodeBlockCS, sizeof(BC6HEncode_EncodeBlockCS), nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf());
|
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC6HEncode_EncodeBlockCS : cs4::BC6HEncode_EncodeBlockCS;
|
||||||
|
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC6HEncode_EncodeBlockCS) : sizeof(cs4::BC6HEncode_EncodeBlockCS);
|
||||||
|
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf());
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return hr;
|
return hr;
|
||||||
|
|
||||||
//--- Create compute shader library: BC7 ------------------------------------------
|
//--- Create compute shader library: BC7 ------------------------------------------
|
||||||
|
|
||||||
// Modes 4, 5, 6
|
// Modes 4, 5, 6
|
||||||
hr = pDevice->CreateComputeShader(BC7Encode_TryMode456CS, sizeof(BC7Encode_TryMode456CS), nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf());
|
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode456CS : cs4::BC7Encode_TryMode456CS;
|
||||||
|
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode456CS) : sizeof(cs4::BC7Encode_TryMode456CS);
|
||||||
|
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf());
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return hr;
|
return hr;
|
||||||
|
|
||||||
// Modes 1, 3, 7
|
// Modes 1, 3, 7
|
||||||
hr = pDevice->CreateComputeShader(BC7Encode_TryMode137CS, sizeof(BC7Encode_TryMode137CS), nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf());
|
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode137CS : cs4::BC7Encode_TryMode137CS;
|
||||||
|
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode137CS) : sizeof(cs4::BC7Encode_TryMode137CS);
|
||||||
|
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf());
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return hr;
|
return hr;
|
||||||
|
|
||||||
// Modes 0, 2
|
// Modes 0, 2
|
||||||
hr = pDevice->CreateComputeShader(BC7Encode_TryMode02CS, sizeof(BC7Encode_TryMode02CS), nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf());
|
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_TryMode02CS : cs4::BC7Encode_TryMode02CS;
|
||||||
|
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_TryMode02CS) : sizeof(cs4::BC7Encode_TryMode02CS);
|
||||||
|
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf());
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return hr;
|
return hr;
|
||||||
|
|
||||||
// Encode
|
// Encode
|
||||||
hr = pDevice->CreateComputeShader(BC7Encode_EncodeBlockCS, sizeof(BC7Encode_EncodeBlockCS), nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf());
|
blob = (fl >= D3D_FEATURE_LEVEL_11_0) ? cs5::BC7Encode_EncodeBlockCS : cs4::BC7Encode_EncodeBlockCS;
|
||||||
|
blobSize = (fl >= D3D_FEATURE_LEVEL_11_0) ? sizeof(cs5::BC7Encode_EncodeBlockCS) : sizeof(cs4::BC7Encode_EncodeBlockCS);
|
||||||
|
hr = pDevice->CreateComputeShader(blob, blobSize, nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf());
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
return hr;
|
return hr;
|
||||||
|
|
||||||
|
@ -982,6 +982,7 @@ void EncodeBlockCS(uint GI : SV_GroupIndex, uint3 groupID : SV_GroupID)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef EMULATE_F16C
|
||||||
uint float2half1(float f)
|
uint float2half1(float f)
|
||||||
{
|
{
|
||||||
uint Result;
|
uint Result;
|
||||||
@ -1014,23 +1015,15 @@ uint float2half1(float f)
|
|||||||
}
|
}
|
||||||
return (Result|Sign);
|
return (Result|Sign);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
uint3 float2half(float3 endPoint_f)
|
uint3 float2half(float3 endPoint_f)
|
||||||
{
|
{
|
||||||
//uint3 sign = asuint(endPoint_f) & 0x80000000;
|
#ifdef EMULATE_F16C
|
||||||
//uint3 expo = asuint(endPoint_f) & 0x7F800000;
|
|
||||||
//uint3 base = asuint(endPoint_f) & 0x007FFFFF;
|
|
||||||
//return ( expo < 0x33800000 ) ? 0
|
|
||||||
// //0x33800000 indicating 2^-24, which is minimal denormalized number that half can present
|
|
||||||
// : ( ( expo < 0x38800000 ) ? ( sign >> 16 ) | ( ( base + 0x00800000 ) >> ( 23 - ( ( expo - 0x33800000 ) >> 23 ) ) )//fixed a bug in v0.2
|
|
||||||
// //0x38800000 indicating 2^-14, which is minimal normalized number that half can present, so need to use denormalized half presentation
|
|
||||||
// : ( ( expo == 0x7F800000 || expo > 0x47000000 ) ? ( ( sign >> 16 ) | 0x7bff )
|
|
||||||
// // treat NaN as INF, treat INF (including NaN) as the maximum/minimum number that half can present
|
|
||||||
// // 0x47000000 indicating 2^15, which is maximum exponent that half can present, so cut to 0x7bff which is the maximum half number
|
|
||||||
// : ( ( sign >> 16 ) | ( ( ( expo - 0x38000000 ) | base ) >> 13 ) ) ) );
|
|
||||||
|
|
||||||
|
|
||||||
return uint3(float2half1(endPoint_f.x), float2half1(endPoint_f.y), float2half1(endPoint_f.z));
|
return uint3(float2half1(endPoint_f.x), float2half1(endPoint_f.y), float2half1(endPoint_f.z));
|
||||||
|
#else
|
||||||
|
return uint3(f32tof16(endPoint_f.x), f32tof16(endPoint_f.y), f32tof16(endPoint_f.z));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
int3 start_quantize(uint3 pixel_h)
|
int3 start_quantize(uint3 pixel_h)
|
||||||
{
|
{
|
||||||
@ -1207,6 +1200,7 @@ void generate_palette_unquantized16(out uint3 palette, int3 low, int3 high, int
|
|||||||
palette = finish_unquantize(tmp);
|
palette = finish_unquantize(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef EMULATE_F16C
|
||||||
float half2float1(uint Value)
|
float half2float1(uint Value)
|
||||||
{
|
{
|
||||||
uint Mantissa = (uint)(Value & 0x03FF);
|
uint Mantissa = (uint)(Value & 0x03FF);
|
||||||
@ -1240,16 +1234,15 @@ float half2float1(uint Value)
|
|||||||
|
|
||||||
return asfloat(Result);
|
return asfloat(Result);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
float3 half2float(uint3 color_h)
|
float3 half2float(uint3 color_h)
|
||||||
{
|
{
|
||||||
//uint3 sign = color_h & 0x8000;
|
#ifdef EMULATE_F16C
|
||||||
//uint3 expo = color_h & 0x7C00;
|
|
||||||
//uint3 base = color_h & 0x03FF;
|
|
||||||
//return ( expo == 0 ) ? asfloat( ( sign << 16 ) | asuint( float3(base) / 16777216 ) ) //16777216 = 2^24
|
|
||||||
// : asfloat( ( sign << 16 ) | ( ( ( expo + 0x1C000 ) | base ) << 13 ) ); //0x1C000 = 0x1FC00 - 0x3C00
|
|
||||||
|
|
||||||
return float3(half2float1(color_h.x), half2float1(color_h.y), half2float1(color_h.z));
|
return float3(half2float1(color_h.x), half2float1(color_h.y), half2float1(color_h.z));
|
||||||
|
#else
|
||||||
|
return float3(f16tof32(color_h.x), f16tof32(color_h.y), f16tof32(color_h.z));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void block_package(inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index) // for mode 1 - 10
|
void block_package(inout uint4 block, int2x3 endPoint[2], uint mode_type, uint partition_index) // for mode 1 - 10
|
||||||
|
@ -46,8 +46,11 @@ endlocal
|
|||||||
exit /b 0
|
exit /b 0
|
||||||
|
|
||||||
:CompileShader
|
:CompileShader
|
||||||
set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
|
set fxc=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_5_0 /E%2 "/Fh%CompileShadersOutput%\%1_%2.inc" "/Fd%CompileShadersOutput%\%1_%2.pdb" /Vn%1_%2
|
||||||
|
set fxc4=%PCFXC% "%1.hlsl" %FXCOPTS% /Tcs_4_0 /DEMULATE_F16C /E%2 "/Fh%CompileShadersOutput%\%1_%2_cs40.inc" "/Fd%CompileShadersOutput%\%1_%2_cs40.pdb" /Vn%1_%2
|
||||||
echo.
|
echo.
|
||||||
echo %fxc%
|
echo %fxc%
|
||||||
%fxc% || set error=1
|
%fxc% || set error=1
|
||||||
|
echo %fxc4%
|
||||||
|
%fxc4% || set error=1
|
||||||
exit /b
|
exit /b
|
||||||
|
@ -1117,7 +1117,8 @@ namespace
|
|||||||
hr = pAdapter->GetDesc(&desc);
|
hr = pAdapter->GetDesc(&desc);
|
||||||
if (SUCCEEDED(hr))
|
if (SUCCEEDED(hr))
|
||||||
{
|
{
|
||||||
wprintf(L"\n[Using DirectCompute on \"%ls\"]\n", desc.Description);
|
wprintf(L"\n[Using DirectCompute %ls on \"%ls\"]\n",
|
||||||
|
(fl >= D3D_FEATURE_LEVEL_11_0) ? L"5.0" : L"4.0", desc.Description);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user