mirror of
https://github.com/microsoft/DirectXTex.git
synced 2026-02-09 06:16:14 +01:00
BC7 compressor optimization for skipping mode 0 & 2 by default
This commit is contained in:
@@ -59,6 +59,7 @@ enum BC_FLAGS
|
||||
BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3
|
||||
BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3
|
||||
BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
|
||||
BC_FLAGS_USE_3SUBSETS = 0x80000,// By default, BC7 skips mode 0 & 2; this flag adds those modes back
|
||||
};
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
@@ -623,7 +624,7 @@ class D3DX_BC7 : private CBits< 16 >
|
||||
{
|
||||
public:
|
||||
void Decode(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
|
||||
void Encode(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
|
||||
void Encode(bool skip3subsets, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
|
||||
|
||||
private:
|
||||
struct ModeInfo
|
||||
|
||||
@@ -2143,7 +2143,7 @@ void D3DX_BC7::Decode(HDRColorA* pOut) const
|
||||
}
|
||||
|
||||
_Use_decl_annotations_
|
||||
void D3DX_BC7::Encode(const HDRColorA* const pIn)
|
||||
void D3DX_BC7::Encode(bool skip3subsets, const HDRColorA* const pIn)
|
||||
{
|
||||
assert( pIn );
|
||||
|
||||
@@ -2161,6 +2161,12 @@ void D3DX_BC7::Encode(const HDRColorA* const pIn)
|
||||
|
||||
for(EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode)
|
||||
{
|
||||
if ( skip3subsets && (EP.uMode == 0 || EP.uMode == 2) )
|
||||
{
|
||||
// 3 subset modes tend to be used rarely and add significant compression time
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t uShapes = size_t(1) << ms_aInfo[EP.uMode].uPartitionBits;
|
||||
assert( uShapes <= BC7_MAX_SHAPES );
|
||||
_Analysis_assume_( uShapes <= BC7_MAX_SHAPES );
|
||||
@@ -2860,10 +2866,9 @@ void D3DXDecodeBC7(XMVECTOR *pColor, const uint8_t *pBC)
|
||||
_Use_decl_annotations_
|
||||
void D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
|
||||
{
|
||||
UNREFERENCED_PARAMETER(flags);
|
||||
assert( pBC && pColor );
|
||||
static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
|
||||
reinterpret_cast< D3DX_BC7* >( pBC )->Encode(reinterpret_cast<const HDRColorA*>(pColor));
|
||||
reinterpret_cast< D3DX_BC7* >( pBC )->Encode( !(flags& BC_FLAGS_USE_3SUBSETS), reinterpret_cast<const HDRColorA*>(pColor));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -178,7 +178,7 @@ HRESULT GPUCompressBC::Initialize( ID3D11Device* pDevice )
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
_Use_decl_annotations_
|
||||
HRESULT GPUCompressBC::Prepare( size_t width, size_t height, DXGI_FORMAT format, float alphaWeight )
|
||||
HRESULT GPUCompressBC::Prepare( size_t width, size_t height, DXGI_FORMAT format, float alphaWeight, bool skip3subsets )
|
||||
{
|
||||
if ( !width || !height || alphaWeight < 0.f )
|
||||
return E_INVALIDARG;
|
||||
@@ -193,6 +193,8 @@ HRESULT GPUCompressBC::Prepare( size_t width, size_t height, DXGI_FORMAT format,
|
||||
|
||||
m_alphaWeight = alphaWeight;
|
||||
|
||||
m_skip3Subsets = skip3subsets;
|
||||
|
||||
size_t xblocks = std::max<size_t>( 1, (width + 3) >> 2 );
|
||||
size_t yblocks = std::max<size_t>( 1, (height + 3) >> 2 );
|
||||
size_t num_blocks = xblocks * yblocks;
|
||||
@@ -468,6 +470,10 @@ HRESULT GPUCompressBC::Compress( const Image& srcImage, const Image& destImage )
|
||||
for ( UINT i = 0; i < 3; ++i )
|
||||
{
|
||||
static const UINT modes[] = { 1, 3, 7 };
|
||||
|
||||
// Mode 1: err1 -> err2
|
||||
// Mode 3: err2 -> err1
|
||||
// Mode 7: err1 -> err2
|
||||
{
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
|
||||
@@ -494,33 +500,39 @@ HRESULT GPUCompressBC::Compress( const Image& srcImage, const Image& destImage )
|
||||
(i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount );
|
||||
}
|
||||
|
||||
for ( UINT i = 0; i < 2; ++i )
|
||||
if ( !m_skip3Subsets )
|
||||
{
|
||||
static const UINT modes[] = { 0, 2 };
|
||||
// 3 subset modes tend to be used rarely and add significant compression time
|
||||
for ( UINT i = 0; i < 2; ++i )
|
||||
{
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
|
||||
if ( FAILED(hr) )
|
||||
static const UINT modes[] = { 0, 2 };
|
||||
// Mode 0: err2 -> err1
|
||||
// Mode 2: err1 -> err2
|
||||
{
|
||||
ResetContext( pContext );
|
||||
return hr;
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
|
||||
if ( FAILED(hr) )
|
||||
{
|
||||
ResetContext( pContext );
|
||||
return hr;
|
||||
}
|
||||
|
||||
ConstantsBC6HBC7 param;
|
||||
param.tex_width = static_cast<UINT>( srcImage.width );
|
||||
param.num_block_x = static_cast<UINT>( xblocks );
|
||||
param.format = m_bcformat;
|
||||
param.mode_id = modes[i];
|
||||
param.start_block_id = start_block_id;
|
||||
param.num_total_blocks = num_total_blocks;
|
||||
param.alpha_weight = m_alphaWeight;
|
||||
memcpy( mapped.pData, &param, sizeof( param ) );
|
||||
pContext->Unmap( m_constBuffer.Get(), 0 );
|
||||
}
|
||||
|
||||
ConstantsBC6HBC7 param;
|
||||
param.tex_width = static_cast<UINT>( srcImage.width );
|
||||
param.num_block_x = static_cast<UINT>( xblocks );
|
||||
param.format = m_bcformat;
|
||||
param.mode_id = modes[i];
|
||||
param.start_block_id = start_block_id;
|
||||
param.num_total_blocks = num_total_blocks;
|
||||
param.alpha_weight = m_alphaWeight;
|
||||
memcpy( mapped.pData, &param, sizeof( param ) );
|
||||
pContext->Unmap( m_constBuffer.Get(), 0 );
|
||||
pSRVs[1] = (i & 1) ? m_err1SRV.Get() : m_err2SRV.Get();
|
||||
RunComputeShader( pContext, m_BC7_tryMode02CS.Get(), pSRVs, 2, m_constBuffer.Get(),
|
||||
(i & 1) ? m_err2UAV.Get() : m_err1UAV.Get(), uThreadGroupCount );
|
||||
}
|
||||
|
||||
pSRVs[1] = (i & 1) ? m_err1SRV.Get() : m_err2SRV.Get();
|
||||
RunComputeShader( pContext, m_BC7_tryMode02CS.Get(), pSRVs, 2, m_constBuffer.Get(),
|
||||
(i & 1) ? m_err2UAV.Get() : m_err1UAV.Get(), uThreadGroupCount );
|
||||
}
|
||||
|
||||
pSRVs[1] = m_err2SRV.Get();
|
||||
|
||||
@@ -23,7 +23,7 @@ public:
|
||||
|
||||
HRESULT Initialize( _In_ ID3D11Device* pDevice );
|
||||
|
||||
HRESULT Prepare( _In_ size_t width, _In_ size_t height, _In_ DXGI_FORMAT format, _In_ float alphaWeight = 1.f );
|
||||
HRESULT Prepare( _In_ size_t width, _In_ size_t height, _In_ DXGI_FORMAT format, _In_ float alphaWeight = 1.f, _In_ bool skip3subsets = true );
|
||||
|
||||
HRESULT Compress( _In_ const Image& srcImage, _In_ const Image& destImage );
|
||||
|
||||
@@ -33,6 +33,7 @@ private:
|
||||
DXGI_FORMAT m_bcformat;
|
||||
DXGI_FORMAT m_srcformat;
|
||||
float m_alphaWeight;
|
||||
bool m_skip3Subsets;
|
||||
size_t m_width;
|
||||
size_t m_height;
|
||||
|
||||
|
||||
@@ -521,6 +521,9 @@ namespace DirectX
|
||||
TEX_COMPRESS_UNIFORM = 0x40000,
|
||||
// Uniform color weighting for BC1-3 compression; by default uses perceptual weighting
|
||||
|
||||
TEX_COMPRESS_BC7_USE_3SUBSETS = 0x80000,
|
||||
// Enables exhaustive search for BC7 compress for mode 0 and 2; by default skips trying these modes
|
||||
|
||||
TEX_COMPRESS_SRGB_IN = 0x1000000,
|
||||
TEX_COMPRESS_SRGB_OUT = 0x2000000,
|
||||
TEX_COMPRESS_SRGB = ( TEX_COMPRESS_SRGB_IN | TEX_COMPRESS_SRGB_OUT ),
|
||||
|
||||
@@ -32,7 +32,8 @@ inline static DWORD _GetBCFlags( _In_ DWORD compress )
|
||||
static_assert( TEX_COMPRESS_A_DITHER == BC_FLAGS_DITHER_A, "TEX_COMPRESS_* flags should match BC_FLAGS_*" );
|
||||
static_assert( TEX_COMPRESS_DITHER == (BC_FLAGS_DITHER_RGB | BC_FLAGS_DITHER_A), "TEX_COMPRESS_* flags should match BC_FLAGS_*" );
|
||||
static_assert( TEX_COMPRESS_UNIFORM == BC_FLAGS_UNIFORM, "TEX_COMPRESS_* flags should match BC_FLAGS_*" );
|
||||
return ( compress & (BC_FLAGS_DITHER_RGB|BC_FLAGS_DITHER_A|BC_FLAGS_UNIFORM) );
|
||||
static_assert( TEX_COMPRESS_BC7_USE_3SUBSETS == BC_FLAGS_USE_3SUBSETS, "TEX_COMPRESS_* flags should match BC_FLAGS_*" );
|
||||
return ( compress & (BC_FLAGS_DITHER_RGB|BC_FLAGS_DITHER_A|BC_FLAGS_UNIFORM|BC_FLAGS_USE_3SUBSETS) );
|
||||
}
|
||||
|
||||
inline static DWORD _GetSRGBFlags( _In_ DWORD compress )
|
||||
|
||||
@@ -216,7 +216,7 @@ HRESULT Compress( ID3D11Device* pDevice, const Image& srcImage, DXGI_FORMAT form
|
||||
if ( FAILED(hr) )
|
||||
return hr;
|
||||
|
||||
hr = gpubc->Prepare( srcImage.width, srcImage.height, format, alphaWeight );
|
||||
hr = gpubc->Prepare( srcImage.width, srcImage.height, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS) );
|
||||
if ( FAILED(hr) )
|
||||
return hr;
|
||||
|
||||
@@ -295,7 +295,7 @@ HRESULT Compress( ID3D11Device* pDevice, const Image* srcImages, size_t nimages,
|
||||
|
||||
for( size_t level=0; level < metadata.mipLevels; ++level )
|
||||
{
|
||||
hr = gpubc->Prepare( w, h, format, alphaWeight );
|
||||
hr = gpubc->Prepare( w, h, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS) );
|
||||
if ( FAILED(hr) )
|
||||
{
|
||||
cImages.Release();
|
||||
@@ -346,7 +346,7 @@ HRESULT Compress( ID3D11Device* pDevice, const Image* srcImages, size_t nimages,
|
||||
|
||||
for( size_t level=0; level < metadata.mipLevels; ++level )
|
||||
{
|
||||
hr = gpubc->Prepare( w, h, format, alphaWeight );
|
||||
hr = gpubc->Prepare( w, h, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS) );
|
||||
if ( FAILED(hr) )
|
||||
{
|
||||
cImages.Release();
|
||||
|
||||
Reference in New Issue
Block a user