mirror of
https://github.com/microsoft/DirectXTex.git
synced 2025-07-12 21:20:13 +02:00
BC7 compressor optimization for only using mode 6
This commit is contained in:
parent
bfa53ff6c0
commit
1b27a1b819
@ -55,11 +55,12 @@ extern const int g_aWeights4[16];
|
||||
|
||||
enum BC_FLAGS
|
||||
{
|
||||
BC_FLAGS_NONE = 0x0,
|
||||
BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3
|
||||
BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3
|
||||
BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
|
||||
BC_FLAGS_USE_3SUBSETS = 0x80000,// By default, BC7 skips mode 0 & 2; this flag adds those modes back
|
||||
BC_FLAGS_NONE = 0x0,
|
||||
BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3
|
||||
BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3
|
||||
BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
|
||||
BC_FLAGS_USE_3SUBSETS = 0x80000, // By default, BC7 skips mode 0 & 2; this flag adds those modes back
|
||||
BC_FLAGS_FORCE_BC7_MODE6 = 0x100000, // BC7 should only use mode 6; skip other modes
|
||||
};
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
@ -624,7 +625,7 @@ class D3DX_BC7 : private CBits< 16 >
|
||||
{
|
||||
public:
|
||||
void Decode(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
|
||||
void Encode(bool skip3subsets, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
|
||||
void Encode(DWORD flags, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
|
||||
|
||||
private:
|
||||
struct ModeInfo
|
||||
|
@ -634,7 +634,7 @@ namespace
|
||||
_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
|
||||
_Out_ HDRColorA* pX,
|
||||
_Out_ HDRColorA* pY,
|
||||
size_t cSteps,
|
||||
_In_range_(3, 4) size_t cSteps,
|
||||
size_t cPixels,
|
||||
_In_reads_(cPixels) const size_t* pIndex)
|
||||
{
|
||||
@ -830,7 +830,7 @@ namespace
|
||||
_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
|
||||
_Out_ HDRColorA* pX,
|
||||
_Out_ HDRColorA* pY,
|
||||
size_t cSteps,
|
||||
_In_range_(3, 4) size_t cSteps,
|
||||
size_t cPixels,
|
||||
_In_reads_(cPixels) const size_t* pIndex)
|
||||
{
|
||||
@ -2176,7 +2176,7 @@ void D3DX_BC7::Decode(HDRColorA* pOut) const
|
||||
}
|
||||
|
||||
_Use_decl_annotations_
|
||||
void D3DX_BC7::Encode(bool skip3subsets, const HDRColorA* const pIn)
|
||||
void D3DX_BC7::Encode(DWORD flags, const HDRColorA* const pIn)
|
||||
{
|
||||
assert(pIn);
|
||||
|
||||
@ -2194,12 +2194,18 @@ void D3DX_BC7::Encode(bool skip3subsets, const HDRColorA* const pIn)
|
||||
|
||||
for (EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode)
|
||||
{
|
||||
if (skip3subsets && (EP.uMode == 0 || EP.uMode == 2))
|
||||
if (!(flags & BC_FLAGS_USE_3SUBSETS) && (EP.uMode == 0 || EP.uMode == 2))
|
||||
{
|
||||
// 3 subset modes tend to be used rarely and add significant compression time
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((flags & TEX_COMPRESS_BC7_QUICK) && (EP.uMode != 6))
|
||||
{
|
||||
// Use only mode 6
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t uShapes = size_t(1) << ms_aInfo[EP.uMode].uPartitionBits;
|
||||
assert(uShapes <= BC7_MAX_SHAPES);
|
||||
_Analysis_assume_(uShapes <= BC7_MAX_SHAPES);
|
||||
@ -2902,5 +2908,5 @@ void DirectX::D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
|
||||
{
|
||||
assert(pBC && pColor);
|
||||
static_assert(sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes");
|
||||
reinterpret_cast<D3DX_BC7*>(pBC)->Encode(!(flags& BC_FLAGS_USE_3SUBSETS), reinterpret_cast<const HDRColorA*>(pColor));
|
||||
reinterpret_cast<D3DX_BC7*>(pBC)->Encode(flags, reinterpret_cast<const HDRColorA*>(pColor));
|
||||
}
|
||||
|
@ -175,8 +175,7 @@ HRESULT GPUCompressBC::Initialize(ID3D11Device* pDevice)
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
_Use_decl_annotations_
|
||||
HRESULT GPUCompressBC::Prepare(size_t width, size_t height, DXGI_FORMAT format, float alphaWeight, bool skip3subsets)
|
||||
HRESULT GPUCompressBC::Prepare(size_t width, size_t height, DWORD flags, DXGI_FORMAT format, float alphaWeight)
|
||||
{
|
||||
if (!width || !height || alphaWeight < 0.f)
|
||||
return E_INVALIDARG;
|
||||
@ -189,7 +188,16 @@ HRESULT GPUCompressBC::Prepare(size_t width, size_t height, DXGI_FORMAT format,
|
||||
|
||||
m_alphaWeight = alphaWeight;
|
||||
|
||||
m_skip3Subsets = skip3subsets;
|
||||
if (flags & TEX_COMPRESS_BC7_QUICK)
|
||||
{
|
||||
m_bc7_mode02 = false;
|
||||
m_bc7_mode137 = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_bc7_mode02 = (flags & TEX_COMPRESS_BC7_USE_3SUBSETS) != 0;
|
||||
m_bc7_mode137 = true;
|
||||
}
|
||||
|
||||
size_t xblocks = std::max<size_t>(1, (width + 3) >> 2);
|
||||
size_t yblocks = std::max<size_t>(1, (height + 3) >> 2);
|
||||
@ -332,7 +340,6 @@ HRESULT GPUCompressBC::Prepare(size_t width, size_t height, DXGI_FORMAT format,
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
_Use_decl_annotations_
|
||||
HRESULT GPUCompressBC::Compress(const Image& srcImage, const Image& destImage)
|
||||
{
|
||||
if (!srcImage.pixels || !destImage.pixels)
|
||||
@ -456,40 +463,43 @@ HRESULT GPUCompressBC::Compress(const Image& srcImage, const Image& destImage)
|
||||
RunComputeShader(pContext, m_BC7_tryMode456CS.Get(), pSRVs, 2, m_constBuffer.Get(),
|
||||
m_err1UAV.Get(), std::max<UINT>((uThreadGroupCount + 3) / 4, 1));
|
||||
|
||||
for (UINT i = 0; i < 3; ++i)
|
||||
if (m_bc7_mode137)
|
||||
{
|
||||
static const UINT modes[] = { 1, 3, 7 };
|
||||
|
||||
// Mode 1: err1 -> err2
|
||||
// Mode 3: err2 -> err1
|
||||
// Mode 7: err1 -> err2
|
||||
for (UINT i = 0; i < 3; ++i)
|
||||
{
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
HRESULT hr = pContext->Map(m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
|
||||
if (FAILED(hr))
|
||||
static const UINT modes[] = { 1, 3, 7 };
|
||||
|
||||
// Mode 1: err1 -> err2
|
||||
// Mode 3: err2 -> err1
|
||||
// Mode 7: err1 -> err2
|
||||
{
|
||||
ResetContext(pContext);
|
||||
return hr;
|
||||
D3D11_MAPPED_SUBRESOURCE mapped;
|
||||
HRESULT hr = pContext->Map(m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
|
||||
if (FAILED(hr))
|
||||
{
|
||||
ResetContext(pContext);
|
||||
return hr;
|
||||
}
|
||||
|
||||
ConstantsBC6HBC7 param;
|
||||
param.tex_width = static_cast<UINT>(srcImage.width);
|
||||
param.num_block_x = static_cast<UINT>(xblocks);
|
||||
param.format = m_bcformat;
|
||||
param.mode_id = modes[i];
|
||||
param.start_block_id = start_block_id;
|
||||
param.num_total_blocks = num_total_blocks;
|
||||
param.alpha_weight = m_alphaWeight;
|
||||
memcpy(mapped.pData, &param, sizeof(param));
|
||||
pContext->Unmap(m_constBuffer.Get(), 0);
|
||||
}
|
||||
|
||||
ConstantsBC6HBC7 param;
|
||||
param.tex_width = static_cast<UINT>(srcImage.width);
|
||||
param.num_block_x = static_cast<UINT>(xblocks);
|
||||
param.format = m_bcformat;
|
||||
param.mode_id = modes[i];
|
||||
param.start_block_id = start_block_id;
|
||||
param.num_total_blocks = num_total_blocks;
|
||||
param.alpha_weight = m_alphaWeight;
|
||||
memcpy(mapped.pData, &param, sizeof(param));
|
||||
pContext->Unmap(m_constBuffer.Get(), 0);
|
||||
pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
|
||||
RunComputeShader(pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(),
|
||||
(i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount);
|
||||
}
|
||||
|
||||
pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
|
||||
RunComputeShader(pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(),
|
||||
(i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount);
|
||||
}
|
||||
|
||||
if (!m_skip3Subsets)
|
||||
if (m_bc7_mode02)
|
||||
{
|
||||
// 3 subset modes tend to be used rarely and add significant compression time
|
||||
for (UINT i = 0; i < 2; ++i)
|
||||
@ -524,7 +534,7 @@ HRESULT GPUCompressBC::Compress(const Image& srcImage, const Image& destImage)
|
||||
}
|
||||
}
|
||||
|
||||
pSRVs[1] = m_err2SRV.Get();
|
||||
pSRVs[1] = (m_bc7_mode02 || m_bc7_mode137) ? m_err2SRV.Get() : m_err1SRV.Get();
|
||||
RunComputeShader(pContext, m_BC7_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(),
|
||||
m_outputUAV.Get(), std::max<UINT>((uThreadGroupCount + 3) / 4, 1));
|
||||
}
|
||||
|
@ -23,9 +23,9 @@ public:
|
||||
|
||||
HRESULT Initialize( _In_ ID3D11Device* pDevice );
|
||||
|
||||
HRESULT Prepare( _In_ size_t width, _In_ size_t height, _In_ DXGI_FORMAT format, _In_ float alphaWeight = 1.f, _In_ bool skip3subsets = true );
|
||||
HRESULT Prepare( size_t width, size_t height, DWORD flags, DXGI_FORMAT format, float alphaWeight );
|
||||
|
||||
HRESULT Compress( _In_ const Image& srcImage, _In_ const Image& destImage );
|
||||
HRESULT Compress( const Image& srcImage, const Image& destImage );
|
||||
|
||||
DXGI_FORMAT GetSourceFormat() const { return m_srcformat; }
|
||||
|
||||
@ -33,7 +33,8 @@ private:
|
||||
DXGI_FORMAT m_bcformat;
|
||||
DXGI_FORMAT m_srcformat;
|
||||
float m_alphaWeight;
|
||||
bool m_skip3Subsets;
|
||||
bool m_bc7_mode02;
|
||||
bool m_bc7_mode137;
|
||||
size_t m_width;
|
||||
size_t m_height;
|
||||
|
||||
|
@ -503,30 +503,33 @@ namespace DirectX
|
||||
|
||||
enum TEX_COMPRESS_FLAGS
|
||||
{
|
||||
TEX_COMPRESS_DEFAULT = 0,
|
||||
TEX_COMPRESS_DEFAULT = 0,
|
||||
|
||||
TEX_COMPRESS_RGB_DITHER = 0x10000,
|
||||
TEX_COMPRESS_RGB_DITHER = 0x10000,
|
||||
// Enables dithering RGB colors for BC1-3 compression
|
||||
|
||||
TEX_COMPRESS_A_DITHER = 0x20000,
|
||||
TEX_COMPRESS_A_DITHER = 0x20000,
|
||||
// Enables dithering alpha for BC1-3 compression
|
||||
|
||||
TEX_COMPRESS_DITHER = 0x30000,
|
||||
TEX_COMPRESS_DITHER = 0x30000,
|
||||
// Enables both RGB and alpha dithering for BC1-3 compression
|
||||
|
||||
TEX_COMPRESS_UNIFORM = 0x40000,
|
||||
TEX_COMPRESS_UNIFORM = 0x40000,
|
||||
// Uniform color weighting for BC1-3 compression; by default uses perceptual weighting
|
||||
|
||||
TEX_COMPRESS_BC7_USE_3SUBSETS = 0x80000,
|
||||
TEX_COMPRESS_BC7_USE_3SUBSETS = 0x80000,
|
||||
// Enables exhaustive search for BC7 compress for mode 0 and 2; by default skips trying these modes
|
||||
|
||||
TEX_COMPRESS_SRGB_IN = 0x1000000,
|
||||
TEX_COMPRESS_SRGB_OUT = 0x2000000,
|
||||
TEX_COMPRESS_SRGB = ( TEX_COMPRESS_SRGB_IN | TEX_COMPRESS_SRGB_OUT ),
|
||||
TEX_COMPRESS_BC7_QUICK = 0x100000,
|
||||
// Minimal modes (usually mode 6) for BC7 compression
|
||||
|
||||
TEX_COMPRESS_SRGB_IN = 0x1000000,
|
||||
TEX_COMPRESS_SRGB_OUT = 0x2000000,
|
||||
TEX_COMPRESS_SRGB = ( TEX_COMPRESS_SRGB_IN | TEX_COMPRESS_SRGB_OUT ),
|
||||
// if the input format type is IsSRGB(), then SRGB_IN is on by default
|
||||
// if the output format type is IsSRGB(), then SRGB_OUT is on by default
|
||||
|
||||
TEX_COMPRESS_PARALLEL = 0x10000000,
|
||||
TEX_COMPRESS_PARALLEL = 0x10000000,
|
||||
// Compress is free to use multithreading to improve performance (by default it does not use multithreading)
|
||||
};
|
||||
|
||||
|
@ -33,7 +33,8 @@ namespace
|
||||
static_assert(TEX_COMPRESS_DITHER == (BC_FLAGS_DITHER_RGB | BC_FLAGS_DITHER_A), "TEX_COMPRESS_* flags should match BC_FLAGS_*");
|
||||
static_assert(TEX_COMPRESS_UNIFORM == BC_FLAGS_UNIFORM, "TEX_COMPRESS_* flags should match BC_FLAGS_*");
|
||||
static_assert(TEX_COMPRESS_BC7_USE_3SUBSETS == BC_FLAGS_USE_3SUBSETS, "TEX_COMPRESS_* flags should match BC_FLAGS_*");
|
||||
return (compress & (BC_FLAGS_DITHER_RGB | BC_FLAGS_DITHER_A | BC_FLAGS_UNIFORM | BC_FLAGS_USE_3SUBSETS));
|
||||
static_assert(TEX_COMPRESS_BC7_QUICK == BC_FLAGS_FORCE_BC7_MODE6, "TEX_COMPRESS_* flags should match BC_FLAGS_*");
|
||||
return (compress & (BC_FLAGS_DITHER_RGB | BC_FLAGS_DITHER_A | BC_FLAGS_UNIFORM | BC_FLAGS_USE_3SUBSETS | BC_FLAGS_FORCE_BC7_MODE6));
|
||||
}
|
||||
|
||||
inline DWORD GetSRGBFlags(_In_ DWORD compress)
|
||||
|
@ -234,7 +234,7 @@ HRESULT DirectX::Compress(
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
hr = gpubc->Prepare(srcImage.width, srcImage.height, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS));
|
||||
hr = gpubc->Prepare(srcImage.width, srcImage.height, compress, format, alphaWeight);
|
||||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
@ -320,7 +320,7 @@ HRESULT DirectX::Compress(
|
||||
|
||||
for (size_t level = 0; level < metadata.mipLevels; ++level)
|
||||
{
|
||||
hr = gpubc->Prepare(w, h, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS));
|
||||
hr = gpubc->Prepare(w, h, compress, format, alphaWeight);
|
||||
if (FAILED(hr))
|
||||
{
|
||||
cImages.Release();
|
||||
@ -371,7 +371,7 @@ HRESULT DirectX::Compress(
|
||||
|
||||
for (size_t level = 0; level < metadata.mipLevels; ++level)
|
||||
{
|
||||
hr = gpubc->Prepare(w, h, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS));
|
||||
hr = gpubc->Prepare(w, h, compress, format, alphaWeight);
|
||||
if (FAILED(hr))
|
||||
{
|
||||
cImages.Release();
|
||||
|
@ -295,6 +295,7 @@ namespace TriangleFilter
|
||||
}
|
||||
|
||||
assert(pFilter != 0);
|
||||
_Analysis_assume_(pFilter != 0);
|
||||
|
||||
// Filter setup
|
||||
size_t sizeInBytes = TF_FILTER_SIZE;
|
||||
|
@ -81,6 +81,7 @@ enum OPTIONS
|
||||
OPT_NORMAL_MAP_AMPLITUDE,
|
||||
OPT_COMPRESS_UNIFORM,
|
||||
OPT_COMPRESS_MAX,
|
||||
OPT_COMPRESS_QUICK,
|
||||
OPT_COMPRESS_DITHER,
|
||||
OPT_WIC_QUALITY,
|
||||
OPT_WIC_LOSSLESS,
|
||||
@ -147,6 +148,7 @@ SValue g_pOptions[] =
|
||||
{ L"nmapamp", OPT_NORMAL_MAP_AMPLITUDE },
|
||||
{ L"bcuniform", OPT_COMPRESS_UNIFORM },
|
||||
{ L"bcmax", OPT_COMPRESS_MAX },
|
||||
{ L"bcquick", OPT_COMPRESS_QUICK },
|
||||
{ L"bcdither", OPT_COMPRESS_DITHER },
|
||||
{ L"wicq", OPT_WIC_QUALITY },
|
||||
{ L"wiclossless", OPT_WIC_LOSSLESS },
|
||||
@ -656,7 +658,8 @@ namespace
|
||||
wprintf(L" -nogpu Do not use DirectCompute-based codecs\n");
|
||||
wprintf(L" -bcuniform Use uniform rather than perceptual weighting for BC1-3\n");
|
||||
wprintf(L" -bcdither Use dithering for BC1-3\n");
|
||||
wprintf(L" -bcmax Use exchaustive compression (BC7 only)\n");
|
||||
wprintf(L" -bcmax Use exhaustive compression (BC7 only)\n");
|
||||
wprintf(L" -bcquick USe quick compression (BC7 only)\n");
|
||||
wprintf(L" -wicq <quality> When writing images with WIC use quality (0.0 to 1.0)\n");
|
||||
wprintf(L" -wiclossless When writing images with WIC use lossless mode\n");
|
||||
wprintf(
|
||||
@ -690,9 +693,10 @@ namespace
|
||||
for (UINT adapterIndex = 0; DXGI_ERROR_NOT_FOUND != dxgiFactory->EnumAdapters(adapterIndex, adapter.ReleaseAndGetAddressOf()); ++adapterIndex)
|
||||
{
|
||||
DXGI_ADAPTER_DESC desc;
|
||||
adapter->GetDesc(&desc);
|
||||
|
||||
wprintf(L" %u: VID:%04X, PID:%04X - %ls\n", adapterIndex, desc.VendorId, desc.DeviceId, desc.Description);
|
||||
if (SUCCEEDED(adapter->GetDesc(&desc)))
|
||||
{
|
||||
wprintf(L" %u: VID:%04X, PID:%04X - %ls\n", adapterIndex, desc.VendorId, desc.DeviceId, desc.Description);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1203,6 +1207,10 @@ int __cdecl wmain(_In_ int argc, _In_z_count_(argc) wchar_t* argv[])
|
||||
dwCompress |= TEX_COMPRESS_BC7_USE_3SUBSETS;
|
||||
break;
|
||||
|
||||
case OPT_COMPRESS_QUICK:
|
||||
dwCompress |= TEX_COMPRESS_BC7_QUICK;
|
||||
break;
|
||||
|
||||
case OPT_COMPRESS_DITHER:
|
||||
dwCompress |= TEX_COMPRESS_DITHER;
|
||||
break;
|
||||
|
Loading…
x
Reference in New Issue
Block a user