Add a "quick" BC7 compression option that restricts the encoder to mode 6.

  * BC.h / DirectXTex.h: new BC_FLAGS_FORCE_BC7_MODE6 and TEX_COMPRESS_BC7_QUICK
    (both 0x100000; DirectXTexCompress.cpp static_asserts that they match).
  * D3DX_BC7::Encode now takes the BC flags word instead of a 'skip3subsets'
    bool and skips every mode except 6 when BC_FLAGS_FORCE_BC7_MODE6 is set.
  * GPUCompressBC::Prepare now takes the compress flags; when quick mode is
    requested the mode 1/3/7 and mode 0/2 passes are not run and the block
    encoder reads the mode 4/5/6 error buffer (err1) directly.
  * texconv: new -bcquick switch; fixes the "exchaustive" typo in -bcmax help.
  * Misc: _In_range_(3, 4) on cSteps, _Analysis_assume_ in Filters.h, and the
    adapter listing only prints when GetDesc succeeds.

NOTE(review): indentation, column alignment and blank context lines in this
patch were reconstructed (blank lines from the @@ hunk counts); verify against
the working tree before applying, or apply with whitespace-insensitive options.

diff --git a/DirectXTex/BC.h b/DirectXTex/BC.h
index 69b6746..d1c1665 100644
--- a/DirectXTex/BC.h
+++ b/DirectXTex/BC.h
@@ -55,11 +55,12 @@ extern const int g_aWeights4[16];
 
 enum BC_FLAGS
 {
-    BC_FLAGS_NONE = 0x0,
-    BC_FLAGS_DITHER_RGB = 0x10000, // Enables dithering for RGB colors for BC1-3
-    BC_FLAGS_DITHER_A = 0x20000, // Enables dithering for Alpha channel for BC1-3
-    BC_FLAGS_UNIFORM = 0x40000, // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
-    BC_FLAGS_USE_3SUBSETS = 0x80000,// By default, BC7 skips mode 0 & 2; this flag adds those modes back
+    BC_FLAGS_NONE               = 0x0,
+    BC_FLAGS_DITHER_RGB         = 0x10000,  // Enables dithering for RGB colors for BC1-3
+    BC_FLAGS_DITHER_A           = 0x20000,  // Enables dithering for Alpha channel for BC1-3
+    BC_FLAGS_UNIFORM            = 0x40000,  // By default, uses perceptual weighting for BC1-3; this flag makes it a uniform weighting
+    BC_FLAGS_USE_3SUBSETS       = 0x80000,  // By default, BC7 skips mode 0 & 2; this flag adds those modes back
+    BC_FLAGS_FORCE_BC7_MODE6    = 0x100000, // BC7 should only use mode 6; skip other modes
 };
 
 //-------------------------------------------------------------------------------------
@@ -624,7 +625,7 @@ class D3DX_BC7 : private CBits< 16 >
 {
 public:
     void Decode(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA* pOut) const;
-    void Encode(bool skip3subsets, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
+    void Encode(DWORD flags, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pIn);
 
 private:
     struct ModeInfo
diff --git a/DirectXTex/BC6HBC7.cpp b/DirectXTex/BC6HBC7.cpp
index 9ac1d76..e73a417 100644
--- a/DirectXTex/BC6HBC7.cpp
+++ b/DirectXTex/BC6HBC7.cpp
@@ -634,7 +634,7 @@ namespace
         _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
         _Out_ HDRColorA* pX,
         _Out_ HDRColorA* pY,
-        size_t cSteps,
+        _In_range_(3, 4) size_t cSteps,
         size_t cPixels,
         _In_reads_(cPixels) const size_t* pIndex)
     {
@@ -830,7 +830,7 @@ namespace
         _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
         _Out_ HDRColorA* pX,
         _Out_ HDRColorA* pY,
-        size_t cSteps,
+        _In_range_(3, 4) size_t cSteps,
         size_t cPixels,
         _In_reads_(cPixels) const size_t* pIndex)
     {
@@ -2176,7 +2176,7 @@ void D3DX_BC7::Decode(HDRColorA* pOut) const
 }
 
 _Use_decl_annotations_
-void D3DX_BC7::Encode(bool skip3subsets, const HDRColorA* const pIn)
+void D3DX_BC7::Encode(DWORD flags, const HDRColorA* const pIn)
 {
     assert(pIn);
 
@@ -2194,12 +2194,18 @@ void D3DX_BC7::Encode(bool skip3subsets, const HDRColorA* const pIn)
 
     for (EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode)
     {
-        if (skip3subsets && (EP.uMode == 0 || EP.uMode == 2))
+        if (!(flags & BC_FLAGS_USE_3SUBSETS) && (EP.uMode == 0 || EP.uMode == 2))
         {
             // 3 subset modes tend to be used rarely and add significant compression time
             continue;
         }
 
+        if ((flags & BC_FLAGS_FORCE_BC7_MODE6) && (EP.uMode != 6))
+        {
+            // Use only mode 6
+            continue;
+        }
+
         const size_t uShapes = size_t(1) << ms_aInfo[EP.uMode].uPartitionBits;
         assert(uShapes <= BC7_MAX_SHAPES);
         _Analysis_assume_(uShapes <= BC7_MAX_SHAPES);
@@ -2902,5 +2908,5 @@ void DirectX::D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
 {
     assert(pBC && pColor);
     static_assert(sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes");
-    reinterpret_cast<D3DX_BC7*>(pBC)->Encode(!(flags& BC_FLAGS_USE_3SUBSETS), reinterpret_cast<const HDRColorA*>(pColor));
+    reinterpret_cast<D3DX_BC7*>(pBC)->Encode(flags, reinterpret_cast<const HDRColorA*>(pColor));
 }
diff --git a/DirectXTex/BCDirectCompute.cpp b/DirectXTex/BCDirectCompute.cpp
index 9f8e607..5a063b7 100644
--- a/DirectXTex/BCDirectCompute.cpp
+++ b/DirectXTex/BCDirectCompute.cpp
@@ -175,8 +175,7 @@ HRESULT GPUCompressBC::Initialize(ID3D11Device* pDevice)
 
 
 //-------------------------------------------------------------------------------------
-_Use_decl_annotations_
-HRESULT GPUCompressBC::Prepare(size_t width, size_t height, DXGI_FORMAT format, float alphaWeight, bool skip3subsets)
+HRESULT GPUCompressBC::Prepare(size_t width, size_t height, DWORD flags, DXGI_FORMAT format, float alphaWeight)
 {
     if (!width || !height || alphaWeight < 0.f)
         return E_INVALIDARG;
@@ -189,7 +188,16 @@ HRESULT GPUCompressBC::Prepare(size_t width, size_t height, DXGI_FORMAT format,
 
     m_alphaWeight = alphaWeight;
 
-    m_skip3Subsets = skip3subsets;
+    if (flags & TEX_COMPRESS_BC7_QUICK)
+    {
+        m_bc7_mode02 = false;
+        m_bc7_mode137 = false;
+    }
+    else
+    {
+        m_bc7_mode02 = (flags & TEX_COMPRESS_BC7_USE_3SUBSETS) != 0;
+        m_bc7_mode137 = true;
+    }
 
     size_t xblocks = std::max<size_t>(1, (width + 3) >> 2);
     size_t yblocks = std::max<size_t>(1, (height + 3) >> 2);
@@ -332,7 +340,6 @@ HRESULT GPUCompressBC::Prepare(size_t width, size_t height, DXGI_FORMAT format,
 
 
 //-------------------------------------------------------------------------------------
-_Use_decl_annotations_
 HRESULT GPUCompressBC::Compress(const Image& srcImage, const Image& destImage)
 {
     if (!srcImage.pixels || !destImage.pixels)
@@ -456,40 +463,43 @@ HRESULT GPUCompressBC::Compress(const Image& srcImage, const Image& destImage)
             RunComputeShader(pContext, m_BC7_tryMode456CS.Get(), pSRVs, 2, m_constBuffer.Get(),
                 m_err1UAV.Get(), std::max<UINT>((uThreadGroupCount + 3) / 4, 1));
 
-            for (UINT i = 0; i < 3; ++i)
+            if (m_bc7_mode137)
             {
-                static const UINT modes[] = { 1, 3, 7 };
-
-                // Mode 1: err1 -> err2
-                // Mode 3: err2 -> err1
-                // Mode 7: err1 -> err2
+                for (UINT i = 0; i < 3; ++i)
                 {
-                    D3D11_MAPPED_SUBRESOURCE mapped;
-                    HRESULT hr = pContext->Map(m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
-                    if (FAILED(hr))
+                    static const UINT modes[] = { 1, 3, 7 };
+
+                    // Mode 1: err1 -> err2
+                    // Mode 3: err2 -> err1
+                    // Mode 7: err1 -> err2
                     {
-                        ResetContext(pContext);
-                        return hr;
+                        D3D11_MAPPED_SUBRESOURCE mapped;
+                        HRESULT hr = pContext->Map(m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
+                        if (FAILED(hr))
+                        {
+                            ResetContext(pContext);
+                            return hr;
+                        }
+
+                        ConstantsBC6HBC7 param;
+                        param.tex_width = static_cast<UINT>(srcImage.width);
+                        param.num_block_x = static_cast<UINT>(xblocks);
+                        param.format = m_bcformat;
+                        param.mode_id = modes[i];
+                        param.start_block_id = start_block_id;
+                        param.num_total_blocks = num_total_blocks;
+                        param.alpha_weight = m_alphaWeight;
+                        memcpy(mapped.pData, &param, sizeof(param));
+                        pContext->Unmap(m_constBuffer.Get(), 0);
                     }
 
-                    ConstantsBC6HBC7 param;
-                    param.tex_width = static_cast<UINT>(srcImage.width);
-                    param.num_block_x = static_cast<UINT>(xblocks);
-                    param.format = m_bcformat;
-                    param.mode_id = modes[i];
-                    param.start_block_id = start_block_id;
-                    param.num_total_blocks = num_total_blocks;
-                    param.alpha_weight = m_alphaWeight;
-                    memcpy(mapped.pData, &param, sizeof(param));
-                    pContext->Unmap(m_constBuffer.Get(), 0);
+                    pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
+                    RunComputeShader(pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(),
+                        (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount);
                 }
-
-                pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
-                RunComputeShader(pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(),
-                    (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount);
             }
 
-            if (!m_skip3Subsets)
+            if (m_bc7_mode02)
             {
                 // 3 subset modes tend to be used rarely and add significant compression time
                 for (UINT i = 0; i < 2; ++i)
@@ -524,7 +534,7 @@ HRESULT GPUCompressBC::Compress(const Image& srcImage, const Image& destImage)
                 }
             }
 
-            pSRVs[1] = m_err2SRV.Get();
+            pSRVs[1] = (m_bc7_mode02 || m_bc7_mode137) ? m_err2SRV.Get() : m_err1SRV.Get();
             RunComputeShader(pContext, m_BC7_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(),
                 m_outputUAV.Get(), std::max<UINT>((uThreadGroupCount + 3) / 4, 1));
         }
diff --git a/DirectXTex/BCDirectCompute.h b/DirectXTex/BCDirectCompute.h
index 8bbfa75..bedc982 100644
--- a/DirectXTex/BCDirectCompute.h
+++ b/DirectXTex/BCDirectCompute.h
@@ -23,9 +23,9 @@ public:
 
     HRESULT Initialize( _In_ ID3D11Device* pDevice );
 
-    HRESULT Prepare( _In_ size_t width, _In_ size_t height, _In_ DXGI_FORMAT format, _In_ float alphaWeight = 1.f, _In_ bool skip3subsets = true );
+    HRESULT Prepare( size_t width, size_t height, DWORD flags, DXGI_FORMAT format, float alphaWeight );
 
-    HRESULT Compress( _In_ const Image& srcImage, _In_ const Image& destImage );
+    HRESULT Compress( const Image& srcImage, const Image& destImage );
 
     DXGI_FORMAT GetSourceFormat() const { return m_srcformat; }
 
@@ -33,7 +33,8 @@ private:
     DXGI_FORMAT                                         m_bcformat;
     DXGI_FORMAT                                         m_srcformat;
     float                                               m_alphaWeight;
-    bool                                                m_skip3Subsets;
+    bool                                                m_bc7_mode02;
+    bool                                                m_bc7_mode137;
     size_t                                              m_width;
     size_t                                              m_height;
 
diff --git a/DirectXTex/DirectXTex.h b/DirectXTex/DirectXTex.h
index 90d26c2..c17dd1a 100644
--- a/DirectXTex/DirectXTex.h
+++ b/DirectXTex/DirectXTex.h
@@ -503,30 +503,33 @@ namespace DirectX
 
     enum TEX_COMPRESS_FLAGS
     {
-        TEX_COMPRESS_DEFAULT = 0,
+        TEX_COMPRESS_DEFAULT            = 0,
 
-        TEX_COMPRESS_RGB_DITHER = 0x10000,
+        TEX_COMPRESS_RGB_DITHER         = 0x10000,
             // Enables dithering RGB colors for BC1-3 compression
 
-        TEX_COMPRESS_A_DITHER = 0x20000,
+        TEX_COMPRESS_A_DITHER           = 0x20000,
             // Enables dithering alpha for BC1-3 compression
 
-        TEX_COMPRESS_DITHER = 0x30000,
+        TEX_COMPRESS_DITHER             = 0x30000,
             // Enables both RGB and alpha dithering for BC1-3 compression
 
-        TEX_COMPRESS_UNIFORM = 0x40000,
+        TEX_COMPRESS_UNIFORM            = 0x40000,
             // Uniform color weighting for BC1-3 compression; by default uses perceptual weighting
 
-        TEX_COMPRESS_BC7_USE_3SUBSETS = 0x80000,
+        TEX_COMPRESS_BC7_USE_3SUBSETS   = 0x80000,
             // Enables exhaustive search for BC7 compress for mode 0 and 2; by default skips trying these modes
 
-        TEX_COMPRESS_SRGB_IN = 0x1000000,
-        TEX_COMPRESS_SRGB_OUT = 0x2000000,
-        TEX_COMPRESS_SRGB = ( TEX_COMPRESS_SRGB_IN | TEX_COMPRESS_SRGB_OUT ),
+        TEX_COMPRESS_BC7_QUICK          = 0x100000,
+            // Minimal modes (usually mode 6) for BC7 compression
+
+        TEX_COMPRESS_SRGB_IN            = 0x1000000,
+        TEX_COMPRESS_SRGB_OUT           = 0x2000000,
+        TEX_COMPRESS_SRGB               = ( TEX_COMPRESS_SRGB_IN | TEX_COMPRESS_SRGB_OUT ),
             // if the input format type is IsSRGB(), then SRGB_IN is on by default
             // if the output format type is IsSRGB(), then SRGB_OUT is on by default
 
-        TEX_COMPRESS_PARALLEL = 0x10000000,
+        TEX_COMPRESS_PARALLEL           = 0x10000000,
             // Compress is free to use multithreading to improve performance (by default it does not use multithreading)
     };
 
diff --git a/DirectXTex/DirectXTexCompress.cpp b/DirectXTex/DirectXTexCompress.cpp
index 3283dc2..f0ad330 100644
--- a/DirectXTex/DirectXTexCompress.cpp
+++ b/DirectXTex/DirectXTexCompress.cpp
@@ -33,7 +33,8 @@ namespace
         static_assert(TEX_COMPRESS_DITHER == (BC_FLAGS_DITHER_RGB | BC_FLAGS_DITHER_A), "TEX_COMPRESS_* flags should match BC_FLAGS_*");
         static_assert(TEX_COMPRESS_UNIFORM == BC_FLAGS_UNIFORM, "TEX_COMPRESS_* flags should match BC_FLAGS_*");
         static_assert(TEX_COMPRESS_BC7_USE_3SUBSETS == BC_FLAGS_USE_3SUBSETS, "TEX_COMPRESS_* flags should match BC_FLAGS_*");
-        return (compress & (BC_FLAGS_DITHER_RGB | BC_FLAGS_DITHER_A | BC_FLAGS_UNIFORM | BC_FLAGS_USE_3SUBSETS));
+        static_assert(TEX_COMPRESS_BC7_QUICK == BC_FLAGS_FORCE_BC7_MODE6, "TEX_COMPRESS_* flags should match BC_FLAGS_*");
+        return (compress & (BC_FLAGS_DITHER_RGB | BC_FLAGS_DITHER_A | BC_FLAGS_UNIFORM | BC_FLAGS_USE_3SUBSETS | BC_FLAGS_FORCE_BC7_MODE6));
     }
 
     inline DWORD GetSRGBFlags(_In_ DWORD compress)
diff --git a/DirectXTex/DirectXTexCompressGPU.cpp b/DirectXTex/DirectXTexCompressGPU.cpp
index 09f62dd..fbfda44 100644
--- a/DirectXTex/DirectXTexCompressGPU.cpp
+++ b/DirectXTex/DirectXTexCompressGPU.cpp
@@ -234,7 +234,7 @@ HRESULT DirectX::Compress(
     if (FAILED(hr))
         return hr;
 
-    hr = gpubc->Prepare(srcImage.width, srcImage.height, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS));
+    hr = gpubc->Prepare(srcImage.width, srcImage.height, compress, format, alphaWeight);
     if (FAILED(hr))
         return hr;
 
@@ -320,7 +320,7 @@ HRESULT DirectX::Compress(
 
             for (size_t level = 0; level < metadata.mipLevels; ++level)
             {
-                hr = gpubc->Prepare(w, h, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS));
+                hr = gpubc->Prepare(w, h, compress, format, alphaWeight);
                 if (FAILED(hr))
                 {
                     cImages.Release();
@@ -371,7 +371,7 @@ HRESULT DirectX::Compress(
 
             for (size_t level = 0; level < metadata.mipLevels; ++level)
             {
-                hr = gpubc->Prepare(w, h, format, alphaWeight, !(compress & TEX_COMPRESS_BC7_USE_3SUBSETS));
+                hr = gpubc->Prepare(w, h, compress, format, alphaWeight);
                 if (FAILED(hr))
                 {
                     cImages.Release();
diff --git a/DirectXTex/Filters.h b/DirectXTex/Filters.h
index 0741a9a..e264664 100644
--- a/DirectXTex/Filters.h
+++ b/DirectXTex/Filters.h
@@ -295,6 +295,7 @@ namespace TriangleFilter
         }
 
         assert(pFilter != 0);
+        _Analysis_assume_(pFilter != 0);
 
         // Filter setup
         size_t sizeInBytes = TF_FILTER_SIZE;
diff --git a/Texconv/texconv.cpp b/Texconv/texconv.cpp
index 77764da..898bbd0 100644
--- a/Texconv/texconv.cpp
+++ b/Texconv/texconv.cpp
@@ -81,6 +81,7 @@ enum OPTIONS
     OPT_NORMAL_MAP_AMPLITUDE,
     OPT_COMPRESS_UNIFORM,
     OPT_COMPRESS_MAX,
+    OPT_COMPRESS_QUICK,
     OPT_COMPRESS_DITHER,
     OPT_WIC_QUALITY,
     OPT_WIC_LOSSLESS,
@@ -147,6 +148,7 @@ SValue g_pOptions[] =
     { L"nmapamp", OPT_NORMAL_MAP_AMPLITUDE },
     { L"bcuniform", OPT_COMPRESS_UNIFORM },
     { L"bcmax", OPT_COMPRESS_MAX },
+    { L"bcquick", OPT_COMPRESS_QUICK },
     { L"bcdither", OPT_COMPRESS_DITHER },
     { L"wicq", OPT_WIC_QUALITY },
     { L"wiclossless", OPT_WIC_LOSSLESS },
@@ -656,7 +658,8 @@ namespace
         wprintf(L"   -nogpu              Do not use DirectCompute-based codecs\n");
         wprintf(L"   -bcuniform          Use uniform rather than perceptual weighting for BC1-3\n");
         wprintf(L"   -bcdither           Use dithering for BC1-3\n");
-        wprintf(L"   -bcmax              Use exchaustive compression (BC7 only)\n");
+        wprintf(L"   -bcmax              Use exhaustive compression (BC7 only)\n");
+        wprintf(L"   -bcquick            Use quick compression (BC7 only)\n");
         wprintf(L"   -wicq <quality>     When writing images with WIC use quality (0.0 to 1.0)\n");
         wprintf(L"   -wiclossless        When writing images with WIC use lossless mode\n");
         wprintf(
@@ -690,9 +693,10 @@ namespace
             for (UINT adapterIndex = 0; DXGI_ERROR_NOT_FOUND != dxgiFactory->EnumAdapters(adapterIndex, adapter.ReleaseAndGetAddressOf()); ++adapterIndex)
             {
                 DXGI_ADAPTER_DESC desc;
-                adapter->GetDesc(&desc);
-
-                wprintf(L"   %u: VID:%04X, PID:%04X - %ls\n", adapterIndex, desc.VendorId, desc.DeviceId, desc.Description);
+                if (SUCCEEDED(adapter->GetDesc(&desc)))
+                {
+                    wprintf(L"   %u: VID:%04X, PID:%04X - %ls\n", adapterIndex, desc.VendorId, desc.DeviceId, desc.Description);
+                }
             }
         }
     }
@@ -1203,6 +1207,10 @@ int __cdecl wmain(_In_ int argc, _In_z_count_(argc) wchar_t* argv[])
                 dwCompress |= TEX_COMPRESS_BC7_USE_3SUBSETS;
                 break;
 
+            case OPT_COMPRESS_QUICK:
+                dwCompress |= TEX_COMPRESS_BC7_QUICK;
+                break;
+
             case OPT_COMPRESS_DITHER:
                 dwCompress |= TEX_COMPRESS_DITHER;
                 break;