From ec03b62e6d0d25b619b0601428e245fe0aaa0338 Mon Sep 17 00:00:00 2001 From: walbourn_cp Date: Wed, 10 Apr 2013 18:11:47 -0700 Subject: [PATCH] DirectXTex - WIC 32bpp RGBE is 8:8:8:8, not the same as DXGI SHAREDEXP 9:9:9:5 - Fixed LoadScanline/StoreScanline for SHAREDEXP, X2_BIAS, and R1 --- DirectXTex/DirectXTexConvert.cpp | 154 +++++++++++++++++++++++++++++-- DirectXTex/DirectXTexUtil.cpp | 1 - DirectXTex/DirectXTexWIC.cpp | 1 + 3 files changed, 146 insertions(+), 10 deletions(-) diff --git a/DirectXTex/DirectXTexConvert.cpp b/DirectXTex/DirectXTexConvert.cpp index fb7ef23..29a4a3a 100644 --- a/DirectXTex/DirectXTexConvert.cpp +++ b/DirectXTex/DirectXTexConvert.cpp @@ -23,6 +23,28 @@ using namespace DirectX::PackedVector; #endif +namespace +{ + inline float round_to_nearest( float x ) + { + float i = floorf(x); + x -= i; + if(x < 0.5f) + return i; + if(x > 0.5f) + return i + 1.f; + + float int_part; + modff( i / 2.f, &int_part ); + if ( (2.f*int_part) == i ) + { + return i; + } + + return i + 1.f; + } +}; + namespace DirectX { @@ -578,9 +600,35 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, return false; case DXGI_FORMAT_R10G10B10A2_UNORM: - case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: LOAD_SCANLINE( XMUDECN4, XMLoadUDecN4 ); + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + if ( size >= sizeof(XMUDECN4) ) + { + const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource); + for( size_t icount = 0; icount < size; icount += sizeof(XMUDECN4) ) + { + if ( dPtr >= ePtr ) break; + + int32_t ElementX = sPtr->v & 0x3FF; + int32_t ElementY = (sPtr->v >> 10) & 0x3FF; + int32_t ElementZ = (sPtr->v >> 20) & 0x3FF; + + XMVECTORF32 vResult = { + (float)(ElementX - 0x180) / 510.0f, + (float)(ElementY - 0x180) / 510.0f, + (float)(ElementZ - 0x180) / 510.0f, + (float)(sPtr->v >> 30) / 3.0f + }; + + ++sPtr; + + *(dPtr++) = vResult.v; + } + return true; + } + return false; + case DXGI_FORMAT_R10G10B10A2_UINT: LOAD_SCANLINE( XMUDEC4, XMLoadUDec4 ); @@ -825,10 +873,10 
@@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource); for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { - for( size_t bcount = 0; bcount < 8; ++bcount ) + for( size_t bcount = 8; bcount > 0; --bcount ) { if ( dPtr >= ePtr ) break; - *(dPtr++) = XMVectorSet( (((*sPtr >> bcount) & 0x1) ? 1.f : 0.f), 0.f, 0.f, 1.f ); + *(dPtr++) = XMVectorSet( (((*sPtr >> (bcount-1)) & 0x1) ? 1.f : 0.f), 0.f, 0.f, 1.f ); } ++sPtr; @@ -838,7 +886,27 @@ bool _LoadScanline( XMVECTOR* pDestination, size_t count, return false; case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: - LOAD_SCANLINE3( XMFLOAT3SE, XMLoadFloat3SE, g_XMIdentityR3 ) + if ( size >= sizeof(XMFLOAT3SE) ) + { + const XMFLOAT3SE * __restrict sPtr = reinterpret_cast<const XMFLOAT3SE*>(pSource); + for( size_t icount = 0; icount < size; icount += sizeof(XMFLOAT3SE) ) + { + union { float f; int32_t i; } fi; + fi.i = 0x33800000 + (sPtr->e << 23); + float Scale = fi.f; + + XMVECTORF32 v = { + Scale * float( sPtr->xm ), + Scale * float( sPtr->ym ), + Scale * float( sPtr->zm ), + 1.0f }; + + if ( dPtr >= ePtr ) break; + *(dPtr++) = v; + } + return true; + } + return false; case DXGI_FORMAT_R8G8_B8G8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) @@ -1057,9 +1125,36 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, return true; case DXGI_FORMAT_R10G10B10A2_UNORM: - case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: STORE_SCANLINE( XMUDECN4, XMStoreUDecN4 ); + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + if ( size >= sizeof(XMUDECN4) ) + { + static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f }; + static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f }; + static const XMVECTORF32 C = { 1023.f, 1023.f, 1023.f, 3.f }; + + XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination); + for( size_t icount = 0; icount < size; icount += sizeof(XMUDECN4) ) + { + if ( sPtr >= ePtr ) break; + + XMVECTOR N = XMVectorMultiply(*sPtr++, Scale); + N = 
XMVectorAdd( N, Bias ); + N = XMVectorClamp( N, g_XMZero, C ); + + XMFLOAT4A tmp; + XMStoreFloat4A(&tmp, N ); + + dPtr->v = ((uint32_t)tmp.w << 30) + | (((uint32_t)tmp.z & 0x3FF) << 20) + | (((uint32_t)tmp.y & 0x3FF) << 10) + | (((uint32_t)tmp.x & 0x3FF)); + ++dPtr; + } + } + return true; + case DXGI_FORMAT_R10G10B10A2_UINT: STORE_SCANLINE( XMUDEC4, XMStoreUDec4 ); @@ -1309,12 +1404,16 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) ) { uint8_t pixels = 0; - for( size_t bcount = 0; bcount < 8; ++bcount ) + for( size_t bcount = 8; bcount > 0; --bcount ) { if ( sPtr >= ePtr ) break; float v = XMVectorGetX( *sPtr++ ); - if ( v > 0.5f ) - pixels |= 1 << bcount; + + // Absolute thresholding generally doesn't give good results for all images + // Picking the 'right' threshold automatically requires whole-image analysis + + if ( v > 0.25f ) + pixels |= 1 << (bcount-1); } *(dPtr++) = pixels; } @@ -1322,7 +1421,44 @@ bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format, return true; case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: - STORE_SCANLINE( XMFLOAT3SE, XMStoreFloat3SE ) + if ( size >= sizeof(XMFLOAT3SE) ) + { + static const float maxf9 = float(0x1FF << 7); + static const float minf9 = float(1.f / (1 << 16)); + + XMFLOAT3SE * __restrict dPtr = reinterpret_cast<XMFLOAT3SE*>(pDestination); + for( size_t icount = 0; icount < size; icount += sizeof(XMFLOAT3SE) ) + { + if ( sPtr >= ePtr ) break; + + XMFLOAT3 rgb; + XMStoreFloat3( &rgb, *(sPtr++) ); + + float r = (rgb.x >= 0.f) ? ( (rgb.x > maxf9) ? maxf9 : rgb.x ) : 0.f; + float g = (rgb.y >= 0.f) ? ( (rgb.y > maxf9) ? maxf9 : rgb.y ) : 0.f; + float b = (rgb.z >= 0.f) ? ( (rgb.z > maxf9) ? maxf9 : rgb.z ) : 0.f; + + const float max_rg = (r > g) ? r : g; + const float max_rgb = (max_rg > b) ? max_rg : b; + + const float maxColor = (max_rgb > minf9) ? 
max_rgb : minf9; + + union { float f; INT32 i; } fi; + fi.f = maxColor; + fi.i &= 0xFF800000; // cut off fraction + + dPtr->e = (fi.i - 0x37800000) >> 23; + + fi.i = 0x83000000 - fi.i; + float ScaleR = fi.f; + + dPtr->xm = static_cast<uint32_t>( round_to_nearest(r * ScaleR) ); + dPtr->ym = static_cast<uint32_t>( round_to_nearest(g * ScaleR) ); + dPtr->zm = static_cast<uint32_t>( round_to_nearest(b * ScaleR) ); + ++dPtr; + } + } + return true; case DXGI_FORMAT_R8G8_B8G8_UNORM: if ( size >= sizeof(XMUBYTEN4) ) diff --git a/DirectXTex/DirectXTexUtil.cpp b/DirectXTex/DirectXTexUtil.cpp index ef96064..3eda63a 100644 --- a/DirectXTex/DirectXTexUtil.cpp +++ b/DirectXTex/DirectXTexUtil.cpp @@ -37,7 +37,6 @@ static WICTranslate g_WICFormats[] = { GUID_WICPixelFormat32bppRGBA1010102XR, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM }, // DXGI 1.1 { GUID_WICPixelFormat32bppRGBA1010102, DXGI_FORMAT_R10G10B10A2_UNORM }, - { GUID_WICPixelFormat32bppRGBE, DXGI_FORMAT_R9G9B9E5_SHAREDEXP }, { GUID_WICPixelFormat16bppBGRA5551, DXGI_FORMAT_B5G5R5A1_UNORM }, { GUID_WICPixelFormat16bppBGR565, DXGI_FORMAT_B5G6R5_UNORM }, diff --git a/DirectXTex/DirectXTexWIC.cpp b/DirectXTex/DirectXTexWIC.cpp index 911e830..b6a59f3 100644 --- a/DirectXTex/DirectXTexWIC.cpp +++ b/DirectXTex/DirectXTexWIC.cpp @@ -67,6 +67,7 @@ static WICConvert g_WICConvert[] = { GUID_WICPixelFormat128bppRGBFloat, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT { GUID_WICPixelFormat128bppRGBAFixedPoint, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT { GUID_WICPixelFormat128bppRGBFixedPoint, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT + { GUID_WICPixelFormat32bppRGBE, GUID_WICPixelFormat128bppRGBAFloat }, // DXGI_FORMAT_R32G32B32A32_FLOAT { GUID_WICPixelFormat32bppCMYK, GUID_WICPixelFormat32bppRGBA }, // DXGI_FORMAT_R8G8B8A8_UNORM { GUID_WICPixelFormat64bppCMYK, GUID_WICPixelFormat64bppRGBA }, // DXGI_FORMAT_R16G16B16A16_UNORM