Add a 0.5 bias to the ARM code paths of the 16bpp format conversions so the results match the existing output

This commit is contained in:
Chuck Walbourn 2022-11-28 00:03:22 -08:00
parent 44f9a71ca1
commit 91df32232f
2 changed files with 20 additions and 1 deletion

View File

@@ -2068,7 +2068,11 @@ bool DirectX::Internal::StoreScanline(
{ {
if (sPtr >= ePtr) break; if (sPtr >= ePtr) break;
XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++); XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
v = XMVectorMultiplyAdd(v, s_Scale, g_XMOneHalf);
#else
v = XMVectorMultiply(v, s_Scale); v = XMVectorMultiply(v, s_Scale);
#endif
XMStoreU565(dPtr++, v); XMStoreU565(dPtr++, v);
} }
return true; return true;
@@ -2079,12 +2083,19 @@ bool DirectX::Internal::StoreScanline(
if (size >= sizeof(XMU555)) if (size >= sizeof(XMU555))
{ {
static const XMVECTORF32 s_Scale = { { { 31.f, 31.f, 31.f, 1.f } } }; static const XMVECTORF32 s_Scale = { { { 31.f, 31.f, 31.f, 1.f } } };
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
static const XMVECTORF32 s_OneHalfXYZ = { { { 0.5f, 0.5f, 0.5f, 0.f } } };
#endif
XMU555 * __restrict dPtr = static_cast<XMU555*>(pDestination); XMU555 * __restrict dPtr = static_cast<XMU555*>(pDestination);
for (size_t icount = 0; icount < (size - sizeof(XMU555) + 1); icount += sizeof(XMU555)) for (size_t icount = 0; icount < (size - sizeof(XMU555) + 1); icount += sizeof(XMU555))
{ {
if (sPtr >= ePtr) break; if (sPtr >= ePtr) break;
XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++); XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
v = XMVectorMultiplyAdd(v, s_Scale, s_OneHalfXYZ);
#else
v = XMVectorMultiply(v, s_Scale); v = XMVectorMultiply(v, s_Scale);
#endif
XMStoreU555(dPtr, v); XMStoreU555(dPtr, v);
dPtr->w = (XMVectorGetW(v) > threshold) ? 1u : 0u; dPtr->w = (XMVectorGetW(v) > threshold) ? 1u : 0u;
++dPtr; ++dPtr;
@@ -2360,7 +2371,11 @@ bool DirectX::Internal::StoreScanline(
{ {
if (sPtr >= ePtr) break; if (sPtr >= ePtr) break;
XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++); XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
v = XMVectorMultiplyAdd(v, s_Scale, g_XMOneHalf);
#else
v = XMVectorMultiply(v, s_Scale); v = XMVectorMultiply(v, s_Scale);
#endif
XMStoreUNibble4(dPtr++, v); XMStoreUNibble4(dPtr++, v);
} }
return true; return true;
@@ -2436,8 +2451,11 @@ bool DirectX::Internal::StoreScanline(
for (size_t icount = 0; icount < (size - sizeof(uint8_t) + 1); icount += sizeof(uint8_t)) for (size_t icount = 0; icount < (size - sizeof(uint8_t) + 1); icount += sizeof(uint8_t))
{ {
if (sPtr >= ePtr) break; if (sPtr >= ePtr) break;
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
const XMVECTOR v = XMVectorMultiplyAdd(*sPtr++, s_Scale, g_XMOneHalf);
#else
const XMVECTOR v = XMVectorMultiply(*sPtr++, s_Scale); const XMVECTOR v = XMVectorMultiply(*sPtr++, s_Scale);
#endif
XMUNIBBLE4 nibble; XMUNIBBLE4 nibble;
XMStoreUNibble4(&nibble, v); XMStoreUNibble4(&nibble, v);
*dPtr = static_cast<uint8_t>(nibble.v); *dPtr = static_cast<uint8_t>(nibble.v);

View File

@@ -70,6 +70,7 @@
#pragma clang diagnostic ignored "-Wswitch-enum" #pragma clang diagnostic ignored "-Wswitch-enum"
#pragma clang diagnostic ignored "-Wtautological-type-limit-compare" #pragma clang diagnostic ignored "-Wtautological-type-limit-compare"
#pragma clang diagnostic ignored "-Wunknown-pragmas" #pragma clang diagnostic ignored "-Wunknown-pragmas"
#pragma clang diagnostic ignored "-Wundef"
#endif #endif
#ifdef _WIN32 #ifdef _WIN32