Add a 0.5 bias in the ARM code paths of the 16bpp format conversions to match existing output

2026-02-04 04:16:12 +01:00 · 2022-11-28 00:03:22 -08:00
parent 44f9a71ca1
commit 91df32232f
2 changed files with 20 additions and 1 deletions
--- a/DirectXTex/DirectXTexConvert.cpp
+++ b/DirectXTex/DirectXTexConvert.cpp
@@ -2068,7 +2068,11 @@ bool DirectX::Internal::StoreScanline(
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
 #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
                v = XMVectorMultiplyAdd(v, s_Scale, g_XMOneHalf);
 #else
                v = XMVectorMultiply(v, s_Scale);
 #endif
                XMStoreU565(dPtr++, v);
            }
            return true;
@@ -2079,12 +2083,19 @@ bool DirectX::Internal::StoreScanline(
        if (size >= sizeof(XMU555))
        {
            static const XMVECTORF32 s_Scale = { { { 31.f, 31.f, 31.f, 1.f } } };
 #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
            static const XMVECTORF32 s_OneHalfXYZ = { { { 0.5f, 0.5f, 0.5f, 0.f } } };
 #endif
            XMU555 * __restrict dPtr = static_cast<XMU555*>(pDestination);
            for (size_t icount = 0; icount < (size - sizeof(XMU555) + 1); icount += sizeof(XMU555))
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
 #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
                v = XMVectorMultiplyAdd(v, s_Scale, s_OneHalfXYZ);
 #else
                v = XMVectorMultiply(v, s_Scale);
 #endif
                XMStoreU555(dPtr, v);
                dPtr->w = (XMVectorGetW(v) > threshold) ? 1u : 0u;
                ++dPtr;
@@ -2360,7 +2371,11 @@ bool DirectX::Internal::StoreScanline(
            {
                if (sPtr >= ePtr) break;
                XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
 #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
                v = XMVectorMultiplyAdd(v, s_Scale, g_XMOneHalf);
 #else
                v = XMVectorMultiply(v, s_Scale);
 #endif
                XMStoreUNibble4(dPtr++, v);
            }
            return true;
@@ -2436,8 +2451,11 @@ bool DirectX::Internal::StoreScanline(
            for (size_t icount = 0; icount < (size - sizeof(uint8_t) + 1); icount += sizeof(uint8_t))
            {
                if (sPtr >= ePtr) break;
 #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
                const XMVECTOR v = XMVectorMultiplyAdd(*sPtr++, s_Scale, g_XMOneHalf);
 #else
                const XMVECTOR v = XMVectorMultiply(*sPtr++, s_Scale);
-
+#endif
                XMUNIBBLE4 nibble;
                XMStoreUNibble4(&nibble, v);
                *dPtr = static_cast<uint8_t>(nibble.v);
--- a/DirectXTex/DirectXTexP.h
+++ b/DirectXTex/DirectXTexP.h
@@ -70,6 +70,7 @@
 #pragma clang diagnostic ignored "-Wswitch-enum"
 #pragma clang diagnostic ignored "-Wtautological-type-limit-compare"
 #pragma clang diagnostic ignored "-Wunknown-pragmas"
 #pragma clang diagnostic ignored "-Wundef"
 #endif
 #ifdef _WIN32