From 91df32232f917429c173d0d3d17a80d15dc3e484 Mon Sep 17 00:00:00 2001
From: Chuck Walbourn
Date: Mon, 28 Nov 2022 00:03:22 -0800
Subject: [PATCH] Bias added for ARM version of 16bpp format conversions to match existing output

---
 DirectXTex/DirectXTexConvert.cpp | 20 +++++++++++++++++++-
 DirectXTex/DirectXTexP.h         |  1 +
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/DirectXTex/DirectXTexConvert.cpp b/DirectXTex/DirectXTexConvert.cpp
index f98fbc7..bbcc704 100644
--- a/DirectXTex/DirectXTexConvert.cpp
+++ b/DirectXTex/DirectXTexConvert.cpp
@@ -2068,7 +2068,11 @@ bool DirectX::Internal::StoreScanline(
             {
                 if (sPtr >= ePtr) break;
                 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
+                v = XMVectorMultiplyAdd(v, s_Scale, g_XMOneHalf);
+#else
                 v = XMVectorMultiply(v, s_Scale);
+#endif
                 XMStoreU565(dPtr++, v);
             }
             return true;
@@ -2079,12 +2083,19 @@ bool DirectX::Internal::StoreScanline(
         if (size >= sizeof(XMU555))
         {
             static const XMVECTORF32 s_Scale = { { { 31.f, 31.f, 31.f, 1.f } } };
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
+            static const XMVECTORF32 s_OneHalfXYZ = { { { 0.5f, 0.5f, 0.5f, 0.f } } };
+#endif
             XMU555 * __restrict dPtr = static_cast<XMU555*>(pDestination);
             for (size_t icount = 0; icount < (size - sizeof(XMU555) + 1); icount += sizeof(XMU555))
             {
                 if (sPtr >= ePtr) break;
                 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
+                v = XMVectorMultiplyAdd(v, s_Scale, s_OneHalfXYZ);
+#else
                 v = XMVectorMultiply(v, s_Scale);
+#endif
                 XMStoreU555(dPtr, v);
                 dPtr->w = (XMVectorGetW(v) > threshold) ? 1u : 0u;
                 ++dPtr;
@@ -2360,7 +2371,11 @@ bool DirectX::Internal::StoreScanline(
             {
                 if (sPtr >= ePtr) break;
                 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>(*sPtr++);
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
+                v = XMVectorMultiplyAdd(v, s_Scale, g_XMOneHalf);
+#else
                 v = XMVectorMultiply(v, s_Scale);
+#endif
                 XMStoreUNibble4(dPtr++, v);
             }
             return true;
@@ -2436,8 +2451,11 @@ bool DirectX::Internal::StoreScanline(
             for (size_t icount = 0; icount < (size - sizeof(uint8_t) + 1); icount += sizeof(uint8_t))
             {
                 if (sPtr >= ePtr) break;
+#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
+                const XMVECTOR v = XMVectorMultiplyAdd(*sPtr++, s_Scale, g_XMOneHalf);
+#else
                 const XMVECTOR v = XMVectorMultiply(*sPtr++, s_Scale);
-
+#endif
                 XMUNIBBLE4 nibble;
                 XMStoreUNibble4(&nibble, v);
                 *dPtr = static_cast<uint8_t>(nibble.v);
diff --git a/DirectXTex/DirectXTexP.h b/DirectXTex/DirectXTexP.h
index 38fdc44..9673713 100644
--- a/DirectXTex/DirectXTexP.h
+++ b/DirectXTex/DirectXTexP.h
@@ -70,6 +70,7 @@
 #pragma clang diagnostic ignored "-Wswitch-enum"
 #pragma clang diagnostic ignored "-Wtautological-type-limit-compare"
 #pragma clang diagnostic ignored "-Wunknown-pragmas"
+#pragma clang diagnostic ignored "-Wundef"
 #endif
 
 #ifdef _WIN32