diff options
author | Matt A. Tobin <email@mattatobin.com> | 2021-09-23 18:23:04 -0400 |
---|---|---|
committer | Matt A. Tobin <email@mattatobin.com> | 2021-09-23 18:23:04 -0400 |
commit | 6acb3d91365c63763c58166b562bac23eba6710b (patch) | |
tree | 28cbd03027fd3c6855410a8b6bbc488882816c22 /gfx | |
parent | 53d20b4e60cfb8385d4411a09fb82fc680852f62 (diff) | |
download | aura-central-6acb3d91365c63763c58166b562bac23eba6710b.tar.gz |
Issue %3003 - Move ycbcr to libs/
Diffstat (limited to 'gfx')
29 files changed, 0 insertions, 9119 deletions
diff --git a/gfx/moz.build b/gfx/moz.build index 6d825bae9..7914cea6b 100644 --- a/gfx/moz.build +++ b/gfx/moz.build @@ -5,7 +5,6 @@ DIRS += [ '2d', - 'ycbcr', 'src', 'gl', 'layers', diff --git a/gfx/ycbcr/LICENSE b/gfx/ycbcr/LICENSE deleted file mode 100644 index 8dc35041d..000000000 --- a/gfx/ycbcr/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/gfx/ycbcr/QuellGccWarnings.patch b/gfx/ycbcr/QuellGccWarnings.patch deleted file mode 100644 index d580ac981..000000000 --- a/gfx/ycbcr/QuellGccWarnings.patch +++ /dev/null @@ -1,40 +0,0 @@ -diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp ---- a/gfx/ycbcr/yuv_convert.cpp -+++ b/gfx/ycbcr/yuv_convert.cpp -@@ -337,16 +337,17 @@ void ScaleYCbCrToRGB32(const uint* yplan - source_dx_uv >> kFractionBits); - } - } - else { - ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr, - dest_pixel, width, source_dx); - } - #else -+ (void)source_dx_uv; - ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, width, source_dx); - #endif - } - } - // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms. - if (has_mmx) - EMMS(); -diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h ---- a/gfx/ycbcr/yuv_row.h -+++ b/gfx/ycbcr/yuv_row.h -@@ -129,14 +129,14 @@ extern SIMD_ALIGNED(int16 kCoefficientsR - #if defined(ARCH_CPU_X86) && !defined(ARCH_CPU_X86_64) - #if defined(_MSC_VER) - #define EMMS() __asm emms - #pragma warning(disable: 4799) - #else - #define EMMS() asm("emms") - #endif - #else --#define EMMS() -+#define EMMS() ((void)0) - #endif - - } // extern "C" - - #endif // MEDIA_BASE_YUV_ROW_H_ diff --git a/gfx/ycbcr/README b/gfx/ycbcr/README deleted file mode 100644 index a951bc83a..000000000 --- a/gfx/ycbcr/README +++ /dev/null @@ -1,29 +0,0 @@ -This color conversion code is from the Chromium open source project available here: - -http://code.google.com/chromium/ - -The code comes from svn revision 63840 on 2010-10-26. - -If you just want to check out this individual directory, use: - -svn co -r 63840 http://src.chromium.org/svn/trunk/src/media/base - -The code was copied from a Chromium svn checkout using the 'update.sh' script which then applies patches for our build and to add dynamic CPU detection. - -convert.patch contains the following changes: - - * Change Chromium code to build using Mozilla build system. 
- * Add runtime CPU detection for MMX - * Move default C implementation to work on all platforms. - * Change Chromium code to allow a picture region. - * The YUV conversion will convert within this picture region only. - * Add YCbCr 4:4:4 support - * Bug 619178 - Update CPU detection in yuv_convert to new SSE.h interface. - * Bug 616778 - Split yuv_convert FilterRows vectorized code into separate files so it can - be properly guarded with cpuid() calls. - -win64.patch: SSE2 optimization for Microsoft Visual C++ x64 version - -TypeFromSize.patch: Bug 656185 - Add a method to detect YUVType from plane sizes. - -QuellGccWarnings.patch: Bug 711895 - Avoid some GCC compilation warnings. diff --git a/gfx/ycbcr/TypeFromSize.patch b/gfx/ycbcr/TypeFromSize.patch deleted file mode 100644 index d08a19690..000000000 --- a/gfx/ycbcr/TypeFromSize.patch +++ /dev/null @@ -1,58 +0,0 @@ -diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp ---- a/gfx/ycbcr/yuv_convert.cpp -+++ b/gfx/ycbcr/yuv_convert.cpp -@@ -26,16 +26,32 @@ namespace mozilla { - - namespace gfx { - - // 16.16 fixed point arithmetic - const int kFractionBits = 16; - const int kFractionMax = 1 << kFractionBits; - const int kFractionMask = ((1 << kFractionBits) - 1); - -+YUVType TypeFromSize(int ywidth, -+ int yheight, -+ int cbcrwidth, -+ int cbcrheight) -+{ -+ if (ywidth == cbcrwidth && yheight == cbcrheight) { -+ return YV24; -+ } -+ else if (ywidth / 2 == cbcrwidth && yheight == cbcrheight) { -+ return YV16; -+ } -+ else { -+ return YV12; -+ } -+} -+ - // Convert a frame of YUV to 32 bit ARGB. - void ConvertYCbCrToRGB32(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int pic_x, - int pic_y, - int pic_width, -diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h ---- a/gfx/ycbcr/yuv_convert.h -+++ b/gfx/ycbcr/yuv_convert.h -@@ -36,16 +36,18 @@ enum Rotate { - // Filter affects how scaling looks. 
- enum ScaleFilter { - FILTER_NONE = 0, // No filter (point sampled). - FILTER_BILINEAR_H = 1, // Bilinear horizontal filter. - FILTER_BILINEAR_V = 2, // Bilinear vertical filter. - FILTER_BILINEAR = 3 // Bilinear filter. - }; - -+YUVType TypeFromSize(int ywidth, int yheight, int cbcrwidth, int cbcrheight); -+ - // Convert a frame of YUV to 32 bit ARGB. - // Pass in YV16/YV12 depending on source format - void ConvertYCbCrToRGB32(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int pic_x, - int pic_y, diff --git a/gfx/ycbcr/YCbCrUtils.cpp b/gfx/ycbcr/YCbCrUtils.cpp deleted file mode 100644 index 882197857..000000000 --- a/gfx/ycbcr/YCbCrUtils.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "gfx2DGlue.h" - -#include "YCbCrUtils.h" -#include "yuv_convert.h" -#include "ycbcr_to_rgb565.h" - -namespace mozilla { -namespace gfx { - -void -GetYCbCrToRGBDestFormatAndSize(const layers::PlanarYCbCrData& aData, - SurfaceFormat& aSuggestedFormat, - IntSize& aSuggestedSize) -{ - YUVType yuvtype = - TypeFromSize(aData.mYSize.width, - aData.mYSize.height, - aData.mCbCrSize.width, - aData.mCbCrSize.height); - - // 'prescale' is true if the scaling is to be done as part of the - // YCbCr to RGB conversion rather than on the RGB data when rendered. 
- bool prescale = aSuggestedSize.width > 0 && aSuggestedSize.height > 0 && - aSuggestedSize != aData.mPicSize; - - if (aSuggestedFormat == SurfaceFormat::R5G6B5_UINT16) { -#if defined(HAVE_YCBCR_TO_RGB565) - if (prescale && - !IsScaleYCbCrToRGB565Fast(aData.mPicX, - aData.mPicY, - aData.mPicSize.width, - aData.mPicSize.height, - aSuggestedSize.width, - aSuggestedSize.height, - yuvtype, - FILTER_BILINEAR) && - IsConvertYCbCrToRGB565Fast(aData.mPicX, - aData.mPicY, - aData.mPicSize.width, - aData.mPicSize.height, - yuvtype)) { - prescale = false; - } -#else - // yuv2rgb16 function not available - aSuggestedFormat = SurfaceFormat::B8G8R8X8; -#endif - } - else if (aSuggestedFormat != SurfaceFormat::B8G8R8X8) { - // No other formats are currently supported. - aSuggestedFormat = SurfaceFormat::B8G8R8X8; - } - if (aSuggestedFormat == SurfaceFormat::B8G8R8X8) { - /* ScaleYCbCrToRGB32 does not support a picture offset, nor 4:4:4 data. - See bugs 639415 and 640073. */ - if (aData.mPicX != 0 || aData.mPicY != 0 || yuvtype == YV24) - prescale = false; - } - if (!prescale) { - aSuggestedSize = aData.mPicSize; - } -} - -void -ConvertYCbCrToRGB(const layers::PlanarYCbCrData& aData, - const SurfaceFormat& aDestFormat, - const IntSize& aDestSize, - unsigned char* aDestBuffer, - int32_t aStride) -{ - // ConvertYCbCrToRGB et al. assume the chroma planes are rounded up if the - // luma plane is odd sized. - MOZ_ASSERT((aData.mCbCrSize.width == aData.mYSize.width || - aData.mCbCrSize.width == (aData.mYSize.width + 1) >> 1) && - (aData.mCbCrSize.height == aData.mYSize.height || - aData.mCbCrSize.height == (aData.mYSize.height + 1) >> 1)); - YUVType yuvtype = - TypeFromSize(aData.mYSize.width, - aData.mYSize.height, - aData.mCbCrSize.width, - aData.mCbCrSize.height); - - // Convert from YCbCr to RGB now, scaling the image if needed. 
- if (aDestSize != aData.mPicSize) { -#if defined(HAVE_YCBCR_TO_RGB565) - if (aDestFormat == SurfaceFormat::R5G6B5_UINT16) { - ScaleYCbCrToRGB565(aData.mYChannel, - aData.mCbChannel, - aData.mCrChannel, - aDestBuffer, - aData.mPicX, - aData.mPicY, - aData.mPicSize.width, - aData.mPicSize.height, - aDestSize.width, - aDestSize.height, - aData.mYStride, - aData.mCbCrStride, - aStride, - yuvtype, - FILTER_BILINEAR); - } else -#endif - ScaleYCbCrToRGB32(aData.mYChannel, // - aData.mCbChannel, - aData.mCrChannel, - aDestBuffer, - aData.mPicSize.width, - aData.mPicSize.height, - aDestSize.width, - aDestSize.height, - aData.mYStride, - aData.mCbCrStride, - aStride, - yuvtype, - aData.mYUVColorSpace, - FILTER_BILINEAR); - } else { // no prescale -#if defined(HAVE_YCBCR_TO_RGB565) - if (aDestFormat == SurfaceFormat::R5G6B5_UINT16) { - ConvertYCbCrToRGB565(aData.mYChannel, - aData.mCbChannel, - aData.mCrChannel, - aDestBuffer, - aData.mPicX, - aData.mPicY, - aData.mPicSize.width, - aData.mPicSize.height, - aData.mYStride, - aData.mCbCrStride, - aStride, - yuvtype); - } else // aDestFormat != SurfaceFormat::R5G6B5_UINT16 -#endif - ConvertYCbCrToRGB32(aData.mYChannel, // - aData.mCbChannel, - aData.mCrChannel, - aDestBuffer, - aData.mPicX, - aData.mPicY, - aData.mPicSize.width, - aData.mPicSize.height, - aData.mYStride, - aData.mCbCrStride, - aStride, - yuvtype, - aData.mYUVColorSpace); - } -} - -} // namespace gfx -} // namespace mozilla diff --git a/gfx/ycbcr/YCbCrUtils.h b/gfx/ycbcr/YCbCrUtils.h deleted file mode 100644 index 1cd2e1c4f..000000000 --- a/gfx/ycbcr/YCbCrUtils.h +++ /dev/null @@ -1,30 +0,0 @@ -/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef MOZILLA_GFX_UTILS_H_ -#define MOZILLA_GFX_UTILS_H_ - -#include "mozilla/gfx/Types.h" -#include "ImageContainer.h" - -namespace mozilla { -namespace gfx { - -void -GetYCbCrToRGBDestFormatAndSize(const layers::PlanarYCbCrData& aData, - SurfaceFormat& aSuggestedFormat, - IntSize& aSuggestedSize); - -void -ConvertYCbCrToRGB(const layers::PlanarYCbCrData& aData, - const SurfaceFormat& aDestFormat, - const IntSize& aDestSize, - unsigned char* aDestBuffer, - int32_t aStride); - -} // namespace gfx -} // namespace mozilla - -#endif /* MOZILLA_GFX_UTILS_H_ */ diff --git a/gfx/ycbcr/chromium_types.h b/gfx/ycbcr/chromium_types.h deleted file mode 100644 index dceac4766..000000000 --- a/gfx/ycbcr/chromium_types.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#ifndef GFX_CHROMIUMTYPES_H -#define GFX_CHROMIUMTYPES_H - -#include <stdint.h> - -#include "libyuv/basic_types.h" - -// From Chromium build_config.h: -// Processor architecture detection. 
For more info on what's defined, see: -// http://msdn.microsoft.com/en-us/library/b0084kay.aspx -// http://www.agner.org/optimize/calling_conventions.pdf -// or with gcc, run: "echo | gcc -E -dM -" -#if defined(_M_X64) || defined(__x86_64__) -#define ARCH_CPU_X86_FAMILY 1 -#define ARCH_CPU_X86_64 1 -#define ARCH_CPU_64_BITS 1 -#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) -#define ARCH_CPU_X86_FAMILY 1 -#define ARCH_CPU_X86_32 1 -#define ARCH_CPU_X86 1 -#define ARCH_CPU_32_BITS 1 -#elif defined(__ARMEL__) -#define ARCH_CPU_ARM_FAMILY 1 -#define ARCH_CPU_ARMEL 1 -#define ARCH_CPU_32_BITS 1 -#elif defined(__ppc__) || defined(__powerpc) || defined(__PPC__) -#define ARCH_CPU_PPC_FAMILY 1 -#define ARCH_CPU_PPC 1 -#define ARCH_CPU_32_BITS 1 -#elif defined(__sparc) -#define ARCH_CPU_SPARC_FAMILY 1 -#define ARCH_CPU_SPARC 1 -#define ARCH_CPU_32_BITS 1 -#elif defined(__sparcv9) -#define ARCH_CPU_SPARC_FAMILY 1 -#define ARCH_CPU_SPARC 1 -#define ARCH_CPU_64_BITS 1 -#elif defined(__aarch64__) -#define ARCH_CPU_AARCH64_FAMILY 1 -#define ARCH_CPU_AARCH64 1 -#define ARCH_CPU_64_BITS 1 -#else -#warning Please add support for your architecture in chromium_types.h -#endif - -#endif // GFX_CHROMIUMTYPES_H diff --git a/gfx/ycbcr/convert.patch.outdated b/gfx/ycbcr/convert.patch.outdated deleted file mode 100644 index e39f923b3..000000000 --- a/gfx/ycbcr/convert.patch.outdated +++ /dev/null @@ -1,3143 +0,0 @@ -diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp ---- a/gfx/ycbcr/yuv_convert.cpp -+++ b/gfx/ycbcr/yuv_convert.cpp -@@ -6,145 +6,102 @@ - // http://www.fourcc.org/yuv.php - // The actual conversion is best described here - // http://en.wikipedia.org/wiki/YUV - // An article on optimizing YUV conversion using tables instead of multiplies - // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf - // - // YV12 is a full plane of Y and a half height, half width chroma planes - // YV16 is a full plane of Y and a full height, half width chroma planes 
-+// YV24 is a full plane of Y and a full height, full width chroma planes - // - // ARGB pixel format is output, which on little endian is stored as BGRA. - // The alpha is set to 255, allowing the application to use RGBA or RGB32. - --#include "media/base/yuv_convert.h" -+#include "yuv_convert.h" - - // Header for low level row functions. --#include "media/base/yuv_row.h" -- --#if USE_MMX --#if defined(_MSC_VER) --#include <intrin.h> --#else --#include <mmintrin.h> --#endif --#endif -- --#if USE_SSE2 --#include <emmintrin.h> --#endif -- --namespace media { -- -+#include "yuv_row.h" -+#include "mozilla/SSE.h" -+ -+namespace mozilla { -+ -+namespace gfx { -+ - // 16.16 fixed point arithmetic - const int kFractionBits = 16; - const int kFractionMax = 1 << kFractionBits; - const int kFractionMask = ((1 << kFractionBits) - 1); - - // Convert a frame of YUV to 32 bit ARGB. --void ConvertYUVToRGB32(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int height, -- int y_pitch, -- int uv_pitch, -- int rgb_pitch, -- YUVType yuv_type) { -- unsigned int y_shift = yuv_type; -- for (int y = 0; y < height; ++y) { -- uint8* rgb_row = rgb_buf + y * rgb_pitch; -- const uint8* y_ptr = y_buf + y * y_pitch; -- const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch; -- const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch; -- -- FastConvertYUVToRGB32Row(y_ptr, -- u_ptr, -- v_ptr, -- rgb_row, -- width); -- } -+void ConvertYCbCrToRGB32(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int pic_x, -+ int pic_y, -+ int pic_width, -+ int pic_height, -+ int y_pitch, -+ int uv_pitch, -+ int rgb_pitch, -+ YUVType yuv_type) { -+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0; -+ unsigned int x_shift = yuv_type == YV24 ? 0 : 1; -+ // Test for SSE because the optimized code uses movntq, which is not part of MMX. 
-+ bool has_sse = supports_mmx() && supports_sse(); -+ // There is no optimized YV24 SSE routine so we check for this and -+ // fall back to the C code. -+ has_sse &= yuv_type != YV24; -+ bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0; -+ int x_width = odd_pic_x ? pic_width - 1 : pic_width; -+ -+ for (int y = pic_y; y < pic_height + pic_y; ++y) { -+ uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch; -+ const uint8* y_ptr = y_buf + y * y_pitch + pic_x; -+ const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift); -+ const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift); -+ -+ if (odd_pic_x) { -+ // Handle the single odd pixel manually and use the -+ // fast routines for the remaining. -+ FastConvertYUVToRGB32Row_C(y_ptr++, -+ u_ptr++, -+ v_ptr++, -+ rgb_row, -+ 1, -+ x_shift); -+ rgb_row += 4; -+ } -+ -+ if (has_sse) { -+ FastConvertYUVToRGB32Row(y_ptr, -+ u_ptr, -+ v_ptr, -+ rgb_row, -+ x_width); -+ } -+ else { -+ FastConvertYUVToRGB32Row_C(y_ptr, -+ u_ptr, -+ v_ptr, -+ rgb_row, -+ x_width, -+ x_shift); -+ } -+ } - - // MMX used for FastConvertYUVToRGB32Row requires emms instruction. -- EMMS(); --} -- --#if USE_SSE2 --// FilterRows combines two rows of the image using linear interpolation. 
--// SSE2 version does 16 pixels at a time -- --static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, -- int source_width, int source_y_fraction) { -- __m128i zero = _mm_setzero_si128(); -- __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); -- __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); -- -- const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); -- const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); -- __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); -- __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); -- -- do { -- __m128i y0 = _mm_loadu_si128(y0_ptr128); -- __m128i y1 = _mm_loadu_si128(y1_ptr128); -- __m128i y2 = _mm_unpackhi_epi8(y0, zero); -- __m128i y3 = _mm_unpackhi_epi8(y1, zero); -- y0 = _mm_unpacklo_epi8(y0, zero); -- y1 = _mm_unpacklo_epi8(y1, zero); -- y0 = _mm_mullo_epi16(y0, y0_fraction); -- y1 = _mm_mullo_epi16(y1, y1_fraction); -- y2 = _mm_mullo_epi16(y2, y0_fraction); -- y3 = _mm_mullo_epi16(y3, y1_fraction); -- y0 = _mm_add_epi16(y0, y1); -- y2 = _mm_add_epi16(y2, y3); -- y0 = _mm_srli_epi16(y0, 8); -- y2 = _mm_srli_epi16(y2, 8); -- y0 = _mm_packus_epi16(y0, y2); -- *dest128++ = y0; -- ++y0_ptr128; -- ++y1_ptr128; -- } while (dest128 < end128); --} --#elif USE_MMX --// MMX version does 8 pixels at a time --static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, -- int source_width, int source_y_fraction) { -- __m64 zero = _mm_setzero_si64(); -- __m64 y1_fraction = _mm_set1_pi16(source_y_fraction); -- __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction); -- -- const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr); -- const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr); -- __m64* dest64 = reinterpret_cast<__m64*>(ybuf); -- __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width); -- -- do { -- __m64 y0 = *y0_ptr64++; -- __m64 y1 = *y1_ptr64++; -- __m64 y2 = _mm_unpackhi_pi8(y0, zero); -- __m64 y3 = 
_mm_unpackhi_pi8(y1, zero); -- y0 = _mm_unpacklo_pi8(y0, zero); -- y1 = _mm_unpacklo_pi8(y1, zero); -- y0 = _mm_mullo_pi16(y0, y0_fraction); -- y1 = _mm_mullo_pi16(y1, y1_fraction); -- y2 = _mm_mullo_pi16(y2, y0_fraction); -- y3 = _mm_mullo_pi16(y3, y1_fraction); -- y0 = _mm_add_pi16(y0, y1); -- y2 = _mm_add_pi16(y2, y3); -- y0 = _mm_srli_pi16(y0, 8); -- y2 = _mm_srli_pi16(y2, 8); -- y0 = _mm_packs_pu16(y0, y2); -- *dest64++ = y0; -- } while (dest64 < end64); --} --#else // no MMX or SSE2 -+ if (has_sse) -+ EMMS(); -+} -+ - // C version does 8 at a time to mimic MMX code --static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, -- int source_width, int source_y_fraction) { -+static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, -+ int source_width, int source_y_fraction) { - int y1_fraction = source_y_fraction; - int y0_fraction = 256 - y1_fraction; - uint8* end = ybuf + source_width; - do { - ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8; - ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8; - ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8; - ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8; -@@ -152,46 +140,77 @@ static void FilterRows(uint8* ybuf, cons - ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8; - ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8; - ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8; - y0_ptr += 8; - y1_ptr += 8; - ybuf += 8; - } while (ybuf < end); - } --#endif -+ -+#ifdef MOZILLA_MAY_SUPPORT_MMX -+void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, -+ int source_width, int source_y_fraction); -+#endif -+ -+#ifdef MOZILLA_MAY_SUPPORT_SSE2 -+void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, -+ int source_width, int source_y_fraction); -+#endif -+ -+static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr, -+ 
const uint8* y1_ptr, int source_width, -+ int source_y_fraction) { -+#ifdef MOZILLA_MAY_SUPPORT_SSE2 -+ if (mozilla::supports_sse2()) { -+ FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); -+ return; -+ } -+#endif -+ -+#ifdef MOZILLA_MAY_SUPPORT_MMX -+ if (mozilla::supports_mmx()) { -+ FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); -+ return; -+ } -+#endif -+ -+ FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); -+} - - - // Scale a frame of YUV to 32 bit ARGB. --void ScaleYUVToRGB32(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int source_width, -- int source_height, -- int width, -- int height, -- int y_pitch, -- int uv_pitch, -- int rgb_pitch, -- YUVType yuv_type, -- Rotate view_rotate, -- ScaleFilter filter) { -+void ScaleYCbCrToRGB32(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int source_width, -+ int source_height, -+ int width, -+ int height, -+ int y_pitch, -+ int uv_pitch, -+ int rgb_pitch, -+ YUVType yuv_type, -+ Rotate view_rotate, -+ ScaleFilter filter) { -+ bool has_mmx = supports_mmx(); -+ - // 4096 allows 3 buffers to fit in 12k. - // Helps performance on CPU with 16K L1 cache. - // Large enough for 3830x2160 and 30" displays which are 2560x1600. - const int kFilterBufferSize = 4096; - // Disable filtering if the screen is too big (to avoid buffer overflows). - // This should never happen to regular users: they don't have monitors - // wider than 4096 pixels. - // TODO(fbarchard): Allow rotated videos to filter. - if (source_width > kFilterBufferSize || view_rotate) - filter = FILTER_NONE; - -- unsigned int y_shift = yuv_type; -+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0; - // Diagram showing origin and direction of source sampling. - // ->0 4<- - // 7 3 - // - // 6 5 - // ->1 2<- - // Rotations that start at right side of image. 
- if ((view_rotate == ROTATE_180) || -@@ -276,17 +295,17 @@ void ScaleYUVToRGB32(const uint8* y_buf, - int source_uv_fraction = - ((source_y_subpixel >> y_shift) & kFractionMask) >> 8; - - const uint8* y_ptr = y0_ptr; - const uint8* u_ptr = u0_ptr; - const uint8* v_ptr = v0_ptr; - // Apply vertical filtering if necessary. - // TODO(fbarchard): Remove memcpy when not necessary. -- if (filter & media::FILTER_BILINEAR_V) { -+ if (filter & mozilla::gfx::FILTER_BILINEAR_V) { - if (yscale_fixed != kFractionMax && - source_y_fraction && ((source_y + 1) < source_height)) { - FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); - } else { - memcpy(ybuf, y0_ptr, source_width); - } - y_ptr = ybuf; - ybuf[source_width] = ybuf[source_width-1]; -@@ -303,44 +322,50 @@ void ScaleYUVToRGB32(const uint8* y_buf, - u_ptr = ubuf; - v_ptr = vbuf; - ubuf[uv_source_width] = ubuf[uv_source_width - 1]; - vbuf[uv_source_width] = vbuf[uv_source_width - 1]; - } - if (source_dx == kFractionMax) { // Not scaled - FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, width); -- } else { -- if (filter & FILTER_BILINEAR_H) { -+ } else if (filter & FILTER_BILINEAR_H) { - LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, width, source_dx); - } else { - // Specialized scalers and rotation. --#if USE_MMX && defined(_MSC_VER) -+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86) -+ if(mozilla::supports_sse()) { - if (width == (source_width * 2)) { -- DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, -- dest_pixel, width); -+ DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, -+ dest_pixel, width); - } else if ((source_dx & kFractionMask) == 0) { - // Scaling by integer scale factor. ie half. -- ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, -- dest_pixel, width, -- source_dx >> kFractionBits); -+ ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, -+ dest_pixel, width, -+ source_dx >> kFractionBits); - } else if (source_dx_uv == source_dx) { // Not rotated. 
- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, width, source_dx); - } else { -- RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, -- dest_pixel, width, -- source_dx >> kFractionBits, -- source_dx_uv >> kFractionBits); -+ RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, -+ dest_pixel, width, -+ source_dx >> kFractionBits, -+ source_dx_uv >> kFractionBits); - } -+ } -+ else { -+ ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr, -+ dest_pixel, width, source_dx); -+ } - #else -- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, -- dest_pixel, width, source_dx); --#endif -- } -+ ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, -+ dest_pixel, width, source_dx); -+#endif - } - } - // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms. -- EMMS(); --} -- --} // namespace media -+ if (has_mmx) -+ EMMS(); -+} -+ -+} // namespace gfx -+} // namespace mozilla -diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h ---- a/gfx/ycbcr/yuv_convert.h -+++ b/gfx/ycbcr/yuv_convert.h -@@ -1,72 +1,79 @@ - // Copyright (c) 2010 The Chromium Authors. All rights reserved. - // Use of this source code is governed by a BSD-style license that can be - // found in the LICENSE file. - - #ifndef MEDIA_BASE_YUV_CONVERT_H_ - #define MEDIA_BASE_YUV_CONVERT_H_ - --#include "base/basictypes.h" -- --namespace media { -- -+#include "chromium_types.h" -+#include "gfxCore.h" -+ -+namespace mozilla { -+ -+namespace gfx { -+ - // Type of YUV surface. - // The value of these enums matter as they are used to shift vertical indices. - enum YUVType { -- YV16 = 0, // YV16 is half width and full height chroma channels. -- YV12 = 1, // YV12 is half width and half height chroma channels. -+ YV12 = 0, // YV12 is half width and half height chroma channels. -+ YV16 = 1, // YV16 is half width and full height chroma channels. -+ YV24 = 2 // YV24 is full width and full height chroma channels. - }; - - // Mirror means flip the image horizontally, as in looking in a mirror. - // Rotate happens after mirroring. 
- enum Rotate { - ROTATE_0, // Rotation off. - ROTATE_90, // Rotate clockwise. - ROTATE_180, // Rotate upside down. - ROTATE_270, // Rotate counter clockwise. - MIRROR_ROTATE_0, // Mirror horizontally. - MIRROR_ROTATE_90, // Mirror then Rotate clockwise. - MIRROR_ROTATE_180, // Mirror vertically. -- MIRROR_ROTATE_270, // Transpose. -+ MIRROR_ROTATE_270 // Transpose. - }; - - // Filter affects how scaling looks. - enum ScaleFilter { - FILTER_NONE = 0, // No filter (point sampled). - FILTER_BILINEAR_H = 1, // Bilinear horizontal filter. - FILTER_BILINEAR_V = 2, // Bilinear vertical filter. -- FILTER_BILINEAR = 3, // Bilinear filter. -+ FILTER_BILINEAR = 3 // Bilinear filter. - }; - - // Convert a frame of YUV to 32 bit ARGB. - // Pass in YV16/YV12 depending on source format --void ConvertYUVToRGB32(const uint8* yplane, -- const uint8* uplane, -- const uint8* vplane, -- uint8* rgbframe, -- int width, -- int height, -- int ystride, -- int uvstride, -- int rgbstride, -- YUVType yuv_type); -+void ConvertYCbCrToRGB32(const uint8* yplane, -+ const uint8* uplane, -+ const uint8* vplane, -+ uint8* rgbframe, -+ int pic_x, -+ int pic_y, -+ int pic_width, -+ int pic_height, -+ int ystride, -+ int uvstride, -+ int rgbstride, -+ YUVType yuv_type); - - // Scale a frame of YUV to 32 bit ARGB. - // Supports rotation and mirroring. 
--void ScaleYUVToRGB32(const uint8* yplane, -- const uint8* uplane, -- const uint8* vplane, -- uint8* rgbframe, -- int source_width, -- int source_height, -- int width, -- int height, -- int ystride, -- int uvstride, -- int rgbstride, -- YUVType yuv_type, -- Rotate view_rotate, -- ScaleFilter filter); -- --} // namespace media -- -+void ScaleYCbCrToRGB32(const uint8* yplane, -+ const uint8* uplane, -+ const uint8* vplane, -+ uint8* rgbframe, -+ int source_width, -+ int source_height, -+ int width, -+ int height, -+ int ystride, -+ int uvstride, -+ int rgbstride, -+ YUVType yuv_type, -+ Rotate view_rotate, -+ ScaleFilter filter); -+ -+} // namespace gfx -+} // namespace mozilla -+ - #endif // MEDIA_BASE_YUV_CONVERT_H_ -diff --git a/gfx/ycbcr/yuv_convert_mmx.cpp b/gfx/ycbcr/yuv_convert_mmx.cpp -new file mode 100644 ---- /dev/null -+++ b/gfx/ycbcr/yuv_convert_mmx.cpp -@@ -0,0 +1,45 @@ -+// Copyright (c) 2010 The Chromium Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style license that can be -+// found in the LICENSE file. -+ -+#include <mmintrin.h> -+#include "yuv_row.h" -+ -+namespace mozilla { -+namespace gfx { -+ -+// FilterRows combines two rows of the image using linear interpolation. -+// MMX version does 8 pixels at a time. 
-+void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, -+ int source_width, int source_y_fraction) { -+ __m64 zero = _mm_setzero_si64(); -+ __m64 y1_fraction = _mm_set1_pi16(source_y_fraction); -+ __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction); -+ -+ const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr); -+ const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr); -+ __m64* dest64 = reinterpret_cast<__m64*>(ybuf); -+ __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width); -+ -+ do { -+ __m64 y0 = *y0_ptr64++; -+ __m64 y1 = *y1_ptr64++; -+ __m64 y2 = _mm_unpackhi_pi8(y0, zero); -+ __m64 y3 = _mm_unpackhi_pi8(y1, zero); -+ y0 = _mm_unpacklo_pi8(y0, zero); -+ y1 = _mm_unpacklo_pi8(y1, zero); -+ y0 = _mm_mullo_pi16(y0, y0_fraction); -+ y1 = _mm_mullo_pi16(y1, y1_fraction); -+ y2 = _mm_mullo_pi16(y2, y0_fraction); -+ y3 = _mm_mullo_pi16(y3, y1_fraction); -+ y0 = _mm_add_pi16(y0, y1); -+ y2 = _mm_add_pi16(y2, y3); -+ y0 = _mm_srli_pi16(y0, 8); -+ y2 = _mm_srli_pi16(y2, 8); -+ y0 = _mm_packs_pu16(y0, y2); -+ *dest64++ = y0; -+ } while (dest64 < end64); -+} -+ -+} -+} -diff --git a/gfx/ycbcr/yuv_convert_sse2.cpp b/gfx/ycbcr/yuv_convert_sse2.cpp -new file mode 100644 ---- /dev/null -+++ b/gfx/ycbcr/yuv_convert_sse2.cpp -@@ -0,0 +1,47 @@ -+// Copyright (c) 2010 The Chromium Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style license that can be -+// found in the LICENSE file. -+ -+#include <emmintrin.h> -+#include "yuv_row.h" -+ -+namespace mozilla { -+namespace gfx { -+ -+// FilterRows combines two rows of the image using linear interpolation. -+// SSE2 version does 16 pixels at a time. 
-+void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, -+ int source_width, int source_y_fraction) { -+ __m128i zero = _mm_setzero_si128(); -+ __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); -+ __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); -+ -+ const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); -+ const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); -+ __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); -+ __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); -+ -+ do { -+ __m128i y0 = _mm_loadu_si128(y0_ptr128); -+ __m128i y1 = _mm_loadu_si128(y1_ptr128); -+ __m128i y2 = _mm_unpackhi_epi8(y0, zero); -+ __m128i y3 = _mm_unpackhi_epi8(y1, zero); -+ y0 = _mm_unpacklo_epi8(y0, zero); -+ y1 = _mm_unpacklo_epi8(y1, zero); -+ y0 = _mm_mullo_epi16(y0, y0_fraction); -+ y1 = _mm_mullo_epi16(y1, y1_fraction); -+ y2 = _mm_mullo_epi16(y2, y0_fraction); -+ y3 = _mm_mullo_epi16(y3, y1_fraction); -+ y0 = _mm_add_epi16(y0, y1); -+ y2 = _mm_add_epi16(y2, y3); -+ y0 = _mm_srli_epi16(y0, 8); -+ y2 = _mm_srli_epi16(y2, 8); -+ y0 = _mm_packus_epi16(y0, y2); -+ *dest128++ = y0; -+ ++y0_ptr128; -+ ++y1_ptr128; -+ } while (dest128 < end128); -+} -+ -+} -+} -diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h ---- a/gfx/ycbcr/yuv_row.h -+++ b/gfx/ycbcr/yuv_row.h -@@ -5,109 +5,133 @@ - // yuv_row internal functions to handle YUV conversion and scaling to RGB. - // These functions are used from both yuv_convert.cc and yuv_scale.cc. - - // TODO(fbarchard): Write function that can handle rotation and scaling. - - #ifndef MEDIA_BASE_YUV_ROW_H_ - #define MEDIA_BASE_YUV_ROW_H_ - --#include "base/basictypes.h" -+#include "chromium_types.h" - - extern "C" { - // Can only do 1x. - // This is the second fastest of the scalers. 
- void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - --// Can do 1x, half size or any scale down by an integer amount. --// Step can be negative (mirroring, rotate 180). --// This is the third fastest of the scalers. --void ConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int step); -- --// Rotate is like Convert, but applies different step to Y versus U and V. --// This allows rotation by 90 or 270, by stepping by stride. --// This is the forth fastest of the scalers. --void RotateConvertYUVToRGB32Row(const uint8* y_buf, -+void FastConvertYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, -- int ystep, -- int uvstep); -+ unsigned int x_shift); -+ -+void FastConvertYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width); -+ -+// Can do 1x, half size or any scale down by an integer amount. -+// Step can be negative (mirroring, rotate 180). -+// This is the third fastest of the scalers. -+// Only defined on Windows x86-32. -+void ConvertYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int step); -+ -+// Rotate is like Convert, but applies different step to Y versus U and V. -+// This allows rotation by 90 or 270, by stepping by stride. -+// This is the forth fastest of the scalers. -+// Only defined on Windows x86-32. -+void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int ystep, -+ int uvstep); - - // Doubler does 4 pixels at a time. Each pixel is replicated. - // This is the fastest of the scalers. 
--void DoubleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width); -+// Only defined on Windows x86-32. -+void DoubleYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width); - - // Handles arbitrary scaling up or down. - // Mirroring is supported, but not 90 or 270 degree rotation. - // Chroma is under sampled every 2 pixels for performance. - void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -+void ScaleYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx); -+ -+void ScaleYUVToRGB32Row_C(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx); -+ - // Handles arbitrary scaling up or down with bilinear filtering. - // Mirroring is supported, but not 90 or 270 degree rotation. - // Chroma is under sampled every 2 pixels for performance. - // This is the slowest of the scalers. - void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -+void LinearScaleYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx); -+ -+void LinearScaleYUVToRGB32Row_C(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx); -+ -+ - #if defined(_MSC_VER) - #define SIMD_ALIGNED(var) __declspec(align(16)) var - #else - #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) - #endif - extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]); - --// Method to force C version. 
--//#define USE_MMX 0 --//#define USE_SSE2 0 -- --#if !defined(USE_MMX) --// Windows, Mac and Linux/BSD use MMX --#if defined(__MMX__) || defined(_MSC_VER) --#define USE_MMX 1 --#else --#define USE_MMX 0 --#endif --#endif -- --#if !defined(USE_SSE2) --#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || _M_IX86_FP==2 --#define USE_SSE2 1 --#else --#define USE_SSE2 0 --#endif --#endif -- - // x64 uses MMX2 (SSE) so emms is not required. - // Warning C4799: function has no EMMS instruction. - // EMMS() is slow and should be called by the calling function once per image. --#if USE_MMX && !defined(ARCH_CPU_X86_64) -+#if defined(ARCH_CPU_X86) && !defined(ARCH_CPU_X86_64) - #if defined(_MSC_VER) - #define EMMS() __asm emms - #pragma warning(disable: 4799) - #else - #define EMMS() asm("emms") - #endif - #else - #define EMMS() -diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp ---- a/gfx/ycbcr/yuv_row_c.cpp -+++ b/gfx/ycbcr/yuv_row_c.cpp -@@ -1,812 +1,18 @@ - // Copyright (c) 2010 The Chromium Authors. All rights reserved. - // Use of this source code is governed by a BSD-style license that can be - // found in the LICENSE file. - --#include "media/base/yuv_row.h" -- --#ifdef _DEBUG --#include "base/logging.h" --#else -+#include "yuv_row.h" -+ - #define DCHECK(a) --#endif - - extern "C" { - --#if USE_SSE2 && defined(ARCH_CPU_X86_64) -- --// AMD64 ABI uses register paremters. 
--void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi -- const uint8* u_buf, // rsi -- const uint8* v_buf, // rdx -- uint8* rgb_buf, // rcx -- int width) { // r8 -- asm( -- "jmp convertend\n" --"convertloop:" -- "movzb (%1),%%r10\n" -- "add $0x1,%1\n" -- "movzb (%2),%%r11\n" -- "add $0x1,%2\n" -- "movq 2048(%5,%%r10,8),%%xmm0\n" -- "movzb (%0),%%r10\n" -- "movq 4096(%5,%%r11,8),%%xmm1\n" -- "movzb 0x1(%0),%%r11\n" -- "paddsw %%xmm1,%%xmm0\n" -- "movq (%5,%%r10,8),%%xmm2\n" -- "add $0x2,%0\n" -- "movq (%5,%%r11,8),%%xmm3\n" -- "paddsw %%xmm0,%%xmm2\n" -- "paddsw %%xmm0,%%xmm3\n" -- "shufps $0x44,%%xmm3,%%xmm2\n" -- "psraw $0x6,%%xmm2\n" -- "packuswb %%xmm2,%%xmm2\n" -- "movq %%xmm2,0x0(%3)\n" -- "add $0x8,%3\n" --"convertend:" -- "sub $0x2,%4\n" -- "jns convertloop\n" -- --"convertnext:" -- "add $0x1,%4\n" -- "js convertdone\n" -- -- "movzb (%1),%%r10\n" -- "movq 2048(%5,%%r10,8),%%xmm0\n" -- "movzb (%2),%%r10\n" -- "movq 4096(%5,%%r10,8),%%xmm1\n" -- "paddsw %%xmm1,%%xmm0\n" -- "movzb (%0),%%r10\n" -- "movq (%5,%%r10,8),%%xmm1\n" -- "paddsw %%xmm0,%%xmm1\n" -- "psraw $0x6,%%xmm1\n" -- "packuswb %%xmm1,%%xmm1\n" -- "movd %%xmm1,0x0(%3)\n" --"convertdone:" -- : -- : "r"(y_buf), // %0 -- "r"(u_buf), // %1 -- "r"(v_buf), // %2 -- "r"(rgb_buf), // %3 -- "r"(width), // %4 -- "r" (kCoefficientsRgbY) // %5 -- : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" --); --} -- --void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi -- const uint8* u_buf, // rsi -- const uint8* v_buf, // rdx -- uint8* rgb_buf, // rcx -- int width, // r8 -- int source_dx) { // r9 -- asm( -- "xor %%r11,%%r11\n" -- "sub $0x2,%4\n" -- "js scalenext\n" -- --"scaleloop:" -- "mov %%r11,%%r10\n" -- "sar $0x11,%%r10\n" -- "movzb (%1,%%r10,1),%%rax\n" -- "movq 2048(%5,%%rax,8),%%xmm0\n" -- "movzb (%2,%%r10,1),%%rax\n" -- "movq 4096(%5,%%rax,8),%%xmm1\n" -- "lea (%%r11,%6),%%r10\n" -- "sar $0x10,%%r11\n" -- "movzb (%0,%%r11,1),%%rax\n" -- "paddsw %%xmm1,%%xmm0\n" -- "movq (%5,%%rax,8),%%xmm1\n" -- 
"lea (%%r10,%6),%%r11\n" -- "sar $0x10,%%r10\n" -- "movzb (%0,%%r10,1),%%rax\n" -- "movq (%5,%%rax,8),%%xmm2\n" -- "paddsw %%xmm0,%%xmm1\n" -- "paddsw %%xmm0,%%xmm2\n" -- "shufps $0x44,%%xmm2,%%xmm1\n" -- "psraw $0x6,%%xmm1\n" -- "packuswb %%xmm1,%%xmm1\n" -- "movq %%xmm1,0x0(%3)\n" -- "add $0x8,%3\n" -- "sub $0x2,%4\n" -- "jns scaleloop\n" -- --"scalenext:" -- "add $0x1,%4\n" -- "js scaledone\n" -- -- "mov %%r11,%%r10\n" -- "sar $0x11,%%r10\n" -- "movzb (%1,%%r10,1),%%rax\n" -- "movq 2048(%5,%%rax,8),%%xmm0\n" -- "movzb (%2,%%r10,1),%%rax\n" -- "movq 4096(%5,%%rax,8),%%xmm1\n" -- "paddsw %%xmm1,%%xmm0\n" -- "sar $0x10,%%r11\n" -- "movzb (%0,%%r11,1),%%rax\n" -- "movq (%5,%%rax,8),%%xmm1\n" -- "paddsw %%xmm0,%%xmm1\n" -- "psraw $0x6,%%xmm1\n" -- "packuswb %%xmm1,%%xmm1\n" -- "movd %%xmm1,0x0(%3)\n" -- --"scaledone:" -- : -- : "r"(y_buf), // %0 -- "r"(u_buf), // %1 -- "r"(v_buf), // %2 -- "r"(rgb_buf), // %3 -- "r"(width), // %4 -- "r" (kCoefficientsRgbY), // %5 -- "r"(static_cast<long>(source_dx)) // %6 -- : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" --); --} -- --void LinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx) { -- asm( -- "xor %%r11,%%r11\n" // x = 0 -- "sub $0x2,%4\n" -- "js .lscalenext\n" -- "cmp $0x20000,%6\n" // if source_dx >= 2.0 -- "jl .lscalehalf\n" -- "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less --".lscalehalf:" -- --".lscaleloop:" -- "mov %%r11,%%r10\n" -- "sar $0x11,%%r10\n" -- -- "movzb (%1, %%r10, 1), %%r13 \n" -- "movzb 1(%1, %%r10, 1), %%r14 \n" -- "mov %%r11, %%rax \n" -- "and $0x1fffe, %%rax \n" -- "imul %%rax, %%r14 \n" -- "xor $0x1fffe, %%rax \n" -- "imul %%rax, %%r13 \n" -- "add %%r14, %%r13 \n" -- "shr $17, %%r13 \n" -- "movq 2048(%5,%%r13,8), %%xmm0\n" -- -- "movzb (%2, %%r10, 1), %%r13 \n" -- "movzb 1(%2, %%r10, 1), %%r14 \n" -- "mov %%r11, %%rax \n" -- "and $0x1fffe, %%rax \n" -- "imul %%rax, %%r14 \n" -- "xor $0x1fffe, %%rax 
\n" -- "imul %%rax, %%r13 \n" -- "add %%r14, %%r13 \n" -- "shr $17, %%r13 \n" -- "movq 4096(%5,%%r13,8), %%xmm1\n" -- -- "mov %%r11, %%rax \n" -- "lea (%%r11,%6),%%r10\n" -- "sar $0x10,%%r11\n" -- "paddsw %%xmm1,%%xmm0\n" -- -- "movzb (%0, %%r11, 1), %%r13 \n" -- "movzb 1(%0, %%r11, 1), %%r14 \n" -- "and $0xffff, %%rax \n" -- "imul %%rax, %%r14 \n" -- "xor $0xffff, %%rax \n" -- "imul %%rax, %%r13 \n" -- "add %%r14, %%r13 \n" -- "shr $16, %%r13 \n" -- "movq (%5,%%r13,8),%%xmm1\n" -- -- "mov %%r10, %%rax \n" -- "lea (%%r10,%6),%%r11\n" -- "sar $0x10,%%r10\n" -- -- "movzb (%0,%%r10,1), %%r13 \n" -- "movzb 1(%0,%%r10,1), %%r14 \n" -- "and $0xffff, %%rax \n" -- "imul %%rax, %%r14 \n" -- "xor $0xffff, %%rax \n" -- "imul %%rax, %%r13 \n" -- "add %%r14, %%r13 \n" -- "shr $16, %%r13 \n" -- "movq (%5,%%r13,8),%%xmm2\n" -- -- "paddsw %%xmm0,%%xmm1\n" -- "paddsw %%xmm0,%%xmm2\n" -- "shufps $0x44,%%xmm2,%%xmm1\n" -- "psraw $0x6,%%xmm1\n" -- "packuswb %%xmm1,%%xmm1\n" -- "movq %%xmm1,0x0(%3)\n" -- "add $0x8,%3\n" -- "sub $0x2,%4\n" -- "jns .lscaleloop\n" -- --".lscalenext:" -- "add $0x1,%4\n" -- "js .lscaledone\n" -- -- "mov %%r11,%%r10\n" -- "sar $0x11,%%r10\n" -- -- "movzb (%1,%%r10,1), %%r13 \n" -- "movq 2048(%5,%%r13,8),%%xmm0\n" -- -- "movzb (%2,%%r10,1), %%r13 \n" -- "movq 4096(%5,%%r13,8),%%xmm1\n" -- -- "paddsw %%xmm1,%%xmm0\n" -- "sar $0x10,%%r11\n" -- -- "movzb (%0,%%r11,1), %%r13 \n" -- "movq (%5,%%r13,8),%%xmm1\n" -- -- "paddsw %%xmm0,%%xmm1\n" -- "psraw $0x6,%%xmm1\n" -- "packuswb %%xmm1,%%xmm1\n" -- "movd %%xmm1,0x0(%3)\n" -- --".lscaledone:" -- : -- : "r"(y_buf), // %0 -- "r"(u_buf), // %1 -- "r"(v_buf), // %2 -- "r"(rgb_buf), // %3 -- "r"(width), // %4 -- "r" (kCoefficientsRgbY), // %5 -- "r"(static_cast<long>(source_dx)) // %6 -- : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" --); --} -- --#elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__) -- --// PIC version is slower because less registers are available, so --// non-PIC is 
used on platforms where it is possible. -- --void FastConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width); -- asm( -- ".text\n" -- ".global FastConvertYUVToRGB32Row\n" --"FastConvertYUVToRGB32Row:\n" -- "pusha\n" -- "mov 0x24(%esp),%edx\n" -- "mov 0x28(%esp),%edi\n" -- "mov 0x2c(%esp),%esi\n" -- "mov 0x30(%esp),%ebp\n" -- "mov 0x34(%esp),%ecx\n" -- "jmp convertend\n" -- --"convertloop:" -- "movzbl (%edi),%eax\n" -- "add $0x1,%edi\n" -- "movzbl (%esi),%ebx\n" -- "add $0x1,%esi\n" -- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" -- "movzbl (%edx),%eax\n" -- "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n" -- "movzbl 0x1(%edx),%ebx\n" -- "movq kCoefficientsRgbY(,%eax,8),%mm1\n" -- "add $0x2,%edx\n" -- "movq kCoefficientsRgbY(,%ebx,8),%mm2\n" -- "paddsw %mm0,%mm1\n" -- "paddsw %mm0,%mm2\n" -- "psraw $0x6,%mm1\n" -- "psraw $0x6,%mm2\n" -- "packuswb %mm2,%mm1\n" -- "movntq %mm1,0x0(%ebp)\n" -- "add $0x8,%ebp\n" --"convertend:" -- "sub $0x2,%ecx\n" -- "jns convertloop\n" -- -- "and $0x1,%ecx\n" -- "je convertdone\n" -- -- "movzbl (%edi),%eax\n" -- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" -- "movzbl (%esi),%eax\n" -- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" -- "movzbl (%edx),%eax\n" -- "movq kCoefficientsRgbY(,%eax,8),%mm1\n" -- "paddsw %mm0,%mm1\n" -- "psraw $0x6,%mm1\n" -- "packuswb %mm1,%mm1\n" -- "movd %mm1,0x0(%ebp)\n" --"convertdone:" -- "popa\n" -- "ret\n" --); -- -- --void ScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx); -- asm( -- ".text\n" -- ".global ScaleYUVToRGB32Row\n" --"ScaleYUVToRGB32Row:\n" -- "pusha\n" -- "mov 0x24(%esp),%edx\n" -- "mov 0x28(%esp),%edi\n" -- "mov 0x2c(%esp),%esi\n" -- "mov 0x30(%esp),%ebp\n" -- "mov 0x34(%esp),%ecx\n" -- "xor %ebx,%ebx\n" -- "jmp scaleend\n" -- --"scaleloop:" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- "movzbl (%edi,%eax,1),%eax\n" -- "movq 
kCoefficientsRgbY+2048(,%eax,8),%mm0\n" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- "movzbl (%esi,%eax,1),%eax\n" -- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" -- "mov %ebx,%eax\n" -- "add 0x38(%esp),%ebx\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%eax\n" -- "movq kCoefficientsRgbY(,%eax,8),%mm1\n" -- "mov %ebx,%eax\n" -- "add 0x38(%esp),%ebx\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%eax\n" -- "movq kCoefficientsRgbY(,%eax,8),%mm2\n" -- "paddsw %mm0,%mm1\n" -- "paddsw %mm0,%mm2\n" -- "psraw $0x6,%mm1\n" -- "psraw $0x6,%mm2\n" -- "packuswb %mm2,%mm1\n" -- "movntq %mm1,0x0(%ebp)\n" -- "add $0x8,%ebp\n" --"scaleend:" -- "sub $0x2,%ecx\n" -- "jns scaleloop\n" -- -- "and $0x1,%ecx\n" -- "je scaledone\n" -- -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- "movzbl (%edi,%eax,1),%eax\n" -- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- "movzbl (%esi,%eax,1),%eax\n" -- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" -- "mov %ebx,%eax\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%eax\n" -- "movq kCoefficientsRgbY(,%eax,8),%mm1\n" -- "paddsw %mm0,%mm1\n" -- "psraw $0x6,%mm1\n" -- "packuswb %mm1,%mm1\n" -- "movd %mm1,0x0(%ebp)\n" -- --"scaledone:" -- "popa\n" -- "ret\n" --); -- --void LinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx); -- asm( -- ".text\n" -- ".global LinearScaleYUVToRGB32Row\n" --"LinearScaleYUVToRGB32Row:\n" -- "pusha\n" -- "mov 0x24(%esp),%edx\n" -- "mov 0x28(%esp),%edi\n" -- "mov 0x30(%esp),%ebp\n" -- -- // source_width = width * source_dx + ebx -- "mov 0x34(%esp), %ecx\n" -- "imull 0x38(%esp), %ecx\n" -- "mov %ecx, 0x34(%esp)\n" -- -- "mov 0x38(%esp), %ecx\n" -- "xor %ebx,%ebx\n" // x = 0 -- "cmp $0x20000,%ecx\n" // if source_dx >= 2.0 -- "jl .lscaleend\n" -- "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less -- "jmp .lscaleend\n" -- --".lscaleloop:" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- -- 
"movzbl (%edi,%eax,1),%ecx\n" -- "movzbl 1(%edi,%eax,1),%esi\n" -- "mov %ebx,%eax\n" -- "andl $0x1fffe, %eax \n" -- "imul %eax, %esi \n" -- "xorl $0x1fffe, %eax \n" -- "imul %eax, %ecx \n" -- "addl %esi, %ecx \n" -- "shrl $17, %ecx \n" -- "movq kCoefficientsRgbY+2048(,%ecx,8),%mm0\n" -- -- "mov 0x2c(%esp),%esi\n" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- -- "movzbl (%esi,%eax,1),%ecx\n" -- "movzbl 1(%esi,%eax,1),%esi\n" -- "mov %ebx,%eax\n" -- "andl $0x1fffe, %eax \n" -- "imul %eax, %esi \n" -- "xorl $0x1fffe, %eax \n" -- "imul %eax, %ecx \n" -- "addl %esi, %ecx \n" -- "shrl $17, %ecx \n" -- "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n" -- -- "mov %ebx,%eax\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%ecx\n" -- "movzbl 1(%edx,%eax,1),%esi\n" -- "mov %ebx,%eax\n" -- "add 0x38(%esp),%ebx\n" -- "andl $0xffff, %eax \n" -- "imul %eax, %esi \n" -- "xorl $0xffff, %eax \n" -- "imul %eax, %ecx \n" -- "addl %esi, %ecx \n" -- "shrl $16, %ecx \n" -- "movq kCoefficientsRgbY(,%ecx,8),%mm1\n" -- -- "cmp 0x34(%esp), %ebx\n" -- "jge .lscalelastpixel\n" -- -- "mov %ebx,%eax\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%ecx\n" -- "movzbl 1(%edx,%eax,1),%esi\n" -- "mov %ebx,%eax\n" -- "add 0x38(%esp),%ebx\n" -- "andl $0xffff, %eax \n" -- "imul %eax, %esi \n" -- "xorl $0xffff, %eax \n" -- "imul %eax, %ecx \n" -- "addl %esi, %ecx \n" -- "shrl $16, %ecx \n" -- "movq kCoefficientsRgbY(,%ecx,8),%mm2\n" -- -- "paddsw %mm0,%mm1\n" -- "paddsw %mm0,%mm2\n" -- "psraw $0x6,%mm1\n" -- "psraw $0x6,%mm2\n" -- "packuswb %mm2,%mm1\n" -- "movntq %mm1,0x0(%ebp)\n" -- "add $0x8,%ebp\n" -- --".lscaleend:" -- "cmp 0x34(%esp), %ebx\n" -- "jl .lscaleloop\n" -- "popa\n" -- "ret\n" -- --".lscalelastpixel:" -- "paddsw %mm0, %mm1\n" -- "psraw $6, %mm1\n" -- "packuswb %mm1, %mm1\n" -- "movd %mm1, (%ebp)\n" -- "popa\n" -- "ret\n" --); -- --#elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__) -- --extern void PICConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const 
uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int16 *kCoefficientsRgbY); -- asm( -- ".text\n" --#if defined(OS_MACOSX) --"_PICConvertYUVToRGB32Row:\n" --#else --"PICConvertYUVToRGB32Row:\n" --#endif -- "pusha\n" -- "mov 0x24(%esp),%edx\n" -- "mov 0x28(%esp),%edi\n" -- "mov 0x2c(%esp),%esi\n" -- "mov 0x30(%esp),%ebp\n" -- "mov 0x38(%esp),%ecx\n" -- -- "jmp .Lconvertend\n" -- --".Lconvertloop:" -- "movzbl (%edi),%eax\n" -- "add $0x1,%edi\n" -- "movzbl (%esi),%ebx\n" -- "add $0x1,%esi\n" -- "movq 2048(%ecx,%eax,8),%mm0\n" -- "movzbl (%edx),%eax\n" -- "paddsw 4096(%ecx,%ebx,8),%mm0\n" -- "movzbl 0x1(%edx),%ebx\n" -- "movq 0(%ecx,%eax,8),%mm1\n" -- "add $0x2,%edx\n" -- "movq 0(%ecx,%ebx,8),%mm2\n" -- "paddsw %mm0,%mm1\n" -- "paddsw %mm0,%mm2\n" -- "psraw $0x6,%mm1\n" -- "psraw $0x6,%mm2\n" -- "packuswb %mm2,%mm1\n" -- "movntq %mm1,0x0(%ebp)\n" -- "add $0x8,%ebp\n" --".Lconvertend:" -- "subl $0x2,0x34(%esp)\n" -- "jns .Lconvertloop\n" -- -- "andl $0x1,0x34(%esp)\n" -- "je .Lconvertdone\n" -- -- "movzbl (%edi),%eax\n" -- "movq 2048(%ecx,%eax,8),%mm0\n" -- "movzbl (%esi),%eax\n" -- "paddsw 4096(%ecx,%eax,8),%mm0\n" -- "movzbl (%edx),%eax\n" -- "movq 0(%ecx,%eax,8),%mm1\n" -- "paddsw %mm0,%mm1\n" -- "psraw $0x6,%mm1\n" -- "packuswb %mm1,%mm1\n" -- "movd %mm1,0x0(%ebp)\n" --".Lconvertdone:\n" -- "popa\n" -- "ret\n" --); -- --void FastConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width) { -- PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, -- &kCoefficientsRgbY[0][0]); --} -- --extern void PICScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx, -- int16 *kCoefficientsRgbY); -- -- asm( -- ".text\n" --#if defined(OS_MACOSX) --"_PICScaleYUVToRGB32Row:\n" --#else --"PICScaleYUVToRGB32Row:\n" --#endif -- "pusha\n" -- "mov 0x24(%esp),%edx\n" -- "mov 0x28(%esp),%edi\n" -- "mov 0x2c(%esp),%esi\n" -- "mov 
0x30(%esp),%ebp\n" -- "mov 0x3c(%esp),%ecx\n" -- "xor %ebx,%ebx\n" -- "jmp Lscaleend\n" -- --"Lscaleloop:" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- "movzbl (%edi,%eax,1),%eax\n" -- "movq 2048(%ecx,%eax,8),%mm0\n" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- "movzbl (%esi,%eax,1),%eax\n" -- "paddsw 4096(%ecx,%eax,8),%mm0\n" -- "mov %ebx,%eax\n" -- "add 0x38(%esp),%ebx\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%eax\n" -- "movq 0(%ecx,%eax,8),%mm1\n" -- "mov %ebx,%eax\n" -- "add 0x38(%esp),%ebx\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%eax\n" -- "movq 0(%ecx,%eax,8),%mm2\n" -- "paddsw %mm0,%mm1\n" -- "paddsw %mm0,%mm2\n" -- "psraw $0x6,%mm1\n" -- "psraw $0x6,%mm2\n" -- "packuswb %mm2,%mm1\n" -- "movntq %mm1,0x0(%ebp)\n" -- "add $0x8,%ebp\n" --"Lscaleend:" -- "subl $0x2,0x34(%esp)\n" -- "jns Lscaleloop\n" -- -- "andl $0x1,0x34(%esp)\n" -- "je Lscaledone\n" -- -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- "movzbl (%edi,%eax,1),%eax\n" -- "movq 2048(%ecx,%eax,8),%mm0\n" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- "movzbl (%esi,%eax,1),%eax\n" -- "paddsw 4096(%ecx,%eax,8),%mm0\n" -- "mov %ebx,%eax\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%eax\n" -- "movq 0(%ecx,%eax,8),%mm1\n" -- "paddsw %mm0,%mm1\n" -- "psraw $0x6,%mm1\n" -- "packuswb %mm1,%mm1\n" -- "movd %mm1,0x0(%ebp)\n" -- --"Lscaledone:" -- "popa\n" -- "ret\n" --); -- -- --void ScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx) { -- PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx, -- &kCoefficientsRgbY[0][0]); --} -- --void PICLinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx, -- int16 *kCoefficientsRgbY); -- asm( -- ".text\n" --#if defined(OS_MACOSX) --"_PICLinearScaleYUVToRGB32Row:\n" --#else --"PICLinearScaleYUVToRGB32Row:\n" --#endif -- "pusha\n" -- "mov 0x24(%esp),%edx\n" -- "mov 
0x30(%esp),%ebp\n" -- "mov 0x34(%esp),%ecx\n" -- "mov 0x3c(%esp),%edi\n" -- "xor %ebx,%ebx\n" -- -- // source_width = width * source_dx + ebx -- "mov 0x34(%esp), %ecx\n" -- "imull 0x38(%esp), %ecx\n" -- "mov %ecx, 0x34(%esp)\n" -- -- "mov 0x38(%esp), %ecx\n" -- "xor %ebx,%ebx\n" // x = 0 -- "cmp $0x20000,%ecx\n" // if source_dx >= 2.0 -- "jl .lscaleend\n" -- "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less -- "jmp .lscaleend\n" -- --".lscaleloop:" -- "mov 0x28(%esp),%esi\n" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- -- "movzbl (%esi,%eax,1),%ecx\n" -- "movzbl 1(%esi,%eax,1),%esi\n" -- "mov %ebx,%eax\n" -- "andl $0x1fffe, %eax \n" -- "imul %eax, %esi \n" -- "xorl $0x1fffe, %eax \n" -- "imul %eax, %ecx \n" -- "addl %esi, %ecx \n" -- "shrl $17, %ecx \n" -- "movq 2048(%edi,%ecx,8),%mm0\n" -- -- "mov 0x2c(%esp),%esi\n" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -- -- "movzbl (%esi,%eax,1),%ecx\n" -- "movzbl 1(%esi,%eax,1),%esi\n" -- "mov %ebx,%eax\n" -- "andl $0x1fffe, %eax \n" -- "imul %eax, %esi \n" -- "xorl $0x1fffe, %eax \n" -- "imul %eax, %ecx \n" -- "addl %esi, %ecx \n" -- "shrl $17, %ecx \n" -- "paddsw 4096(%edi,%ecx,8),%mm0\n" -- -- "mov %ebx,%eax\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%ecx\n" -- "movzbl 1(%edx,%eax,1),%esi\n" -- "mov %ebx,%eax\n" -- "add 0x38(%esp),%ebx\n" -- "andl $0xffff, %eax \n" -- "imul %eax, %esi \n" -- "xorl $0xffff, %eax \n" -- "imul %eax, %ecx \n" -- "addl %esi, %ecx \n" -- "shrl $16, %ecx \n" -- "movq (%edi,%ecx,8),%mm1\n" -- -- "cmp 0x34(%esp), %ebx\n" -- "jge .lscalelastpixel\n" -- -- "mov %ebx,%eax\n" -- "sar $0x10,%eax\n" -- "movzbl (%edx,%eax,1),%ecx\n" -- "movzbl 1(%edx,%eax,1),%esi\n" -- "mov %ebx,%eax\n" -- "add 0x38(%esp),%ebx\n" -- "andl $0xffff, %eax \n" -- "imul %eax, %esi \n" -- "xorl $0xffff, %eax \n" -- "imul %eax, %ecx \n" -- "addl %esi, %ecx \n" -- "shrl $16, %ecx \n" -- "movq (%edi,%ecx,8),%mm2\n" -- -- "paddsw %mm0,%mm1\n" -- "paddsw %mm0,%mm2\n" -- "psraw $0x6,%mm1\n" -- "psraw $0x6,%mm2\n" -- 
"packuswb %mm2,%mm1\n" -- "movntq %mm1,0x0(%ebp)\n" -- "add $0x8,%ebp\n" -- --".lscaleend:" -- "cmp %ebx, 0x34(%esp)\n" -- "jg .lscaleloop\n" -- "popa\n" -- "ret\n" -- --".lscalelastpixel:" -- "paddsw %mm0, %mm1\n" -- "psraw $6, %mm1\n" -- "packuswb %mm1, %mm1\n" -- "movd %mm1, (%ebp)\n" -- "popa\n" -- "ret\n" --); -- --void LinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx) { -- PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx, -- &kCoefficientsRgbY[0][0]); --} -- --#else // USE_MMX -- - // C reference code that mimic the YUV assembly. - #define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) - #define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \ - (((x) + (y)) > 32767 ? 32767 : ((x) + (y)))) - - static inline void YuvPixel(uint8 y, - uint8 u, - uint8 v, -@@ -833,66 +39,71 @@ static inline void YuvPixel(uint8 y, - a >>= 6; - - *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | - (packuswb(g) << 8) | - (packuswb(r) << 16) | - (packuswb(a) << 24); - } - --void FastConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width) { -+void FastConvertYUVToRGB32Row_C(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ unsigned int x_shift) { - for (int x = 0; x < width; x += 2) { -- uint8 u = u_buf[x >> 1]; -- uint8 v = v_buf[x >> 1]; -+ uint8 u = u_buf[x >> x_shift]; -+ uint8 v = v_buf[x >> x_shift]; - uint8 y0 = y_buf[x]; - YuvPixel(y0, u, v, rgb_buf); - if ((x + 1) < width) { - uint8 y1 = y_buf[x + 1]; -+ if (x_shift == 0) { -+ u = u_buf[x + 1]; -+ v = v_buf[x + 1]; -+ } - YuvPixel(y1, u, v, rgb_buf + 4); - } - rgb_buf += 8; // Advance 2 pixels. - } - } - - // 16.16 fixed point is used. A shift by 16 isolates the integer. - // A shift by 17 is used to further subsample the chrominence channels. 
- // & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, - // for 1/65536 pixel accurate interpolation. --void ScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx) { -+void ScaleYUVToRGB32Row_C(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) { - int x = 0; - for (int i = 0; i < width; i += 2) { - int y = y_buf[x >> 16]; - int u = u_buf[(x >> 17)]; - int v = v_buf[(x >> 17)]; - YuvPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y = y_buf[x >> 16]; - YuvPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } - } - --void LinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx) { -+void LinearScaleYUVToRGB32Row_C(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) { - int x = 0; - if (source_dx >= 0x20000) { - x = 32768; - } - for (int i = 0; i < width; i += 2) { - int y0 = y_buf[x >> 16]; - int y1 = y_buf[(x >> 16) + 1]; - int u0 = u_buf[(x >> 17)]; -@@ -913,11 +124,10 @@ void LinearScaleYUVToRGB32Row(const uint - y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - YuvPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } - } - --#endif // USE_MMX - } // extern "C" - -diff --git a/gfx/ycbcr/yuv_row_posix.cpp b/gfx/ycbcr/yuv_row_posix.cpp ---- a/gfx/ycbcr/yuv_row_posix.cpp -+++ b/gfx/ycbcr/yuv_row_posix.cpp -@@ -1,33 +1,32 @@ - // Copyright (c) 2010 The Chromium Authors. All rights reserved. - // Use of this source code is governed by a BSD-style license that can be - // found in the LICENSE file. 
- --#include "media/base/yuv_row.h" -- --#ifdef _DEBUG --#include "base/logging.h" --#else -+#include "yuv_row.h" -+#include "mozilla/SSE.h" -+ - #define DCHECK(a) --#endif - - extern "C" { - --#if USE_SSE2 && defined(ARCH_CPU_X86_64) -+#if defined(ARCH_CPU_X86_64) -+ -+// We don't need CPUID guards here, since x86-64 implies SSE2. - - // AMD64 ABI uses register paremters. - void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi - const uint8* u_buf, // rsi - const uint8* v_buf, // rdx - uint8* rgb_buf, // rcx - int width) { // r8 - asm( -- "jmp convertend\n" --"convertloop:" -+ "jmp 1f\n" -+"0:" - "movzb (%1),%%r10\n" - "add $0x1,%1\n" - "movzb (%2),%%r11\n" - "add $0x1,%2\n" - "movq 2048(%5,%%r10,8),%%xmm0\n" - "movzb (%0),%%r10\n" - "movq 4096(%5,%%r11,8),%%xmm1\n" - "movzb 0x1(%0),%%r11\n" -@@ -37,36 +36,36 @@ void FastConvertYUVToRGB32Row(const uint - "movq (%5,%%r11,8),%%xmm3\n" - "paddsw %%xmm0,%%xmm2\n" - "paddsw %%xmm0,%%xmm3\n" - "shufps $0x44,%%xmm3,%%xmm2\n" - "psraw $0x6,%%xmm2\n" - "packuswb %%xmm2,%%xmm2\n" - "movq %%xmm2,0x0(%3)\n" - "add $0x8,%3\n" --"convertend:" -+"1:" - "sub $0x2,%4\n" -- "jns convertloop\n" -- --"convertnext:" -+ "jns 0b\n" -+ -+"2:" - "add $0x1,%4\n" -- "js convertdone\n" -+ "js 3f\n" - - "movzb (%1),%%r10\n" - "movq 2048(%5,%%r10,8),%%xmm0\n" - "movzb (%2),%%r10\n" - "movq 4096(%5,%%r10,8),%%xmm1\n" - "paddsw %%xmm1,%%xmm0\n" - "movzb (%0),%%r10\n" - "movq (%5,%%r10,8),%%xmm1\n" - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" --"convertdone:" -+"3:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY) // %5 - : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" -@@ -77,19 +76,19 @@ void ScaleYUVToRGB32Row(const uint8* y_b - const uint8* u_buf, // rsi - const uint8* v_buf, // rdx - uint8* rgb_buf, // rcx - int width, // r8 - int source_dx) { // r9 - asm( - "xor %%r11,%%r11\n" - "sub 
$0x2,%4\n" -- "js scalenext\n" -- --"scaleloop:" -+ "js 1f\n" -+ -+"0:" - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - "movzb (%1,%%r10,1),%%rax\n" - "movq 2048(%5,%%rax,8),%%xmm0\n" - "movzb (%2,%%r10,1),%%rax\n" - "movq 4096(%5,%%rax,8),%%xmm1\n" - "lea (%%r11,%6),%%r10\n" - "sar $0x10,%%r11\n" -@@ -103,38 +102,38 @@ void ScaleYUVToRGB32Row(const uint8* y_b - "paddsw %%xmm0,%%xmm1\n" - "paddsw %%xmm0,%%xmm2\n" - "shufps $0x44,%%xmm2,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movq %%xmm1,0x0(%3)\n" - "add $0x8,%3\n" - "sub $0x2,%4\n" -- "jns scaleloop\n" -- --"scalenext:" -+ "jns 0b\n" -+ -+"1:" - "add $0x1,%4\n" -- "js scaledone\n" -+ "js 2f\n" - - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - "movzb (%1,%%r10,1),%%rax\n" - "movq 2048(%5,%%rax,8),%%xmm0\n" - "movzb (%2,%%r10,1),%%rax\n" - "movq 4096(%5,%%rax,8),%%xmm1\n" - "paddsw %%xmm1,%%xmm0\n" - "sar $0x10,%%r11\n" - "movzb (%0,%%r11,1),%%rax\n" - "movq (%5,%%rax,8),%%xmm1\n" - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" - --"scaledone:" -+"2:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY), // %5 - "r"(static_cast<long>(source_dx)) // %6 -@@ -146,23 +145,23 @@ void LinearScaleYUVToRGB32Row(const uint - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - asm( - "xor %%r11,%%r11\n" // x = 0 - "sub $0x2,%4\n" -- "js .lscalenext\n" -+ "js 2f\n" - "cmp $0x20000,%6\n" // if source_dx >= 2.0 -- "jl .lscalehalf\n" -+ "jl 0f\n" - "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less --".lscalehalf:" -- --".lscaleloop:" -+"0:" -+ -+"1:" - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - - "movzb (%1, %%r10, 1), %%r13 \n" - "movzb 1(%1, %%r10, 1), %%r14 \n" - "mov %%r11, %%rax \n" - "and $0x1fffe, %%rax \n" - "imul %%rax, %%r14 \n" -@@ -215,21 +214,21 @@ void LinearScaleYUVToRGB32Row(const uint - "paddsw %%xmm0,%%xmm1\n" - 
"paddsw %%xmm0,%%xmm2\n" - "shufps $0x44,%%xmm2,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movq %%xmm1,0x0(%3)\n" - "add $0x8,%3\n" - "sub $0x2,%4\n" -- "jns .lscaleloop\n" -- --".lscalenext:" -+ "jns 1b\n" -+ -+"2:" - "add $0x1,%4\n" -- "js .lscaledone\n" -+ "js 3f\n" - - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - - "movzb (%1,%%r10,1), %%r13 \n" - "movq 2048(%5,%%r13,8),%%xmm0\n" - - "movzb (%2,%%r10,1), %%r13 \n" -@@ -241,52 +240,52 @@ void LinearScaleYUVToRGB32Row(const uint - "movzb (%0,%%r11,1), %%r13 \n" - "movq (%5,%%r13,8),%%xmm1\n" - - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" - --".lscaledone:" -+"3:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY), // %5 - "r"(static_cast<long>(source_dx)) // %6 - : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" - ); - } - --#elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__) -+#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__) - - // PIC version is slower because less registers are available, so - // non-PIC is used on platforms where it is possible. 
-- --void FastConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width); -+void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width); - asm( - ".text\n" -- ".global FastConvertYUVToRGB32Row\n" --"FastConvertYUVToRGB32Row:\n" -+ ".global FastConvertYUVToRGB32Row_SSE\n" -+ ".type FastConvertYUVToRGB32Row_SSE, @function\n" -+"FastConvertYUVToRGB32Row_SSE:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" -- "jmp convertend\n" -- --"convertloop:" -+ "jmp 1f\n" -+ -+"0:" - "movzbl (%edi),%eax\n" - "add $0x1,%edi\n" - "movzbl (%esi),%ebx\n" - "add $0x1,%esi\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n" - "movzbl 0x1(%edx),%ebx\n" -@@ -295,59 +294,77 @@ void FastConvertYUVToRGB32Row(const uint - "movq kCoefficientsRgbY(,%ebx,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" --"convertend:" -+"1:" - "sub $0x2,%ecx\n" -- "jns convertloop\n" -+ "jns 0b\n" - - "and $0x1,%ecx\n" -- "je convertdone\n" -+ "je 2f\n" - - "movzbl (%edi),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "movzbl (%esi),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" --"convertdone:" -+"2:" - "popa\n" - "ret\n" -+#if !defined(XP_MACOSX) -+ ".previous\n" -+#endif - ); - -- --void ScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx); -+void FastConvertYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const 
uint8* v_buf, -+ uint8* rgb_buf, -+ int width) -+{ -+ if (mozilla::supports_sse()) { -+ FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width); -+ return; -+ } -+ -+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -+} -+ -+ -+void ScaleYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx); - asm( - ".text\n" -- ".global ScaleYUVToRGB32Row\n" --"ScaleYUVToRGB32Row:\n" -+ ".global ScaleYUVToRGB32Row_SSE\n" -+ ".type ScaleYUVToRGB32Row_SSE, @function\n" -+"ScaleYUVToRGB32Row_SSE:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "xor %ebx,%ebx\n" -- "jmp scaleend\n" -- --"scaleloop:" -+ "jmp 1f\n" -+ -+"0:" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" -@@ -363,22 +380,22 @@ void ScaleYUVToRGB32Row(const uint8* y_b - "movq kCoefficientsRgbY(,%eax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" --"scaleend:" -+"1:" - "sub $0x2,%ecx\n" -- "jns scaleloop\n" -+ "jns 0b\n" - - "and $0x1,%ecx\n" -- "je scaledone\n" -+ "je 2f\n" - - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" -@@ -387,51 +404,71 @@ void ScaleYUVToRGB32Row(const uint8* y_b - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" - --"scaledone:" -+"2:" - "popa\n" - "ret\n" -+#if !defined(XP_MACOSX) -+ 
".previous\n" -+#endif - ); - --void LinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx); -+void ScaleYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) -+{ -+ if (mozilla::supports_sse()) { -+ ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, -+ width, source_dx); -+ return; // SSE version handled this row; do not also run the C fallback -+ } -+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, -+ width, source_dx); -+} -+ -+void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx); - asm( - ".text\n" -- ".global LinearScaleYUVToRGB32Row\n" --"LinearScaleYUVToRGB32Row:\n" -+ ".global LinearScaleYUVToRGB32Row_SSE\n" -+ ".type LinearScaleYUVToRGB32Row_SSE, @function\n" -+"LinearScaleYUVToRGB32Row_SSE:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x30(%esp),%ebp\n" - - // source_width = width * source_dx + ebx - "mov 0x34(%esp), %ecx\n" - "imull 0x38(%esp), %ecx\n" - "mov %ecx, 0x34(%esp)\n" - - "mov 0x38(%esp), %ecx\n" - "xor %ebx,%ebx\n" // x = 0 - "cmp $0x20000,%ecx\n" // if source_dx >= 2.0 -- "jl .lscaleend\n" -+ "jl 1f\n" - "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less -- "jmp .lscaleend\n" -- --".lscaleloop:" -- "mov %ebx,%eax\n" -- "sar $0x11,%eax\n" -+ "jmp 1f\n" -+ -+"0:" -+ "mov %ebx,%eax\n" -+ "sar $0x11,%eax\n" - - "movzbl (%edi,%eax,1),%ecx\n" - "movzbl 1(%edi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" -@@ -464,17 +501,17 @@ void LinearScaleYUVToRGB32Row(const uint - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq kCoefficientsRgbY(,%ecx,8),%mm1\n" - - "cmp 0x34(%esp), %ebx\n" -- "jge .lscalelastpixel\n" -+ "jge 2f\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - 
"movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" -@@ -488,56 +525,76 @@ void LinearScaleYUVToRGB32Row(const uint - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" - --".lscaleend:" -+"1:" - "cmp 0x34(%esp), %ebx\n" -- "jl .lscaleloop\n" -+ "jl 0b\n" - "popa\n" - "ret\n" - --".lscalelastpixel:" -+"2:" - "paddsw %mm0, %mm1\n" - "psraw $6, %mm1\n" - "packuswb %mm1, %mm1\n" - "movd %mm1, (%ebp)\n" - "popa\n" - "ret\n" -+#if !defined(XP_MACOSX) -+ ".previous\n" -+#endif - ); - --#elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__) -- --extern void PICConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int16 *kCoefficientsRgbY); -+void LinearScaleYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) -+{ -+ if (mozilla::supports_sse()) { -+ LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, -+ width, source_dx); -+ return; // SSE version handled this row; do not also run the C fallback -+ } -+ LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, -+ width, source_dx); -+} -+ -+#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__) -+ -+void PICConvertYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int16 *kCoefficientsRgbY); -+ - asm( - ".text\n" --#if defined(OS_MACOSX) --"_PICConvertYUVToRGB32Row:\n" -+#if defined(XP_MACOSX) -+"_PICConvertYUVToRGB32Row_SSE:\n" - #else --"PICConvertYUVToRGB32Row:\n" -+"PICConvertYUVToRGB32Row_SSE:\n" - #endif - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x38(%esp),%ecx\n" - -- "jmp .Lconvertend\n" -- --".Lconvertloop:" -+ "jmp 1f\n" -+ -+"0:" - "movzbl (%edi),%eax\n" - 
"add $0x1,%edi\n" - "movzbl (%esi),%ebx\n" - "add $0x1,%esi\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "paddsw 4096(%ecx,%ebx,8),%mm0\n" - "movzbl 0x1(%edx),%ebx\n" -@@ -546,72 +603,81 @@ extern void PICConvertYUVToRGB32Row(cons - "movq 0(%ecx,%ebx,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" --".Lconvertend:" -+"1:" - "subl $0x2,0x34(%esp)\n" -- "jns .Lconvertloop\n" -+ "jns 0b\n" - - "andl $0x1,0x34(%esp)\n" -- "je .Lconvertdone\n" -+ "je 2f\n" - - "movzbl (%edi),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "movzbl (%esi),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" --".Lconvertdone:\n" -+"2:" - "popa\n" - "ret\n" -+#if !defined(XP_MACOSX) -+ ".previous\n" -+#endif - ); - - void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, -- int width) { -- PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, -- &kCoefficientsRgbY[0][0]); --} -- --extern void PICScaleYUVToRGB32Row(const uint8* y_buf, -+ int width) -+{ -+ if (mozilla::supports_sse()) { -+ PICConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, -+ &kCoefficientsRgbY[0][0]); -+ return; -+ } -+ -+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -+} -+ -+void PICScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx, - int16 *kCoefficientsRgbY); - - asm( - ".text\n" --#if defined(OS_MACOSX) --"_PICScaleYUVToRGB32Row:\n" -+#if defined(XP_MACOSX) -+"_PICScaleYUVToRGB32Row_SSE:\n" - #else --"PICScaleYUVToRGB32Row:\n" -+"PICScaleYUVToRGB32Row_SSE:\n" - #endif - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - 
"mov 0x30(%esp),%ebp\n" - "mov 0x3c(%esp),%ecx\n" - "xor %ebx,%ebx\n" -- "jmp Lscaleend\n" -- --"Lscaleloop:" -+ "jmp 1f\n" -+ -+"0:" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" -@@ -627,22 +693,22 @@ extern void PICScaleYUVToRGB32Row(const - "movq 0(%ecx,%eax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" --"Lscaleend:" -+"1:" - "subl $0x2,0x34(%esp)\n" -- "jns Lscaleloop\n" -+ "jns 0b\n" - - "andl $0x1,0x34(%esp)\n" -- "je Lscaledone\n" -+ "je 2f\n" - - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" -@@ -651,66 +717,75 @@ extern void PICScaleYUVToRGB32Row(const - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" - --"Lscaledone:" -+"2:" - "popa\n" - "ret\n" -+#if !defined(XP_MACOSX) -+ ".previous\n" -+#endif - ); - -- - void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, -- int source_dx) { -- PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx, -- &kCoefficientsRgbY[0][0]); --} -- --void PICLinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx, -- int16 *kCoefficientsRgbY); -+ int source_dx) -+{ -+ if (mozilla::supports_sse()) { -+ PICScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx, -+ &kCoefficientsRgbY[0][0]); -+ return; -+ } -+ -+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -+} -+ -+void 
PICLinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx, -+ int16 *kCoefficientsRgbY); -+ - asm( - ".text\n" --#if defined(OS_MACOSX) --"_PICLinearScaleYUVToRGB32Row:\n" -+#if defined(XP_MACOSX) -+"_PICLinearScaleYUVToRGB32Row_SSE:\n" - #else --"PICLinearScaleYUVToRGB32Row:\n" -+"PICLinearScaleYUVToRGB32Row_SSE:\n" - #endif - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "mov 0x3c(%esp),%edi\n" - "xor %ebx,%ebx\n" - - // source_width = width * source_dx + ebx - "mov 0x34(%esp), %ecx\n" - "imull 0x38(%esp), %ecx\n" - "mov %ecx, 0x34(%esp)\n" - - "mov 0x38(%esp), %ecx\n" - "xor %ebx,%ebx\n" // x = 0 - "cmp $0x20000,%ecx\n" // if source_dx >= 2.0 -- "jl .lscaleend\n" -+ "jl 1f\n" - "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less -- "jmp .lscaleend\n" -- --".lscaleloop:" -+ "jmp 1f\n" -+ -+"0:" - "mov 0x28(%esp),%esi\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - - "movzbl (%esi,%eax,1),%ecx\n" - "movzbl 1(%esi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" -@@ -746,17 +821,17 @@ void PICLinearScaleYUVToRGB32Row(const u - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq (%edi,%ecx,8),%mm1\n" - - "cmp 0x34(%esp), %ebx\n" -- "jge .lscalelastpixel\n" -+ "jge 2f\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" -@@ -770,154 +845,71 @@ void PICLinearScaleYUVToRGB32Row(const u - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" - --".lscaleend:" -+"1:" - "cmp %ebx, 0x34(%esp)\n" -- "jg .lscaleloop\n" -+ "jg 0b\n" - "popa\n" - "ret\n" - --".lscalelastpixel:" -+"2:" - "paddsw %mm0, %mm1\n" - "psraw $6, %mm1\n" - 
"packuswb %mm1, %mm1\n" - "movd %mm1, (%ebp)\n" - "popa\n" - "ret\n" -+#if !defined(XP_MACOSX) -+ ".previous\n" -+#endif - ); - -+ - void LinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx) { -- PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx, -- &kCoefficientsRgbY[0][0]); --} -- --#else // USE_MMX -- --// C reference code that mimic the YUV assembly. --#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) --#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \ -- (((x) + (y)) > 32767 ? 32767 : ((x) + (y)))) -- --static inline void YuvPixel(uint8 y, -- uint8 u, -- uint8 v, -- uint8* rgb_buf) { -- -- int b = kCoefficientsRgbY[256+u][0]; -- int g = kCoefficientsRgbY[256+u][1]; -- int r = kCoefficientsRgbY[256+u][2]; -- int a = kCoefficientsRgbY[256+u][3]; -- -- b = paddsw(b, kCoefficientsRgbY[512+v][0]); -- g = paddsw(g, kCoefficientsRgbY[512+v][1]); -- r = paddsw(r, kCoefficientsRgbY[512+v][2]); -- a = paddsw(a, kCoefficientsRgbY[512+v][3]); -- -- b = paddsw(b, kCoefficientsRgbY[y][0]); -- g = paddsw(g, kCoefficientsRgbY[y][1]); -- r = paddsw(r, kCoefficientsRgbY[y][2]); -- a = paddsw(a, kCoefficientsRgbY[y][3]); -- -- b >>= 6; -- g >>= 6; -- r >>= 6; -- a >>= 6; -- -- *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | -- (packuswb(g) << 8) | -- (packuswb(r) << 16) | -- (packuswb(a) << 24); --} -- -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) -+{ -+ if (mozilla::supports_sse()) { -+ PICLinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, -+ source_dx, &kCoefficientsRgbY[0][0]); -+ return; -+ } -+ -+ LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -+} -+#else - void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { -- for (int x = 0; x < width; x += 2) { -- uint8 u = u_buf[x >> 
1]; -- uint8 v = v_buf[x >> 1]; -- uint8 y0 = y_buf[x]; -- YuvPixel(y0, u, v, rgb_buf); -- if ((x + 1) < width) { -- uint8 y1 = y_buf[x + 1]; -- YuvPixel(y1, u, v, rgb_buf + 4); -- } -- rgb_buf += 8; // Advance 2 pixels. -- } --} -- --// 16.16 fixed point is used. A shift by 16 isolates the integer. --// A shift by 17 is used to further subsample the chrominence channels. --// & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, --// for 1/65536 pixel accurate interpolation. -+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -+} -+ - void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { -- int x = 0; -- for (int i = 0; i < width; i += 2) { -- int y = y_buf[x >> 16]; -- int u = u_buf[(x >> 17)]; -- int v = v_buf[(x >> 17)]; -- YuvPixel(y, u, v, rgb_buf); -- x += source_dx; -- if ((i + 1) < width) { -- y = y_buf[x >> 16]; -- YuvPixel(y, u, v, rgb_buf+4); -- x += source_dx; -- } -- rgb_buf += 8; -- } --} -+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -+} - - void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { -- int x = 0; -- if (source_dx >= 0x20000) { -- x = 32768; -- } -- for (int i = 0; i < width; i += 2) { -- int y0 = y_buf[x >> 16]; -- int y1 = y_buf[(x >> 16) + 1]; -- int u0 = u_buf[(x >> 17)]; -- int u1 = u_buf[(x >> 17) + 1]; -- int v0 = v_buf[(x >> 17)]; -- int v1 = v_buf[(x >> 17) + 1]; -- int y_frac = (x & 65535); -- int uv_frac = ((x >> 1) & 65535); -- int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; -- int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16; -- int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16; -- YuvPixel(y, u, v, rgb_buf); -- x += source_dx; -- if ((i + 1) < width) { -- y0 = y_buf[x >> 16]; -- y1 = y_buf[(x >> 16) + 1]; -- y_frac = (x & 65535); -- y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 
16; -- YuvPixel(y, u, v, rgb_buf+4); -- x += source_dx; -- } -- rgb_buf += 8; -- } --} -- --#endif // USE_MMX --} // extern "C" -- -+ LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -+} -+#endif -+ -+} -diff --git a/gfx/ycbcr/yuv_row_table.cpp b/gfx/ycbcr/yuv_row_table.cpp ---- a/gfx/ycbcr/yuv_row_table.cpp -+++ b/gfx/ycbcr/yuv_row_table.cpp -@@ -1,13 +1,13 @@ - // Copyright (c) 2010 The Chromium Authors. All rights reserved. - // Use of this source code is governed by a BSD-style license that can be - // found in the LICENSE file. - --#include "media/base/yuv_row.h" -+#include "yuv_row.h" - - extern "C" { - - #define RGBY(i) { \ - static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ - static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ - static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ - 0 \ -diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp ---- a/gfx/ycbcr/yuv_row_win.cpp -+++ b/gfx/ycbcr/yuv_row_win.cpp -@@ -1,26 +1,27 @@ - // Copyright (c) 2010 The Chromium Authors. All rights reserved. - // Use of this source code is governed by a BSD-style license that can be - // found in the LICENSE file. 
- --#include "media/base/yuv_row.h" -+#include "yuv_row.h" -+#include "mozilla/SSE.h" - - #define kCoefficientsRgbU kCoefficientsRgbY + 2048 - #define kCoefficientsRgbV kCoefficientsRgbY + 4096 - - extern "C" { - --#if USE_MMX --__declspec(naked) --void FastConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width) { -+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) -+__declspec(naked) -+void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp convertend -@@ -64,22 +65,22 @@ void FastConvertYUVToRGB32Row(const uint - convertdone : - - popad - ret - } - } - - __declspec(naked) --void ConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int step) { -+void ConvertYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int step) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - mov ebx, [esp + 32 + 24] // step -@@ -125,23 +126,23 @@ void ConvertYUVToRGB32Row(const uint8* y - wdone : - - popad - ret - } - } - - __declspec(naked) --void RotateConvertYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int ystep, -- int uvstep) { -+void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int ystep, -+ int uvstep) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov 
ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp wend -@@ -188,21 +189,21 @@ void RotateConvertYUVToRGB32Row(const ui - wdone : - - popad - ret - } - } - - __declspec(naked) --void DoubleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width) { -+void DoubleYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp wend -@@ -256,26 +257,26 @@ void DoubleYUVToRGB32Row(const uint8* y_ - jns wloop1 - wdone : - popad - ret - } - } - - // This version does general purpose scaling by any amount, up or down. --// The only thing it can not do it rotation by 90 or 270. --// For performance the chroma is under sampled, reducing cost of a 3x -+// The only thing it cannot do is rotation by 90 or 270. -+// For performance the chroma is under-sampled, reducing cost of a 3x - // 1080p scale from 8.4 ms to 5.4 ms. 
- __declspec(naked) --void ScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx) { -+void ScaleYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - xor ebx, ebx // x -@@ -333,22 +334,22 @@ void ScaleYUVToRGB32Row(const uint8* y_b - - scaledone : - popad - ret - } - } - - __declspec(naked) --void LinearScaleYUVToRGB32Row(const uint8* y_buf, -- const uint8* u_buf, -- const uint8* v_buf, -- uint8* rgb_buf, -- int width, -- int source_dx) { -+void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - // [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - imul ecx, [esp + 32 + 24] // source_dx -@@ -438,152 +439,60 @@ lscalelastpixel: - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - popad - ret - }; - } --#else // USE_MMX -- --// C reference code that mimic the YUV assembly. --#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) --#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \ -- (((x) + (y)) > 32767 ? 
32767 : ((x) + (y)))) -- --static inline void YuvPixel(uint8 y, -- uint8 u, -- uint8 v, -- uint8* rgb_buf) { -- -- int b = kCoefficientsRgbY[256+u][0]; -- int g = kCoefficientsRgbY[256+u][1]; -- int r = kCoefficientsRgbY[256+u][2]; -- int a = kCoefficientsRgbY[256+u][3]; -- -- b = paddsw(b, kCoefficientsRgbY[512+v][0]); -- g = paddsw(g, kCoefficientsRgbY[512+v][1]); -- r = paddsw(r, kCoefficientsRgbY[512+v][2]); -- a = paddsw(a, kCoefficientsRgbY[512+v][3]); -- -- b = paddsw(b, kCoefficientsRgbY[y][0]); -- g = paddsw(g, kCoefficientsRgbY[y][1]); -- r = paddsw(r, kCoefficientsRgbY[y][2]); -- a = paddsw(a, kCoefficientsRgbY[y][3]); -- -- b >>= 6; -- g >>= 6; -- r >>= 6; -- a >>= 6; -- -- *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | -- (packuswb(g) << 8) | -- (packuswb(r) << 16) | -- (packuswb(a) << 24); --} -- --#if TEST_MMX_YUV --static inline void YuvPixel(uint8 y, -- uint8 u, -- uint8 v, -- uint8* rgb_buf) { -- -- __asm { -- movzx eax, u -- movq mm0, [kCoefficientsRgbY+2048 + 8 * eax] -- movzx eax, v -- paddsw mm0, [kCoefficientsRgbY+4096 + 8 * eax] -- movzx eax, y -- movq mm1, [kCoefficientsRgbY + 8 * eax] -- paddsw mm1, mm0 -- psraw mm1, 6 -- packuswb mm1, mm1 -- mov eax, rgb_buf -- movd [eax], mm1 -- emms -- } --} --#endif -+#endif // if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) - - void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { -- for (int x = 0; x < width; x += 2) { -- uint8 u = u_buf[x >> 1]; -- uint8 v = v_buf[x >> 1]; -- uint8 y0 = y_buf[x]; -- YuvPixel(y0, u, v, rgb_buf); -- if ((x + 1) < width) { -- uint8 y1 = y_buf[x + 1]; -- YuvPixel(y1, u, v, rgb_buf + 4); -- } -- rgb_buf += 8; // Advance 2 pixels. -- } --} -- --// 16.16 fixed point is used. A shift by 16 isolates the integer. --// A shift by 17 is used to further subsample the chrominence channels. --// & 0xffff isolates the fixed point fraction. 
>> 2 to get the upper 2 bits, --// for 1/65536 pixel accurate interpolation. -+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) -+ if (mozilla::supports_sse()) { -+ FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width); -+ return; -+ } -+#endif -+ -+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -+} -+ - void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { -- int x = 0; -- for (int i = 0; i < width; i += 2) { -- int y = y_buf[x >> 16]; -- int u = u_buf[(x >> 17)]; -- int v = v_buf[(x >> 17)]; -- YuvPixel(y, u, v, rgb_buf); -- x += source_dx; -- if ((i + 1) < width) { -- y = y_buf[x >> 16]; -- YuvPixel(y, u, v, rgb_buf+4); -- x += source_dx; -- } -- rgb_buf += 8; -- } --} -+ -+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) -+ if (mozilla::supports_sse()) { -+ ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -+ return; -+ } -+#endif -+ -+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -+} - - void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { -- int x = 0; -- if (source_dx >= 0x20000) { -- x = 32768; -- } -- for (int i = 0; i < width; i += 2) { -- int y0 = y_buf[x >> 16]; -- int y1 = y_buf[(x >> 16) + 1]; -- int u0 = u_buf[(x >> 17)]; -- int u1 = u_buf[(x >> 17) + 1]; -- int v0 = v_buf[(x >> 17)]; -- int v1 = v_buf[(x >> 17) + 1]; -- int y_frac = (x & 65535); -- int uv_frac = ((x >> 1) & 65535); -- int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; -- int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16; -- int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16; -- YuvPixel(y, u, v, rgb_buf); -- x += source_dx; -- if ((i + 1) < width) { -- y0 = y_buf[x >> 16]; -- y1 = y_buf[(x >> 16) + 1]; -- y_frac = (x & 65535); -- y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; -- YuvPixel(y, u, v, 
rgb_buf+4); -- x += source_dx; -- } -- rgb_buf += 8; -- } --} -- --#endif // USE_MMX --} // extern "C" -- -+#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) -+ if (mozilla::supports_sse()) { -+ LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, -+ source_dx); -+ return; -+ } -+#endif -+ -+ LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -+} -+ -+} // extern "C" diff --git a/gfx/ycbcr/moz.build b/gfx/ycbcr/moz.build deleted file mode 100644 index 7e04c96ed..000000000 --- a/gfx/ycbcr/moz.build +++ /dev/null @@ -1,64 +0,0 @@ -# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -EXPORTS += [ - 'YCbCrUtils.h', -] - -SOURCES += [ - 'scale_yuv_argb.cpp', - 'ycbcr_to_rgb565.cpp', - 'YCbCrUtils.cpp', - 'yuv_convert.cpp', - 'yuv_row_c.cpp', - 'yuv_row_table.cpp', -] - -if CONFIG['INTEL_ARCHITECTURE']: - # These files use MMX and SSE2 intrinsics, so they need special compile flags - # on some compilers. - SOURCES += ['yuv_convert_sse2.cpp'] - SOURCES['yuv_convert_sse2.cpp'].flags += CONFIG['SSE2_FLAGS'] - - # MSVC doesn't support MMX when targeting AMD64. 
- if CONFIG['_MSC_VER']: - if CONFIG['OS_TEST'] != 'x86_64': - SOURCES += [ - 'yuv_convert_mmx.cpp', - ] - else: - SOURCES += ['yuv_convert_mmx.cpp'] - SOURCES['yuv_convert_mmx.cpp'].flags += CONFIG['MMX_FLAGS'] - -if CONFIG['_MSC_VER']: - if CONFIG['OS_TEST'] == 'x86_64': - SOURCES += [ - 'yuv_row_win64.cpp', - ] - else: - SOURCES += [ - 'yuv_row_win.cpp', - ] -elif CONFIG['OS_ARCH'] in ('Linux', 'SunOS', 'Darwin', 'DragonFly', - 'FreeBSD', 'NetBSD', 'OpenBSD'): - SOURCES += [ - 'yuv_row_posix.cpp', - ] -else: - SOURCES += [ - 'yuv_row_other.cpp', - ] - -if CONFIG['CPU_ARCH'] == 'arm' and CONFIG['HAVE_ARM_NEON']: - SOURCES += [ - 'yuv_row_arm.s', - ] - SOURCES += [ - 'yuv_convert_arm.cpp', - ] - -LOCAL_INCLUDES += ['/media/libyuv/include'] - -FINAL_LIBRARY = 'xul' diff --git a/gfx/ycbcr/scale_yuv_argb.cpp b/gfx/ycbcr/scale_yuv_argb.cpp deleted file mode 100644 index 13b16c802..000000000 --- a/gfx/ycbcr/scale_yuv_argb.cpp +++ /dev/null @@ -1,1128 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * Copyright 2016 Mozilla Foundation - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "scale_yuv_argb.h" - -#include "libyuv/scale.h" - -#include <assert.h> -#include <string.h> - -#include "libyuv/cpu_id.h" -#include "libyuv/row.h" -#include "libyuv/scale_row.h" -#include "libyuv/video_common.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// YUV to RGB conversion and scaling functions were implemented by referencing -// scale_argb.cc -// -// libyuv already has ScaleYUVToARGBBilinearUp(), but its implementation is not -// completed yet. Implementations of the functions are based on it. 
-// At first, ScaleYUVToARGBBilinearUp() was implemented by modidying the -// libyuv's one. Then all another functions were implemented similarly. -// -// Function relationship between yuv_convert.cpp abd scale_argb.cc are like -// the followings -// - ScaleYUVToARGBDown2() <-- ScaleARGBDown2() -// - ScaleYUVToARGBDownEven() <-- ScaleARGBDownEven() -// - ScaleYUVToARGBBilinearDown() <-- ScaleARGBBilinearDown() -// - ScaleYUVToARGBBilinearUp() <-- ScaleARGBBilinearUp() and ScaleYUVToARGBBilinearUp() in libyuv -// - ScaleYUVToARGBSimple() <-- ScaleARGBSimple() -// - ScaleYUVToARGB() <-- ScaleARGB() // Removed some function calls for simplicity. -// - YUVToARGBScale() <-- ARGBScale() -// -// Callings and selections of InterpolateRow() and ScaleARGBFilterCols() were -// kept as same as possible. -// -// The followings changes were done to each scaling functions. -// -// -[1] Allocate YUV conversion buffer and use it as source buffer of scaling. -// Its usage is borrowed from the libyuv's ScaleYUVToARGBBilinearUp(). -// -[2] Conversion from YUV to RGB was abstracted as YUVBuferIter. -// It is for handling multiple yuv color formats. -// -[3] Modified scaling functions as to handle YUV conversion buffer and -// use YUVBuferIter. -// -[4] Color conversion function selections in YUVBuferIter were borrowed from -// I444ToARGBMatrix(), I422ToARGBMatrix() and I420ToARGBMatrix() - -static __inline int Abs(int v) { - return v >= 0 ? 
v : -v; -} - -struct YUVBuferIter { - int src_width; - int src_height; - int src_stride_y; - int src_stride_u; - int src_stride_v; - const uint8* src_y; - const uint8* src_u; - const uint8* src_v; - - uint32 src_fourcc; - const struct YuvConstants* yuvconstants; - int y_index; - const uint8* src_row_y; - const uint8* src_row_u; - const uint8* src_row_v; - - void (*YUVToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - const struct YuvConstants* yuvconstants, - int width); - void (*MoveTo)(YUVBuferIter& iter, int y_index); - void (*MoveToNextRow)(YUVBuferIter& iter); -}; - -void YUVBuferIter_InitI422(YUVBuferIter& iter) { - iter.YUVToARGBRow = I422ToARGBRow_C; -#if defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - iter.YUVToARGBRow = I422ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(iter.src_width, 8)) { - iter.YUVToARGBRow = I422ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I422TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - iter.YUVToARGBRow = I422ToARGBRow_Any_AVX2; - if (IS_ALIGNED(iter.src_width, 16)) { - iter.YUVToARGBRow = I422ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - iter.YUVToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(iter.src_width, 8)) { - iter.YUVToARGBRow = I422ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I422TOARGBROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(iter.src_width, 4) && - IS_ALIGNED(iter.src_y, 4) && IS_ALIGNED(iter.src_stride_y, 4) && - IS_ALIGNED(iter.src_u, 2) && IS_ALIGNED(iter.src_stride_u, 2) && - IS_ALIGNED(iter.src_v, 2) && IS_ALIGNED(iter.src_stride_v, 2) { - // Always satisfy IS_ALIGNED(argb_cnv_row, 4) && IS_ALIGNED(argb_cnv_rowstride, 4) - iter.YUVToARGBRow = I422ToARGBRow_DSPR2; - } -#endif -} - -void YUVBuferIter_InitI444(YUVBuferIter& iter) { - iter.YUVToARGBRow = I444ToARGBRow_C; -#if defined(HAS_I444TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - iter.YUVToARGBRow = 
I444ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(iter.src_width, 8)) { - iter.YUVToARGBRow = I444ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I444TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - iter.YUVToARGBRow = I444ToARGBRow_Any_AVX2; - if (IS_ALIGNED(iter.src_width, 16)) { - iter.YUVToARGBRow = I444ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I444TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - iter.YUVToARGBRow = I444ToARGBRow_Any_NEON; - if (IS_ALIGNED(iter.src_width, 8)) { - iter.YUVToARGBRow = I444ToARGBRow_NEON; - } - } -#endif -} - - -static void YUVBuferIter_MoveToForI444(YUVBuferIter& iter, int y_index) { - iter.y_index = y_index; - iter.src_row_y = iter.src_y + y_index * iter.src_stride_y; - iter.src_row_u = iter.src_u + y_index * iter.src_stride_u; - iter.src_row_v = iter.src_v + y_index * iter.src_stride_v; -} - -static void YUVBuferIter_MoveToNextRowForI444(YUVBuferIter& iter) { - iter.src_row_y += iter.src_stride_y; - iter.src_row_u += iter.src_stride_u; - iter.src_row_v += iter.src_stride_v; - iter.y_index++; -} - -static void YUVBuferIter_MoveToForI422(YUVBuferIter& iter, int y_index) { - iter.y_index = y_index; - iter.src_row_y = iter.src_y + y_index * iter.src_stride_y; - iter.src_row_u = iter.src_u + y_index * iter.src_stride_u; - iter.src_row_v = iter.src_v + y_index * iter.src_stride_v; -} - -static void YUVBuferIter_MoveToNextRowForI422(YUVBuferIter& iter) { - iter.src_row_y += iter.src_stride_y; - iter.src_row_u += iter.src_stride_u; - iter.src_row_v += iter.src_stride_v; - iter.y_index++; -} - -static void YUVBuferIter_MoveToForI420(YUVBuferIter& iter, int y_index) { - const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. 
- int uv_y_index = y_index >> kYShift; - - iter.y_index = y_index; - iter.src_row_y = iter.src_y + y_index * iter.src_stride_y; - iter.src_row_u = iter.src_u + uv_y_index * iter.src_stride_u; - iter.src_row_v = iter.src_v + uv_y_index * iter.src_stride_v; -} - -static void YUVBuferIter_MoveToNextRowForI420(YUVBuferIter& iter) { - iter.src_row_y += iter.src_stride_y; - if (iter.y_index & 1) { - iter.src_row_u += iter.src_stride_u; - iter.src_row_v += iter.src_stride_v; - } - iter.y_index++; -} - -static __inline void YUVBuferIter_ConvertToARGBRow(YUVBuferIter& iter, uint8* argb_row) { - iter.YUVToARGBRow(iter.src_row_y, iter.src_row_u, iter.src_row_v, argb_row, iter.yuvconstants, iter.src_width); -} - -void YUVBuferIter_Init(YUVBuferIter& iter, uint32 src_fourcc, mozilla::YUVColorSpace yuv_color_space) { - iter.src_fourcc = src_fourcc; - iter.y_index = 0; - iter.src_row_y = iter.src_y; - iter.src_row_u = iter.src_u; - iter.src_row_v = iter.src_v; - if (yuv_color_space == mozilla::YUVColorSpace::BT709) { - iter.yuvconstants = &kYuvH709Constants; - } else { - iter.yuvconstants = &kYuvI601Constants; - } - - if (src_fourcc == FOURCC_I444) { - YUVBuferIter_InitI444(iter); - iter.MoveTo = YUVBuferIter_MoveToForI444; - iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI444; - } else if(src_fourcc == FOURCC_I422){ - YUVBuferIter_InitI422(iter); - iter.MoveTo = YUVBuferIter_MoveToForI422; - iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI422; - } else { - assert(src_fourcc == FOURCC_I420); // Should be FOURCC_I420 - YUVBuferIter_InitI422(iter); - iter.MoveTo = YUVBuferIter_MoveToForI420; - iter.MoveToNextRow = YUVBuferIter_MoveToNextRowForI420; - } -} - -// ScaleARGB ARGB, 1/2 -// This is an optimized version for scaling down a ARGB to 1/2 of -// its original size. 
-static void ScaleYUVToARGBDown2(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space) { - int j; - - // Allocate 2 rows of ARGB for source conversion. - const int kRowSize = (src_width * 4 + 15) & ~15; - align_buffer_64(argb_cnv_row, kRowSize * 2); - uint8* argb_cnv_rowptr = argb_cnv_row; - int argb_cnv_rowstride = kRowSize; - - YUVBuferIter iter; - iter.src_width = src_width; - iter.src_height = src_height; - iter.src_stride_y = src_stride_y; - iter.src_stride_u = src_stride_u; - iter.src_stride_v = src_stride_v; - iter.src_y = src_y; - iter.src_u = src_u; - iter.src_v = src_v; - YUVBuferIter_Init(iter, src_fourcc, yuv_color_space); - - void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) = - filtering == kFilterNone ? ScaleARGBRowDown2_C : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C : - ScaleARGBRowDown2Box_C); - assert(dx == 65536 * 2); // Test scale factor of 2. - assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. - // Advance to odd row, even column. - int yi = y >> 16; - iter.MoveTo(iter, yi); - ptrdiff_t x_offset; - if (filtering == kFilterBilinear) { - x_offset = (x >> 16) * 4; - } else { - x_offset = ((x >> 16) - 1) * 4; - } -#if defined(HAS_SCALEARGBROWDOWN2_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 : - ScaleARGBRowDown2Box_Any_SSE2); - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : - (filtering == kFilterLinear ? 
ScaleARGBRowDown2Linear_SSE2 : - ScaleARGBRowDown2Box_SSE2); - } - } - -#endif -#if defined(HAS_SCALEARGBROWDOWN2_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON : - ScaleARGBRowDown2Box_Any_NEON); - if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON : - ScaleARGBRowDown2Box_NEON); - } - } -#endif - - const int dyi = dy >> 16; - int lastyi = yi; - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - // Prepare next row if necessary - if (filtering != kFilterLinear) { - if ((yi + dyi) < (src_height - 1)) { - iter.MoveTo(iter, yi + dyi); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride); - } else { - argb_cnv_rowstride = 0; - } - } - - if (filtering == kFilterLinear) { - argb_cnv_rowstride = 0; - } - const int max_yi = src_height - 1; - const int max_yi_minus_dyi = max_yi - dyi; - for (j = 0; j < dst_height; ++j) { - if (yi != lastyi) { - if (yi > max_yi) { - yi = max_yi; - } - if (yi != lastyi) { - if (filtering == kFilterLinear) { - iter.MoveTo(iter, yi); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - lastyi = yi; - } else { - // Prepare current row - if (yi == iter.y_index) { - argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride; - argb_cnv_rowstride = - argb_cnv_rowstride; - } else { - iter.MoveTo(iter, yi); - argb_cnv_rowptr = argb_cnv_row; - argb_cnv_rowstride = kRowSize; - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - } - // Prepare next row if necessary - if (iter.y_index < max_yi) { - int next_yi = yi < max_yi_minus_dyi ? 
yi + dyi : max_yi; - iter.MoveTo(iter, next_yi); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride); - } else { - argb_cnv_rowstride = 0; - } - lastyi = yi; - } - } - } - ScaleARGBRowDown2(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, dst_argb, dst_width); - dst_argb += dst_stride_argb; - yi += dyi; - } - - free_aligned_buffer_64(argb_cnv_row); -} - -// ScaleARGB ARGB Even -// This is an optimized version for scaling down a ARGB to even -// multiple of its original size. -static void ScaleYUVToARGBDownEven(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space) { - int j; - // Allocate 2 rows of ARGB for source conversion. - const int kRowSize = (src_width * 4 + 15) & ~15; - align_buffer_64(argb_cnv_row, kRowSize * 2); - uint8* argb_cnv_rowptr = argb_cnv_row; - int argb_cnv_rowstride = kRowSize; - - int col_step = dx >> 16; - void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride, - int src_step, uint8* dst_argb, int dst_width) = - filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; - assert(IS_ALIGNED(src_width, 2)); - assert(IS_ALIGNED(src_height, 2)); - int yi = y >> 16; - const ptrdiff_t x_offset = (x >> 16) * 4; - -#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 : - ScaleARGBRowDownEven_Any_SSE2; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : - ScaleARGBRowDownEven_SSE2; - } - } -#endif -#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ScaleARGBRowDownEven = filtering ? 
ScaleARGBRowDownEvenBox_Any_NEON : - ScaleARGBRowDownEven_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : - ScaleARGBRowDownEven_NEON; - } - } -#endif - - YUVBuferIter iter; - iter.src_width = src_width; - iter.src_height = src_height; - iter.src_stride_y = src_stride_y; - iter.src_stride_u = src_stride_u; - iter.src_stride_v = src_stride_v; - iter.src_y = src_y; - iter.src_u = src_u; - iter.src_v = src_v; - YUVBuferIter_Init(iter, src_fourcc, yuv_color_space); - - const int dyi = dy >> 16; - int lastyi = yi; - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - // Prepare next row if necessary - if (filtering != kFilterLinear) { - if ((yi + dyi) < (src_height - 1)) { - iter.MoveTo(iter, yi + dyi); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride); - } else { - argb_cnv_rowstride = 0; - } - } - - if (filtering == kFilterLinear) { - argb_cnv_rowstride = 0; - } - const int max_yi = src_height - 1; - const int max_yi_minus_dyi = max_yi - dyi; - for (j = 0; j < dst_height; ++j) { - if (yi != lastyi) { - if (yi > max_yi) { - yi = max_yi; - } - if (yi != lastyi) { - if (filtering == kFilterLinear) { - iter.MoveTo(iter, yi); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - lastyi = yi; - } else { - // Prepare current row - if (yi == iter.y_index) { - argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride; - argb_cnv_rowstride = - argb_cnv_rowstride; - } else { - iter.MoveTo(iter, yi); - argb_cnv_rowptr = argb_cnv_row; - argb_cnv_rowstride = kRowSize; - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - } - // Prepare next row if necessary - if (iter.y_index < max_yi) { - int next_yi = yi < max_yi_minus_dyi ? 
yi + dyi : max_yi; - iter.MoveTo(iter, next_yi); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride); - } else { - argb_cnv_rowstride = 0; - } - lastyi = yi; - } - } - } - ScaleARGBRowDownEven(argb_cnv_rowptr + x_offset, argb_cnv_rowstride, col_step, dst_argb, dst_width); - dst_argb += dst_stride_argb; - yi += dyi; - } - free_aligned_buffer_64(argb_cnv_row); -} - -// Scale YUV to ARGB down with bilinear interpolation. -static void ScaleYUVToARGBBilinearDown(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space) { - int j; - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = - (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; - int64 xlast = x + (int64)(dst_width - 1) * dx; - int64 xl = (dx >= 0) ? x : xlast; - int64 xr = (dx >= 0) ? xlast : x; - int clip_src_width; - xl = (xl >> 16) & ~3; // Left edge aligned. - xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels. - xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel. - if (xr > src_width) { - xr = src_width; - } - clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4. - const ptrdiff_t xl_offset = xl * 4; - x -= (int)(xl << 16); - - // Allocate 2 row of ARGB for source conversion. 
- const int kRowSize = (src_width * 4 + 15) & ~15; - align_buffer_64(argb_cnv_row, kRowSize * 2); - uint8* argb_cnv_rowptr = argb_cnv_row; - int argb_cnv_rowstride = kRowSize; - -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(clip_src_width, 16)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(clip_src_width, 32)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(clip_src_width, 16)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(src_argb, 4) && IS_ALIGNED(argb_cnv_rowstride, 4)) { - InterpolateRow = InterpolateRow_Any_DSPR2; - if (IS_ALIGNED(clip_src_width, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } - } -#endif -#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; - } -#endif -#if defined(HAS_SCALEARGBFILTERCOLS_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; - } - } -#endif - - int yi = y >> 16; - - YUVBuferIter iter; - iter.src_width = src_width; - iter.src_height = src_height; - iter.src_stride_y = src_stride_y; - iter.src_stride_u = src_stride_u; - iter.src_stride_v = src_stride_v; - iter.src_y = src_y; - iter.src_u = src_u; - iter.src_v = src_v; - YUVBuferIter_Init(iter, src_fourcc, yuv_color_space); - iter.MoveTo(iter, yi); - - // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. - // Allocate a row of ARGB. 
- align_buffer_64(row, clip_src_width * 4); - - int lastyi = yi; - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - // Prepare next row if necessary - if (filtering != kFilterLinear) { - if ((yi + 1) < src_height) { - iter.MoveToNextRow(iter); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride); - } else { - argb_cnv_rowstride = 0; - } - } - - const int max_y = (src_height - 1) << 16; - const int max_yi = src_height - 1; - for (j = 0; j < dst_height; ++j) { - yi = y >> 16; - if (yi != lastyi) { - if (y > max_y) { - y = max_y; - yi = y >> 16; - } - if (yi != lastyi) { - if (filtering == kFilterLinear) { - iter.MoveTo(iter, yi); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - lastyi = yi; - } else { - // Prepare current row - if (yi == iter.y_index) { - argb_cnv_rowptr = argb_cnv_rowptr + argb_cnv_rowstride; - argb_cnv_rowstride = - argb_cnv_rowstride; - } else { - iter.MoveTo(iter, yi); - argb_cnv_rowptr = argb_cnv_row; - argb_cnv_rowstride = kRowSize; - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr); - } - // Prepare next row if necessary - if (iter.y_index < max_yi) { - iter.MoveToNextRow(iter); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_rowptr + argb_cnv_rowstride); - } else { - argb_cnv_rowstride = 0; - } - lastyi = yi; - } - } - } - if (filtering == kFilterLinear) { - ScaleARGBFilterCols(dst_argb, argb_cnv_rowptr + xl_offset, dst_width, x, dx); - } else { - int yf = (y >> 8) & 255; - InterpolateRow(row, argb_cnv_rowptr + xl_offset, argb_cnv_rowstride, clip_src_width, yf); - ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); - } - dst_argb += dst_stride_argb; - y += dy; - } - free_aligned_buffer_64(row); - free_aligned_buffer_64(argb_cnv_row); -} - -// Scale YUV to ARGB up with bilinear interpolation. 
-static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space) { - int j; - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = - filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; - const int max_y = (src_height - 1) << 16; - - // Allocate 1 row of ARGB for source conversion. - align_buffer_64(argb_cnv_row, src_width * 4); - -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(dst_width, 8)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_DSPR2) - if (TestCpuFlag(kCpuHasDSPR2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - InterpolateRow = InterpolateRow_DSPR2; - } -#endif - if (src_width >= 32768) { - ScaleARGBFilterCols = filtering ? 
- ScaleARGBFilterCols64_C : ScaleARGBCols64_C; - } -#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; - } -#endif -#if defined(HAS_SCALEARGBFILTERCOLS_NEON) - if (filtering && TestCpuFlag(kCpuHasNEON)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { - ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; - } - } -#endif -#if defined(HAS_SCALEARGBCOLS_SSE2) - if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBCols_SSE2; - } -#endif -#if defined(HAS_SCALEARGBCOLS_NEON) - if (!filtering && TestCpuFlag(kCpuHasNEON)) { - ScaleARGBFilterCols = ScaleARGBCols_Any_NEON; - if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBFilterCols = ScaleARGBCols_NEON; - } - } -#endif - if (!filtering && src_width * 2 == dst_width && x < 0x8000) { - ScaleARGBFilterCols = ScaleARGBColsUp2_C; -#if defined(HAS_SCALEARGBCOLSUP2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { - ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; - } -#endif - } - - if (y > max_y) { - y = max_y; - } - - int yi = y >> 16; - - YUVBuferIter iter; - iter.src_width = src_width; - iter.src_height = src_height; - iter.src_stride_y = src_stride_y; - iter.src_stride_u = src_stride_u; - iter.src_stride_v = src_stride_v; - iter.src_y = src_y; - iter.src_u = src_u; - iter.src_v = src_v; - YUVBuferIter_Init(iter, src_fourcc, yuv_color_space); - iter.MoveTo(iter, yi); - - // Allocate 2 rows of ARGB. 
- const int kRowSize = (dst_width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - uint8* rowptr = row; - int rowstride = kRowSize; - int lastyi = yi; - - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row); - ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx); - - if (filtering == kFilterLinear) { - rowstride = 0; - } - // Prepare next row if necessary - if (filtering != kFilterLinear) { - if ((yi + 1) < src_height) { - iter.MoveToNextRow(iter); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row); - ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx); - }else { - rowstride = 0; - } - } - - const int max_yi = src_height - 1; - for (j = 0; j < dst_height; ++j) { - yi = y >> 16; - if (yi != lastyi) { - if (y > max_y) { - y = max_y; - yi = y >> 16; - } - if (yi != lastyi) { - if (filtering == kFilterLinear) { - iter.MoveToNextRow(iter); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row); - ScaleARGBFilterCols(rowptr, argb_cnv_row, dst_width, x, dx); - } else { - // Prepare next row if necessary - if (yi < max_yi) { - iter.MoveToNextRow(iter); - rowptr += rowstride; - rowstride = -rowstride; - // TODO(fbarchard): Convert the clipped region of row. - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row); - ScaleARGBFilterCols(rowptr + rowstride, argb_cnv_row, dst_width, x, dx); - } else { - rowstride = 0; - } - } - lastyi = yi; - } - } - if (filtering == kFilterLinear) { - InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); - } else { - int yf = (y >> 8) & 255; - InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); - } - dst_argb += dst_stride_argb; - y += dy; - } - free_aligned_buffer_64(row); - free_aligned_buffer_64(argb_cnv_row); -} - -// Scale ARGB to/from any dimensions, without interpolation. -// Fixed point math is used for performance: The upper 16 bits -// of x and dx is the integer part of the source position and -// the lower 16 bits are the fixed decimal part. 
- -static void ScaleYUVToARGBSimple(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int x, int dx, int y, int dy, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space) { - int j; - void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = - (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; - - // Allocate 1 row of ARGB for source conversion. - align_buffer_64(argb_cnv_row, src_width * 4); - -#if defined(HAS_SCALEARGBCOLS_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { - ScaleARGBCols = ScaleARGBCols_SSE2; - } -#endif -#if defined(HAS_SCALEARGBCOLS_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ScaleARGBCols = ScaleARGBCols_Any_NEON; - if (IS_ALIGNED(dst_width, 8)) { - ScaleARGBCols = ScaleARGBCols_NEON; - } - } -#endif - if (src_width * 2 == dst_width && x < 0x8000) { - ScaleARGBCols = ScaleARGBColsUp2_C; -#if defined(HAS_SCALEARGBCOLSUP2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { - ScaleARGBCols = ScaleARGBColsUp2_SSE2; - } -#endif - } - - int yi = y >> 16; - - YUVBuferIter iter; - iter.src_width = src_width; - iter.src_height = src_height; - iter.src_stride_y = src_stride_y; - iter.src_stride_u = src_stride_u; - iter.src_stride_v = src_stride_v; - iter.src_y = src_y; - iter.src_u = src_u; - iter.src_v = src_v; - YUVBuferIter_Init(iter, src_fourcc, yuv_color_space); - iter.MoveTo(iter, yi); - - int lasty = yi; - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row); - - for (j = 0; j < dst_height; ++j) { - yi = y >> 16; - if (yi != lasty) { - iter.MoveTo(iter, yi); - YUVBuferIter_ConvertToARGBRow(iter, argb_cnv_row); - lasty = yi; - } - ScaleARGBCols(dst_argb, argb_cnv_row, dst_width, x, dx); - dst_argb += dst_stride_argb; - y += dy; - } - free_aligned_buffer_64(argb_cnv_row); -} - -static void 
YUVToARGBCopy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space) -{ - YUVBuferIter iter; - iter.src_width = src_width; - iter.src_height = src_height; - iter.src_stride_y = src_stride_y; - iter.src_stride_u = src_stride_u; - iter.src_stride_v = src_stride_v; - iter.src_y = src_y; - iter.src_u = src_u; - iter.src_v = src_v; - YUVBuferIter_Init(iter, src_fourcc, yuv_color_space); - - for (int j = 0; j < dst_height; ++j) { - YUVBuferIter_ConvertToARGBRow(iter, dst_argb); - iter.MoveToNextRow(iter); - dst_argb += dst_stride_argb; - } -} - -static void ScaleYUVToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - enum FilterMode filtering, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space) -{ - // Initial source x/y coordinate and step values as 16.16 fixed point. - int x = 0; - int y = 0; - int dx = 0; - int dy = 0; - // ARGB does not support box filter yet, but allow the user to pass it. - // Simplify filtering when possible. - filtering = ScaleFilterReduce(src_width, src_height, - dst_width, dst_height, - filtering); - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); - - // Special case for integer step values. - if (((dx | dy) & 0xffff) == 0) { - if (!dx || !dy) { // 1 pixel wide and/or tall. - filtering = kFilterNone; - } else { - // Optimized even scale down. ie 2, 4, 6, 8, 10x. - if (!(dx & 0x10000) && !(dy & 0x10000)) { - if (dx == 0x20000) { - // Optimized 1/2 downsample. 
- ScaleYUVToARGBDown2(src_width, src_height, - dst_width, dst_height, - src_stride_y, - src_stride_u, - src_stride_v, - dst_stride_argb, - src_y, - src_u, - src_v, - dst_argb, - x, dx, y, dy, - filtering, - src_fourcc, - yuv_color_space); - return; - } - ScaleYUVToARGBDownEven(src_width, src_height, - dst_width, dst_height, - src_stride_y, - src_stride_u, - src_stride_v, - dst_stride_argb, - src_y, - src_u, - src_v, - dst_argb, - x, dx, y, dy, - filtering, - src_fourcc, - yuv_color_space); - return; - } - // Optimized odd scale down. ie 3, 5, 7, 9x. - if ((dx & 0x10000) && (dy & 0x10000)) { - filtering = kFilterNone; - if (dx == 0x10000 && dy == 0x10000) { - // Straight conversion and copy. - YUVToARGBCopy(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - src_width, src_height, - dst_argb, dst_stride_argb, - dst_width, dst_height, - src_fourcc, - yuv_color_space); - return; - } - } - } - } - if (filtering && dy < 65536) { - ScaleYUVToARGBBilinearUp(src_width, src_height, - dst_width, dst_height, - src_stride_y, - src_stride_u, - src_stride_v, - dst_stride_argb, - src_y, - src_u, - src_v, - dst_argb, - x, dx, y, dy, - filtering, - src_fourcc, - yuv_color_space); - return; - } - if (filtering) { - ScaleYUVToARGBBilinearDown(src_width, src_height, - dst_width, dst_height, - src_stride_y, - src_stride_u, - src_stride_v, - dst_stride_argb, - src_y, - src_u, - src_v, - dst_argb, - x, dx, y, dy, - filtering, - src_fourcc, - yuv_color_space); - return; - } - ScaleYUVToARGBSimple(src_width, src_height, - dst_width, dst_height, - src_stride_y, - src_stride_u, - src_stride_v, - dst_stride_argb, - src_y, - src_u, - src_v, - dst_argb, - x, dx, y, dy, - src_fourcc, - yuv_color_space); -} - -bool IsConvertSupported(uint32 src_fourcc) -{ - if (src_fourcc == FOURCC_I444 || - src_fourcc == FOURCC_I422 || - src_fourcc == FOURCC_I420) { - return true; - } - return false; -} - -LIBYUV_API -int YUVToARGBScale(const uint8* src_y, int src_stride_y, - const uint8* src_u, 
int src_stride_u, - const uint8* src_v, int src_stride_v, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - enum FilterMode filtering) -{ - if (!src_y || !src_u || !src_v || - src_width == 0 || src_height == 0 || - !dst_argb || dst_width <= 0 || dst_height <= 0) { - return -1; - } - if (!IsConvertSupported(src_fourcc)) { - return -1; - } - ScaleYUVToARGB(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - src_width, src_height, - dst_argb, dst_stride_argb, - dst_width, dst_height, - filtering, - src_fourcc, - yuv_color_space); - return 0; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/gfx/ycbcr/scale_yuv_argb.h b/gfx/ycbcr/scale_yuv_argb.h deleted file mode 100644 index d1a42db1b..000000000 --- a/gfx/ycbcr/scale_yuv_argb.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef INCLUDE_LIBYUV_SCALE_YUV_ARGB_H_ // NOLINT -#define INCLUDE_LIBYUV_SCALE_YUV_ARGB_H_ - -#include "libyuv/basic_types.h" -#include "libyuv/scale.h" // For FilterMode - -#include "ImageTypes.h" // For YUVColorSpace - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -int YUVToARGBScale(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint32 src_fourcc, - mozilla::YUVColorSpace yuv_color_space, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - enum FilterMode filtering); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_SCALE_YUV_ARGB_H_ NOLINT diff --git a/gfx/ycbcr/update.sh b/gfx/ycbcr/update.sh deleted file mode 100644 index 3a38fe81a..000000000 --- a/gfx/ycbcr/update.sh +++ /dev/null @@ -1,12 +0,0 @@ -# update.sh <chromium-src-directory> -cp $1/media/base/yuv_convert.h . -cp $1/media/base/yuv_convert.cc yuv_convert.cpp -cp $1/media/base/yuv_row.h . -cp $1/media/base/yuv_row_table.cc yuv_row_table.cpp -cp $1/media/base/yuv_row_posix.cc yuv_row_posix.cpp -cp $1/media/base/yuv_row_win.cc yuv_row_win.cpp -cp $1/media/base/yuv_row_posix.cc yuv_row_c.cpp -patch -p3 <convert.patch -patch -p3 <win64.patch -patch -p3 <TypeFromSize.patch -patch -p3 <QuellGccWarnings.patch diff --git a/gfx/ycbcr/win64.patch b/gfx/ycbcr/win64.patch deleted file mode 100644 index bdccf2784..000000000 --- a/gfx/ycbcr/win64.patch +++ /dev/null @@ -1,210 +0,0 @@ -diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp -new file mode 100644 ---- /dev/null -+++ b/gfx/ycbcr/yuv_row_win64.cpp -@@ -0,0 +1,205 @@ -+// Copyright (c) 2010 The Chromium Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style license that can be -+// found in the LICENSE file. 
-+ -+#include "yuv_row.h" -+ -+extern "C" { -+ -+// x64 compiler doesn't support MMX and inline assembler. Use SSE2 intrinsics. -+ -+#define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048) -+#define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096) -+ -+#include <emmintrin.h> -+ -+static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width) { -+ __m128i xmm0, xmmY1, xmmY2; -+ __m128 xmmY; -+ -+ while (width >= 2) { -+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)), -+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++))); -+ -+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++)); -+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); -+ -+ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++)); -+ xmmY2 = _mm_adds_epi16(xmmY2, xmm0); -+ -+ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), -+ 0x44); -+ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); -+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); -+ -+ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); -+ rgb_buf += 8; -+ width -= 2; -+ } -+ -+ if (width) { -+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)), -+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf))); -+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf)); -+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); -+ xmmY1 = _mm_srai_epi16(xmmY1, 6); -+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); -+ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); -+ } -+} -+ -+static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int 
source_dx) { -+ __m128i xmm0, xmmY1, xmmY2; -+ __m128 xmmY; -+ uint8 u, v, y; -+ int x = 0; -+ -+ while (width >= 2) { -+ u = u_buf[x >> 17]; -+ v = v_buf[x >> 17]; -+ y = y_buf[x >> 16]; -+ x += source_dx; -+ -+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)), -+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v))); -+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); -+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); -+ -+ y = y_buf[x >> 16]; -+ x += source_dx; -+ -+ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); -+ xmmY2 = _mm_adds_epi16(xmmY2, xmm0); -+ -+ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), -+ 0x44); -+ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); -+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); -+ -+ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); -+ rgb_buf += 8; -+ width -= 2; -+ } -+ -+ if (width) { -+ u = u_buf[x >> 17]; -+ v = v_buf[x >> 17]; -+ y = y_buf[x >> 16]; -+ -+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)), -+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v))); -+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); -+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); -+ xmmY1 = _mm_srai_epi16(xmmY1, 6); -+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); -+ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); -+ } -+} -+ -+static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) { -+ __m128i xmm0, xmmY1, xmmY2; -+ __m128 xmmY; -+ uint8 u0, u1, v0, v1, y0, y1; -+ uint32 uv_frac, y_frac, u, v, y; -+ int x = 0; -+ -+ if (source_dx >= 0x20000) { -+ x = 32768; -+ } -+ -+ while(width >= 2) { -+ u0 = u_buf[x >> 17]; -+ u1 = 
u_buf[(x >> 17) + 1]; -+ v0 = v_buf[x >> 17]; -+ v1 = v_buf[(x >> 17) + 1]; -+ y0 = y_buf[x >> 16]; -+ y1 = y_buf[(x >> 16) + 1]; -+ uv_frac = (x & 0x1fffe); -+ y_frac = (x & 0xffff); -+ u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17; -+ v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17; -+ y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16; -+ x += source_dx; -+ -+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)), -+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v))); -+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); -+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); -+ -+ y0 = y_buf[x >> 16]; -+ y1 = y_buf[(x >> 16) + 1]; -+ y_frac = (x & 0xffff); -+ y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16; -+ x += source_dx; -+ -+ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); -+ xmmY2 = _mm_adds_epi16(xmmY2, xmm0); -+ -+ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), -+ 0x44); -+ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); -+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); -+ -+ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); -+ rgb_buf += 8; -+ width -= 2; -+ } -+ -+ if (width) { -+ u = u_buf[x >> 17]; -+ v = v_buf[x >> 17]; -+ y = y_buf[x >> 16]; -+ -+ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)), -+ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v))); -+ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); -+ -+ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); -+ xmmY1 = _mm_srai_epi16(xmmY1, 6); -+ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); -+ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); -+ } -+} -+ -+void FastConvertYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width) { 
-+ FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width); -+} -+ -+void ScaleYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) { -+ ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -+} -+ -+void LinearScaleYUVToRGB32Row(const uint8* y_buf, -+ const uint8* u_buf, -+ const uint8* v_buf, -+ uint8* rgb_buf, -+ int width, -+ int source_dx) { -+ LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, -+ source_dx); -+} -+ -+} // extern "C" diff --git a/gfx/ycbcr/ycbcr_to_rgb565.cpp b/gfx/ycbcr/ycbcr_to_rgb565.cpp deleted file mode 100644 index 0572e3e09..000000000 --- a/gfx/ycbcr/ycbcr_to_rgb565.cpp +++ /dev/null @@ -1,672 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include <stdlib.h> -#include <limits.h> -#include "nsDebug.h" -#include "ycbcr_to_rgb565.h" -#include "nsAlgorithm.h" - - - -#ifdef HAVE_YCBCR_TO_RGB565 - -namespace mozilla { - -namespace gfx { - -/*This contains all of the parameters that are needed to convert a row. - Passing them in a struct instead of as individual parameters saves the need - to continually push onto the stack the ones that are fixed for every row.*/ -struct yuv2rgb565_row_scale_bilinear_ctx{ - uint16_t *rgb_row; - const uint8_t *y_row; - const uint8_t *u_row; - const uint8_t *v_row; - int y_yweight; - int y_pitch; - int width; - int source_x0_q16; - int source_dx_q16; - /*Not used for 4:4:4, except with chroma-nearest.*/ - int source_uv_xoffs_q16; - /*Not used for 4:4:4 or chroma-nearest.*/ - int uv_pitch; - /*Not used for 4:2:2, 4:4:4, or chroma-nearest.*/ - int uv_yweight; -}; - - - -/*This contains all of the parameters that are needed to convert a row. 
- Passing them in a struct instead of as individual parameters saves the need - to continually push onto the stack the ones that are fixed for every row.*/ -struct yuv2rgb565_row_scale_nearest_ctx{ - uint16_t *rgb_row; - const uint8_t *y_row; - const uint8_t *u_row; - const uint8_t *v_row; - int width; - int source_x0_q16; - int source_dx_q16; - /*Not used for 4:4:4.*/ - int source_uv_xoffs_q16; -}; - - - -typedef void (*yuv2rgb565_row_scale_bilinear_func)( - const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither); - -typedef void (*yuv2rgb565_row_scale_nearest_func)( - const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither); - - - -//TODO: fix NEON asm for iOS -# if defined(MOZILLA_MAY_SUPPORT_NEON) && !defined(__APPLE__) - -extern "C" void ScaleYCbCr42xToRGB565_BilinearY_Row_NEON( - const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither); - -void __attribute((noinline)) yuv42x_to_rgb565_row_neon(uint16 *dst, - const uint8 *y, - const uint8 *u, - const uint8 *v, - int n, - int oddflag); - -#endif - - - -/*Bilinear interpolation of a single value. - This uses the exact same formulas as the asm, even though it adds some extra - shifts that do nothing but reduce accuracy.*/ -static int bislerp(const uint8_t *row, - int pitch, - int source_x, - int xweight, - int yweight) { - int a; - int b; - int c; - int d; - a = row[source_x]; - b = row[source_x+1]; - c = row[source_x+pitch]; - d = row[source_x+pitch+1]; - a = ((a<<8)+(c-a)*yweight+128)>>8; - b = ((b<<8)+(d-b)*yweight+128)>>8; - return ((a<<8)+(b-a)*xweight+128)>>8; -} - -/*Convert a single pixel from Y'CbCr to RGB565. 
- This uses the exact same formulas as the asm, even though we could make the - constants a lot more accurate with 32-bit wide registers.*/ -static uint16_t yu2rgb565(int y, int u, int v, int dither) { - /*This combines the constant offset that needs to be added during the Y'CbCr - conversion with a rounding offset that depends on the dither parameter.*/ - static const int DITHER_BIAS[4][3]={ - {-14240, 8704, -17696}, - {-14240+128,8704+64, -17696+128}, - {-14240+256,8704+128,-17696+256}, - {-14240+384,8704+192,-17696+384} - }; - int r; - int g; - int b; - r = clamped((74*y+102*v+DITHER_BIAS[dither][0])>>9, 0, 31); - g = clamped((74*y-25*u-52*v+DITHER_BIAS[dither][1])>>8, 0, 63); - b = clamped((74*y+129*u+DITHER_BIAS[dither][2])>>9, 0, 31); - return (uint16_t)(r<<11 | g<<5 | b); -} - -static void ScaleYCbCr420ToRGB565_Bilinear_Row_C( - const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){ - int x; - int source_x_q16; - source_x_q16 = ctx->source_x0_q16; - for (x = 0; x < ctx->width; x++) { - int source_x; - int xweight; - int y; - int u; - int v; - xweight = ((source_x_q16&0xFFFF)+128)>>8; - source_x = source_x_q16>>16; - y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight); - xweight = (((source_x_q16+ctx->source_uv_xoffs_q16)&0x1FFFF)+256)>>9; - source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17; - source_x_q16 += ctx->source_dx_q16; - u = bislerp(ctx->u_row, ctx->uv_pitch, source_x, xweight, ctx->uv_yweight); - v = bislerp(ctx->v_row, ctx->uv_pitch, source_x, xweight, ctx->uv_yweight); - ctx->rgb_row[x] = yu2rgb565(y, u, v, dither); - dither ^= 3; - } -} - -static void ScaleYCbCr422ToRGB565_Bilinear_Row_C( - const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){ - int x; - int source_x_q16; - source_x_q16 = ctx->source_x0_q16; - for (x = 0; x < ctx->width; x++) { - int source_x; - int xweight; - int y; - int u; - int v; - xweight = ((source_x_q16&0xFFFF)+128)>>8; - source_x = source_x_q16>>16; - y = bislerp(ctx->y_row, 
ctx->y_pitch, source_x, xweight, ctx->y_yweight); - xweight = (((source_x_q16+ctx->source_uv_xoffs_q16)&0x1FFFF)+256)>>9; - source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17; - source_x_q16 += ctx->source_dx_q16; - u = bislerp(ctx->u_row, ctx->uv_pitch, source_x, xweight, ctx->y_yweight); - v = bislerp(ctx->v_row, ctx->uv_pitch, source_x, xweight, ctx->y_yweight); - ctx->rgb_row[x] = yu2rgb565(y, u, v, dither); - dither ^= 3; - } -} - -static void ScaleYCbCr444ToRGB565_Bilinear_Row_C( - const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){ - int x; - int source_x_q16; - source_x_q16 = ctx->source_x0_q16; - for (x = 0; x < ctx->width; x++) { - int source_x; - int xweight; - int y; - int u; - int v; - xweight = ((source_x_q16&0xFFFF)+128)>>8; - source_x = source_x_q16>>16; - source_x_q16 += ctx->source_dx_q16; - y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight); - u = bislerp(ctx->u_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight); - v = bislerp(ctx->v_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight); - ctx->rgb_row[x] = yu2rgb565(y, u, v, dither); - dither ^= 3; - } -} - -static void ScaleYCbCr42xToRGB565_BilinearY_Row_C( - const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){ - int x; - int source_x_q16; - source_x_q16 = ctx->source_x0_q16; - for (x = 0; x < ctx->width; x++) { - int source_x; - int xweight; - int y; - int u; - int v; - xweight = ((source_x_q16&0xFFFF)+128)>>8; - source_x = source_x_q16>>16; - y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight); - source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17; - source_x_q16 += ctx->source_dx_q16; - u = ctx->u_row[source_x]; - v = ctx->v_row[source_x]; - ctx->rgb_row[x] = yu2rgb565(y, u, v, dither); - dither ^= 3; - } -} - -static void ScaleYCbCr444ToRGB565_BilinearY_Row_C( - const yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither){ - int x; - int source_x_q16; - source_x_q16 = ctx->source_x0_q16; - for (x = 0; x < ctx->width; x++) { - 
int source_x; - int xweight; - int y; - int u; - int v; - xweight = ((source_x_q16&0xFFFF)+128)>>8; - source_x = source_x_q16>>16; - y = bislerp(ctx->y_row, ctx->y_pitch, source_x, xweight, ctx->y_yweight); - source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>16; - source_x_q16 += ctx->source_dx_q16; - u = ctx->u_row[source_x]; - v = ctx->v_row[source_x]; - ctx->rgb_row[x] = yu2rgb565(y, u, v, dither); - dither ^= 3; - } -} - -static void ScaleYCbCr42xToRGB565_Nearest_Row_C( - const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither){ - int y; - int u; - int v; - int x; - int source_x_q16; - int source_x; - source_x_q16 = ctx->source_x0_q16; - for (x = 0; x < ctx->width; x++) { - source_x = source_x_q16>>16; - y = ctx->y_row[source_x]; - source_x = (source_x_q16+ctx->source_uv_xoffs_q16)>>17; - source_x_q16 += ctx->source_dx_q16; - u = ctx->u_row[source_x]; - v = ctx->v_row[source_x]; - ctx->rgb_row[x] = yu2rgb565(y, u, v, dither); - dither ^= 3; - } -} - -static void ScaleYCbCr444ToRGB565_Nearest_Row_C( - const yuv2rgb565_row_scale_nearest_ctx *ctx, int dither){ - int y; - int u; - int v; - int x; - int source_x_q16; - int source_x; - source_x_q16 = ctx->source_x0_q16; - for (x = 0; x < ctx->width; x++) { - source_x = source_x_q16>>16; - source_x_q16 += ctx->source_dx_q16; - y = ctx->y_row[source_x]; - u = ctx->u_row[source_x]; - v = ctx->v_row[source_x]; - ctx->rgb_row[x] = yu2rgb565(y, u, v, dither); - dither ^= 3; - } -} - -void ScaleYCbCrToRGB565(const uint8_t *y_buf, - const uint8_t *u_buf, - const uint8_t *v_buf, - uint8_t *rgb_buf, - int source_x0, - int source_y0, - int source_width, - int source_height, - int width, - int height, - int y_pitch, - int uv_pitch, - int rgb_pitch, - YUVType yuv_type, - ScaleFilter filter) { - int source_x0_q16; - int source_y0_q16; - int source_dx_q16; - int source_dy_q16; - int source_uv_xoffs_q16; - int source_uv_yoffs_q16; - int x_shift; - int y_shift; - int ymin; - int ymax; - int uvmin; - int uvmax; - int dither; - /*We 
don't support negative destination rectangles (just flip the source - instead), and for empty ones there's nothing to do.*/ - if (width <= 0 || height <= 0) - return; - /*These bounds are required to avoid 16.16 fixed-point overflow.*/ - NS_ASSERTION(source_x0 > (INT_MIN>>16) && source_x0 < (INT_MAX>>16), - "ScaleYCbCrToRGB565 source X offset out of bounds."); - NS_ASSERTION(source_x0+source_width > (INT_MIN>>16) - && source_x0+source_width < (INT_MAX>>16), - "ScaleYCbCrToRGB565 source width out of bounds."); - NS_ASSERTION(source_y0 > (INT_MIN>>16) && source_y0 < (INT_MAX>>16), - "ScaleYCbCrToRGB565 source Y offset out of bounds."); - NS_ASSERTION(source_y0+source_height > (INT_MIN>>16) - && source_y0+source_height < (INT_MAX>>16), - "ScaleYCbCrToRGB565 source height out of bounds."); - /*We require the same stride for Y' and Cb and Cr for 4:4:4 content.*/ - NS_ASSERTION(yuv_type != YV24 || y_pitch == uv_pitch, - "ScaleYCbCrToRGB565 luma stride differs from chroma for 4:4:4 content."); - /*We assume we can read outside the bounds of the input, because it makes - the code much simpler (and in practice is true: both Theora and VP8 return - padded reference frames). - In practice, we do not even _have_ the actual bounds of the source, as - we are passed a crop rectangle from it, and not the dimensions of the full - image. - This assertion will not guarantee our out-of-bounds reads are safe, but it - should at least catch the simple case of passing in an unpadded buffer.*/ - NS_ASSERTION(abs(y_pitch) >= abs(source_width)+16, - "ScaleYCbCrToRGB565 source image unpadded?"); - /*The NEON code requires the pointers to be aligned to a 16-byte boundary at - the start of each row. - This should be true for all of our sources. 
- We could try to fix this up if it's not true by adjusting source_x0, but - that would require the mis-alignment to be the same for the U and V - planes.*/ - NS_ASSERTION((y_pitch&15) == 0 && (uv_pitch&15) == 0 && - ((y_buf-(uint8_t *)nullptr)&15) == 0 && - ((u_buf-(uint8_t *)nullptr)&15) == 0 && - ((v_buf-(uint8_t *)nullptr)&15) == 0, - "ScaleYCbCrToRGB565 source image unaligned"); - /*We take an area-based approach to pixel coverage to avoid shifting by small - amounts (or not so small, when up-scaling or down-scaling by a large - factor). - - An illustrative example: scaling 4:2:0 up by 2, using JPEG chroma cositing^. - - + = RGB destination locations - * = Y' source locations - - = Cb, Cr source locations - - + + + + + + + + - * * * * - + + + + + + + + - - - - + + + + + + + + - * * * * - + + + + + + + + - - + + + + + + + + - * * * * - + + + + + + + + - - - - + + + + + + + + - * * * * - + + + + + + + + - - So, the coordinates of the upper-left + (first destination site) should - be (-0.25,-0.25) in the source Y' coordinate system. - Similarly, the coordinates should be (-0.375,-0.375) in the source Cb, Cr - coordinate system. - Note that the origin and scale of these two coordinate systems is not the - same! 
- - ^JPEG cositing is required for Theora; VP8 doesn't specify cositing rules, - but nearly all software converters in existence (at least those that are - open source, and many that are not) use JPEG cositing instead of MPEG.*/ - source_dx_q16 = (source_width<<16) / width; - source_x0_q16 = (source_x0<<16)+(source_dx_q16>>1)-0x8000; - source_dy_q16 = (source_height<<16) / height; - source_y0_q16 = (source_y0<<16)+(source_dy_q16>>1)-0x8000; - x_shift = (yuv_type != YV24); - y_shift = (yuv_type == YV12); - /*These two variables hold the difference between the origins of the Y' and - the Cb, Cr coordinate systems, using the scale of the Y' coordinate - system.*/ - source_uv_xoffs_q16 = -(x_shift<<15); - source_uv_yoffs_q16 = -(y_shift<<15); - /*Compute the range of source rows we'll actually use. - This doesn't guarantee we won't read outside this range.*/ - ymin = source_height >= 0 ? source_y0 : source_y0+source_height-1; - ymax = source_height >= 0 ? source_y0+source_height-1 : source_y0; - uvmin = ymin>>y_shift; - uvmax = ((ymax+1+y_shift)>>y_shift)-1; - /*Pick a dithering pattern. 
- The "&3" at the end is just in case RAND_MAX is lying.*/ - dither = (rand()/(RAND_MAX>>2))&3; - /*Nearest-neighbor scaling.*/ - if (filter == FILTER_NONE) { - yuv2rgb565_row_scale_nearest_ctx ctx; - yuv2rgb565_row_scale_nearest_func scale_row; - int y; - /*Add rounding offsets once, in advance.*/ - source_x0_q16 += 0x8000; - source_y0_q16 += 0x8000; - source_uv_xoffs_q16 += (x_shift<<15); - source_uv_yoffs_q16 += (y_shift<<15); - if (yuv_type == YV12) - scale_row = ScaleYCbCr42xToRGB565_Nearest_Row_C; - else - scale_row = ScaleYCbCr444ToRGB565_Nearest_Row_C; - ctx.width = width; - ctx.source_x0_q16 = source_x0_q16; - ctx.source_dx_q16 = source_dx_q16; - ctx.source_uv_xoffs_q16 = source_uv_xoffs_q16; - for (y=0; y<height; y++) { - int source_y; - ctx.rgb_row = (uint16_t *)(rgb_buf + y*rgb_pitch); - source_y = source_y0_q16>>16; - source_y = clamped(source_y, ymin, ymax); - ctx.y_row = y_buf + source_y*y_pitch; - source_y = (source_y0_q16+source_uv_yoffs_q16)>>(16+y_shift); - source_y = clamped(source_y, uvmin, uvmax); - source_y0_q16 += source_dy_q16; - ctx.u_row = u_buf + source_y*uv_pitch; - ctx.v_row = v_buf + source_y*uv_pitch; - (*scale_row)(&ctx, dither); - dither ^= 2; - } - } - /*Bilinear scaling.*/ - else { - yuv2rgb565_row_scale_bilinear_ctx ctx; - yuv2rgb565_row_scale_bilinear_func scale_row; - int uvxscale_min; - int uvxscale_max; - int uvyscale_min; - int uvyscale_max; - int y; - /*Check how close the chroma scaling is to unity. - If it's close enough, we can get away with nearest-neighbor chroma - sub-sampling, and only doing bilinear on luma. - If a given axis is subsampled, we use bounds on the luma step of - [0.67...2], which is equivalent to scaling chroma by [1...3]. - If it's not subsampled, we use bounds of [0.5...1.33], which is - equivalent to scaling chroma by [0.75...2]. 
- The lower bound is chosen as a trade-off between speed and how terrible - nearest neighbor looks when upscaling.*/ -# define CHROMA_NEAREST_SUBSAMP_STEP_MIN 0xAAAA -# define CHROMA_NEAREST_NORMAL_STEP_MIN 0x8000 -# define CHROMA_NEAREST_SUBSAMP_STEP_MAX 0x20000 -# define CHROMA_NEAREST_NORMAL_STEP_MAX 0x15555 - uvxscale_min = yuv_type != YV24 ? - CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN; - uvxscale_max = yuv_type != YV24 ? - CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX; - uvyscale_min = yuv_type == YV12 ? - CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN; - uvyscale_max = yuv_type == YV12 ? - CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX; - if (uvxscale_min <= abs(source_dx_q16) - && abs(source_dx_q16) <= uvxscale_max - && uvyscale_min <= abs(source_dy_q16) - && abs(source_dy_q16) <= uvyscale_max) { - /*Add the rounding offsets now.*/ - source_uv_xoffs_q16 += 1<<(15+x_shift); - source_uv_yoffs_q16 += 1<<(15+y_shift); - if (yuv_type != YV24) { - scale_row = -//TODO: fix NEON asm for iOS -# if defined(MOZILLA_MAY_SUPPORT_NEON) && !defined(__APPLE__) - supports_neon() ? 
ScaleYCbCr42xToRGB565_BilinearY_Row_NEON : -# endif - ScaleYCbCr42xToRGB565_BilinearY_Row_C; - } - else - scale_row = ScaleYCbCr444ToRGB565_BilinearY_Row_C; - } - else { - if (yuv_type == YV12) - scale_row = ScaleYCbCr420ToRGB565_Bilinear_Row_C; - else if (yuv_type == YV16) - scale_row = ScaleYCbCr422ToRGB565_Bilinear_Row_C; - else - scale_row = ScaleYCbCr444ToRGB565_Bilinear_Row_C; - } - ctx.width = width; - ctx.y_pitch = y_pitch; - ctx.source_x0_q16 = source_x0_q16; - ctx.source_dx_q16 = source_dx_q16; - ctx.source_uv_xoffs_q16 = source_uv_xoffs_q16; - ctx.uv_pitch = uv_pitch; - for (y=0; y<height; y++) { - int source_y; - int yweight; - int uvweight; - ctx.rgb_row = (uint16_t *)(rgb_buf + y*rgb_pitch); - source_y = (source_y0_q16+128)>>16; - yweight = ((source_y0_q16+128)>>8)&0xFF; - if (source_y < ymin) { - source_y = ymin; - yweight = 0; - } - if (source_y > ymax) { - source_y = ymax; - yweight = 0; - } - ctx.y_row = y_buf + source_y*y_pitch; - source_y = source_y0_q16+source_uv_yoffs_q16+(128<<y_shift); - source_y0_q16 += source_dy_q16; - uvweight = source_y>>(8+y_shift)&0xFF; - source_y >>= 16+y_shift; - if (source_y < uvmin) { - source_y = uvmin; - uvweight = 0; - } - if (source_y > uvmax) { - source_y = uvmax; - uvweight = 0; - } - ctx.u_row = u_buf + source_y*uv_pitch; - ctx.v_row = v_buf + source_y*uv_pitch; - ctx.y_yweight = yweight; - ctx.uv_yweight = uvweight; - (*scale_row)(&ctx, dither); - dither ^= 2; - } - } -} - -bool IsScaleYCbCrToRGB565Fast(int source_x0, - int source_y0, - int source_width, - int source_height, - int width, - int height, - YUVType yuv_type, - ScaleFilter filter) -{ - // Very fast. 
- if (width <= 0 || height <= 0) - return true; -# if defined(MOZILLA_MAY_SUPPORT_NEON) - if (filter != FILTER_NONE) { - int source_dx_q16; - int source_dy_q16; - int uvxscale_min; - int uvxscale_max; - int uvyscale_min; - int uvyscale_max; - source_dx_q16 = (source_width<<16) / width; - source_dy_q16 = (source_height<<16) / height; - uvxscale_min = yuv_type != YV24 ? - CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN; - uvxscale_max = yuv_type != YV24 ? - CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX; - uvyscale_min = yuv_type == YV12 ? - CHROMA_NEAREST_SUBSAMP_STEP_MIN : CHROMA_NEAREST_NORMAL_STEP_MIN; - uvyscale_max = yuv_type == YV12 ? - CHROMA_NEAREST_SUBSAMP_STEP_MAX : CHROMA_NEAREST_NORMAL_STEP_MAX; - if (uvxscale_min <= abs(source_dx_q16) - && abs(source_dx_q16) <= uvxscale_max - && uvyscale_min <= abs(source_dy_q16) - && abs(source_dy_q16) <= uvyscale_max) { - if (yuv_type != YV24) - return supports_neon(); - } - } -# endif - return false; -} - - - -void yuv_to_rgb565_row_c(uint16 *dst, - const uint8 *y, - const uint8 *u, - const uint8 *v, - int x_shift, - int pic_x, - int pic_width) -{ - int x; - for (x = 0; x < pic_width; x++) - { - dst[x] = yu2rgb565(y[pic_x+x], - u[(pic_x+x)>>x_shift], - v[(pic_x+x)>>x_shift], - 2); // Disable dithering for now. 
- } -} - -void ConvertYCbCrToRGB565(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int pic_x, - int pic_y, - int pic_width, - int pic_height, - int y_pitch, - int uv_pitch, - int rgb_pitch, - YUVType yuv_type) -{ - int x_shift; - int y_shift; - x_shift = yuv_type != YV24; - y_shift = yuv_type == YV12; -//TODO: fix NEON asm for iOS -# if defined(MOZILLA_MAY_SUPPORT_NEON) && !defined(__APPLE__) - if (yuv_type != YV24 && supports_neon()) - { - for (int i = 0; i < pic_height; i++) { - int yoffs; - int uvoffs; - yoffs = y_pitch * (pic_y+i) + pic_x; - uvoffs = uv_pitch * ((pic_y+i)>>y_shift) + (pic_x>>x_shift); - yuv42x_to_rgb565_row_neon((uint16*)(rgb_buf + rgb_pitch * i), - y_buf + yoffs, - u_buf + uvoffs, - v_buf + uvoffs, - pic_width, - pic_x&x_shift); - } - } - else -# endif - { - for (int i = 0; i < pic_height; i++) { - int yoffs; - int uvoffs; - yoffs = y_pitch * (pic_y+i); - uvoffs = uv_pitch * ((pic_y+i)>>y_shift); - yuv_to_rgb565_row_c((uint16*)(rgb_buf + rgb_pitch * i), - y_buf + yoffs, - u_buf + uvoffs, - v_buf + uvoffs, - x_shift, - pic_x, - pic_width); - } - } -} - -bool IsConvertYCbCrToRGB565Fast(int pic_x, - int pic_y, - int pic_width, - int pic_height, - YUVType yuv_type) -{ -# if defined(MOZILLA_MAY_SUPPORT_NEON) - return (yuv_type != YV24 && supports_neon()); -# else - return false; -# endif -} - -} // namespace gfx - -} // namespace mozilla - -#endif // HAVE_YCBCR_TO_RGB565 diff --git a/gfx/ycbcr/ycbcr_to_rgb565.h b/gfx/ycbcr/ycbcr_to_rgb565.h deleted file mode 100644 index 41272223b..000000000 --- a/gfx/ycbcr/ycbcr_to_rgb565.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
-#ifndef MEDIA_BASE_YCBCR_TO_RGB565_H_ -#define MEDIA_BASE_YCBCR_TO_RGB565_H_ -#include "yuv_convert.h" -#include "mozilla/arm.h" - -// It's currently only worth including this if we have NEON support. -#ifdef MOZILLA_MAY_SUPPORT_NEON -#define HAVE_YCBCR_TO_RGB565 1 -#endif - -namespace mozilla { - -namespace gfx { - -#ifdef HAVE_YCBCR_TO_RGB565 -// Convert a frame of YUV to 16 bit RGB565. -void ConvertYCbCrToRGB565(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int pic_x, - int pic_y, - int pic_width, - int pic_height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type); - -// Used to test if we have an accelerated version. -bool IsConvertYCbCrToRGB565Fast(int pic_x, - int pic_y, - int pic_width, - int pic_height, - YUVType yuv_type); - -// Scale a frame of YUV to 16 bit RGB565. -void ScaleYCbCrToRGB565(const uint8_t *yplane, - const uint8_t *uplane, - const uint8_t *vplane, - uint8_t *rgbframe, - int source_x0, - int source_y0, - int source_width, - int source_height, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type, - ScaleFilter filter); - -// Used to test if we have an accelerated version. -bool IsScaleYCbCrToRGB565Fast(int source_x0, - int source_y0, - int source_width, - int source_height, - int width, - int height, - YUVType yuv_type, - ScaleFilter filter); -#endif // HAVE_YCBCR_TO_RGB565 - -} // namespace gfx - -} // namespace mozilla - -#endif // MEDIA_BASE_YCBCR_TO_RGB565_H_ diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp deleted file mode 100644 index 78fd4ee89..000000000 --- a/gfx/ycbcr/yuv_convert.cpp +++ /dev/null @@ -1,510 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -// This webpage shows layout of YV12 and other YUV formats -// http://www.fourcc.org/yuv.php -// The actual conversion is best described here -// http://en.wikipedia.org/wiki/YUV -// An article on optimizing YUV conversion using tables instead of multiplies -// http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf -// -// YV12 is a full plane of Y and a half height, half width chroma planes -// YV16 is a full plane of Y and a full height, half width chroma planes -// YV24 is a full plane of Y and a full height, full width chroma planes -// -// ARGB pixel format is output, which on little endian is stored as BGRA. -// The alpha is set to 255, allowing the application to use RGBA or RGB32. - -#include "yuv_convert.h" - -#include "gfxPrefs.h" -#include "libyuv.h" -#include "scale_yuv_argb.h" -// Header for low level row functions. -#include "yuv_row.h" -#include "mozilla/SSE.h" - -namespace mozilla { - -namespace gfx { - -// 16.16 fixed point arithmetic -const int kFractionBits = 16; -const int kFractionMax = 1 << kFractionBits; -const int kFractionMask = ((1 << kFractionBits) - 1); - -YUVType TypeFromSize(int ywidth, - int yheight, - int cbcrwidth, - int cbcrheight) -{ - if (ywidth == cbcrwidth && yheight == cbcrheight) { - return YV24; - } - else if ((ywidth + 1) / 2 == cbcrwidth && yheight == cbcrheight) { - return YV16; - } - else { - return YV12; - } -} - -libyuv::FourCC FourCCFromYUVType(YUVType aYUVType) -{ - if (aYUVType == YV24) { - return libyuv::FOURCC_I444; - } else if (aYUVType == YV16) { - return libyuv::FOURCC_I422; - } else if (aYUVType == YV12) { - return libyuv::FOURCC_I420; - } else { - return libyuv::FOURCC_ANY; - } -} - -// Convert a frame of YUV to 32 bit ARGB. 
-void ConvertYCbCrToRGB32(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int pic_x, - int pic_y, - int pic_width, - int pic_height, - int y_pitch, - int uv_pitch, - int rgb_pitch, - YUVType yuv_type, - YUVColorSpace yuv_color_space) { - - - // Deprecated function's conversion is accurate. - // libyuv converion is a bit inaccurate to get performance. It dynamically - // calculates RGB from YUV to use simd. In it, signed byte is used for conversion's - // coefficient, but it requests 129. libyuv cut 129 to 127. And only 6 bits are - // used for a decimal part during the dynamic calculation. - // - // The function is still fast on some old intel chips. - // See Bug 1256475. - bool use_deprecated = gfxPrefs::YCbCrAccurateConversion() || - (supports_mmx() && supports_sse() && !supports_sse3() && - yuv_color_space == YUVColorSpace::BT601); - // The deprecated function only support BT601. - // See Bug 1210357. - if (yuv_color_space != YUVColorSpace::BT601) { - use_deprecated = false; - } - if (use_deprecated) { - ConvertYCbCrToRGB32_deprecated(y_buf, u_buf, v_buf, rgb_buf, - pic_x, pic_y, pic_width, pic_height, - y_pitch, uv_pitch, rgb_pitch, yuv_type); - return; - } - - if (yuv_type == YV24) { - const uint8* src_y = y_buf + y_pitch * pic_y + pic_x; - const uint8* src_u = u_buf + uv_pitch * pic_y + pic_x; - const uint8* src_v = v_buf + uv_pitch * pic_y + pic_x; - DebugOnly<int> err = libyuv::I444ToARGB(src_y, y_pitch, - src_u, uv_pitch, - src_v, uv_pitch, - rgb_buf, rgb_pitch, - pic_width, pic_height); - MOZ_ASSERT(!err); - } else if (yuv_type == YV16) { - const uint8* src_y = y_buf + y_pitch * pic_y + pic_x; - const uint8* src_u = u_buf + uv_pitch * pic_y + pic_x / 2; - const uint8* src_v = v_buf + uv_pitch * pic_y + pic_x / 2; - DebugOnly<int> err = libyuv::I422ToARGB(src_y, y_pitch, - src_u, uv_pitch, - src_v, uv_pitch, - rgb_buf, rgb_pitch, - pic_width, pic_height); - MOZ_ASSERT(!err); - } else { - MOZ_ASSERT(yuv_type == YV12); - 
const uint8* src_y = y_buf + y_pitch * pic_y + pic_x; - const uint8* src_u = u_buf + (uv_pitch * pic_y + pic_x) / 2; - const uint8* src_v = v_buf + (uv_pitch * pic_y + pic_x) / 2; - if (yuv_color_space == YUVColorSpace::BT709) { - DebugOnly<int> err = libyuv::H420ToARGB(src_y, y_pitch, - src_u, uv_pitch, - src_v, uv_pitch, - rgb_buf, rgb_pitch, - pic_width, pic_height); - MOZ_ASSERT(!err); - } else { - MOZ_ASSERT(yuv_color_space == YUVColorSpace::BT601); - DebugOnly<int> err = libyuv::I420ToARGB(src_y, y_pitch, - src_u, uv_pitch, - src_v, uv_pitch, - rgb_buf, rgb_pitch, - pic_width, pic_height); - MOZ_ASSERT(!err); - } - } -} - -// Convert a frame of YUV to 32 bit ARGB. -void ConvertYCbCrToRGB32_deprecated(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int pic_x, - int pic_y, - int pic_width, - int pic_height, - int y_pitch, - int uv_pitch, - int rgb_pitch, - YUVType yuv_type) { - unsigned int y_shift = yuv_type == YV12 ? 1 : 0; - unsigned int x_shift = yuv_type == YV24 ? 0 : 1; - // Test for SSE because the optimized code uses movntq, which is not part of MMX. - bool has_sse = supports_mmx() && supports_sse(); - // There is no optimized YV24 SSE routine so we check for this and - // fall back to the C code. - has_sse &= yuv_type != YV24; - bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0; - int x_width = odd_pic_x ? pic_width - 1 : pic_width; - - for (int y = pic_y; y < pic_height + pic_y; ++y) { - uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch; - const uint8* y_ptr = y_buf + y * y_pitch + pic_x; - const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift); - const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift); - - if (odd_pic_x) { - // Handle the single odd pixel manually and use the - // fast routines for the remaining. 
- FastConvertYUVToRGB32Row_C(y_ptr++, - u_ptr++, - v_ptr++, - rgb_row, - 1, - x_shift); - rgb_row += 4; - } - - if (has_sse) { - FastConvertYUVToRGB32Row(y_ptr, - u_ptr, - v_ptr, - rgb_row, - x_width); - } - else { - FastConvertYUVToRGB32Row_C(y_ptr, - u_ptr, - v_ptr, - rgb_row, - x_width, - x_shift); - } - } - - // MMX used for FastConvertYUVToRGB32Row requires emms instruction. - if (has_sse) - EMMS(); -} - -// C version does 8 at a time to mimic MMX code -static void FilterRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction) { - int y1_fraction = source_y_fraction; - int y0_fraction = 256 - y1_fraction; - uint8* end = ybuf + source_width; - do { - ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8; - ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8; - ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8; - ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8; - ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8; - ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8; - ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8; - ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8; - y0_ptr += 8; - y1_ptr += 8; - ybuf += 8; - } while (ybuf < end); -} - -#ifdef MOZILLA_MAY_SUPPORT_MMX -void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction); -#endif - -#ifdef MOZILLA_MAY_SUPPORT_SSE2 -void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction); -#endif - -static inline void FilterRows(uint8* ybuf, const uint8* y0_ptr, - const uint8* y1_ptr, int source_width, - int source_y_fraction) { -#ifdef MOZILLA_MAY_SUPPORT_SSE2 - if (mozilla::supports_sse2()) { - FilterRows_SSE2(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); - return; - } -#endif - -#ifdef MOZILLA_MAY_SUPPORT_MMX - 
if (mozilla::supports_mmx()) { - FilterRows_MMX(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); - return; - } -#endif - - FilterRows_C(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); -} - - -// Scale a frame of YUV to 32 bit ARGB. -void ScaleYCbCrToRGB32(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int source_width, - int source_height, - int width, - int height, - int y_pitch, - int uv_pitch, - int rgb_pitch, - YUVType yuv_type, - YUVColorSpace yuv_color_space, - ScaleFilter filter) { - - bool use_deprecated = gfxPrefs::YCbCrAccurateConversion() || -#if defined(XP_WIN) && defined(_M_X64) - // libyuv does not support SIMD scaling on win 64bit. See Bug 1295927. - supports_sse3() || -#endif - (supports_mmx() && supports_sse() && !supports_sse3()); - // The deprecated function only support BT601. - // See Bug 1210357. - if (yuv_color_space != YUVColorSpace::BT601) { - use_deprecated = false; - } - if (use_deprecated) { - ScaleYCbCrToRGB32_deprecated(y_buf, u_buf, v_buf, - rgb_buf, - source_width, source_height, - width, height, - y_pitch, uv_pitch, - rgb_pitch, - yuv_type, - ROTATE_0, - filter); - return; - } - - DebugOnly<int> err = - libyuv::YUVToARGBScale(y_buf, y_pitch, - u_buf, uv_pitch, - v_buf, uv_pitch, - FourCCFromYUVType(yuv_type), - yuv_color_space, - source_width, source_height, - rgb_buf, rgb_pitch, - width, height, - libyuv::kFilterBilinear); - MOZ_ASSERT(!err); - return; -} - -// Scale a frame of YUV to 32 bit ARGB. -void ScaleYCbCrToRGB32_deprecated(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int source_width, - int source_height, - int width, - int height, - int y_pitch, - int uv_pitch, - int rgb_pitch, - YUVType yuv_type, - Rotate view_rotate, - ScaleFilter filter) { - bool has_mmx = supports_mmx(); - - // 4096 allows 3 buffers to fit in 12k. - // Helps performance on CPU with 16K L1 cache. 
- // Large enough for 3830x2160 and 30" displays which are 2560x1600. - const int kFilterBufferSize = 4096; - // Disable filtering if the screen is too big (to avoid buffer overflows). - // This should never happen to regular users: they don't have monitors - // wider than 4096 pixels. - // TODO(fbarchard): Allow rotated videos to filter. - if (source_width > kFilterBufferSize || view_rotate) - filter = FILTER_NONE; - - unsigned int y_shift = yuv_type == YV12 ? 1 : 0; - // Diagram showing origin and direction of source sampling. - // ->0 4<- - // 7 3 - // - // 6 5 - // ->1 2<- - // Rotations that start at right side of image. - if ((view_rotate == ROTATE_180) || - (view_rotate == ROTATE_270) || - (view_rotate == MIRROR_ROTATE_0) || - (view_rotate == MIRROR_ROTATE_90)) { - y_buf += source_width - 1; - u_buf += source_width / 2 - 1; - v_buf += source_width / 2 - 1; - source_width = -source_width; - } - // Rotations that start at bottom of image. - if ((view_rotate == ROTATE_90) || - (view_rotate == ROTATE_180) || - (view_rotate == MIRROR_ROTATE_90) || - (view_rotate == MIRROR_ROTATE_180)) { - y_buf += (source_height - 1) * y_pitch; - u_buf += ((source_height >> y_shift) - 1) * uv_pitch; - v_buf += ((source_height >> y_shift) - 1) * uv_pitch; - source_height = -source_height; - } - - // Handle zero sized destination. 
- if (width == 0 || height == 0) - return; - int source_dx = source_width * kFractionMax / width; - int source_dy = source_height * kFractionMax / height; - int source_dx_uv = source_dx; - - if ((view_rotate == ROTATE_90) || - (view_rotate == ROTATE_270)) { - int tmp = height; - height = width; - width = tmp; - tmp = source_height; - source_height = source_width; - source_width = tmp; - int original_dx = source_dx; - int original_dy = source_dy; - source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits; - source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits; - source_dy = original_dx; - if (view_rotate == ROTATE_90) { - y_pitch = -1; - uv_pitch = -1; - source_height = -source_height; - } else { - y_pitch = 1; - uv_pitch = 1; - } - } - - // Need padding because FilterRows() will write 1 to 16 extra pixels - // after the end for SSE2 version. - uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16]; - uint8* ybuf = - reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15); - uint8* ubuf = ybuf + kFilterBufferSize; - uint8* vbuf = ubuf + kFilterBufferSize; - // TODO(fbarchard): Fixed point math is off by 1 on negatives. - int yscale_fixed = (source_height << kFractionBits) / height; - - // TODO(fbarchard): Split this into separate function for better efficiency. - for (int y = 0; y < height; ++y) { - uint8* dest_pixel = rgb_buf + y * rgb_pitch; - int source_y_subpixel = (y * yscale_fixed); - if (yscale_fixed >= (kFractionMax * 2)) { - source_y_subpixel += kFractionMax / 2; // For 1/2 or less, center filter. 
- } - int source_y = source_y_subpixel >> kFractionBits; - - const uint8* y0_ptr = y_buf + source_y * y_pitch; - const uint8* y1_ptr = y0_ptr + y_pitch; - - const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch; - const uint8* u1_ptr = u0_ptr + uv_pitch; - const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch; - const uint8* v1_ptr = v0_ptr + uv_pitch; - - // vertical scaler uses 16.8 fixed point - int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8; - int source_uv_fraction = - ((source_y_subpixel >> y_shift) & kFractionMask) >> 8; - - const uint8* y_ptr = y0_ptr; - const uint8* u_ptr = u0_ptr; - const uint8* v_ptr = v0_ptr; - // Apply vertical filtering if necessary. - // TODO(fbarchard): Remove memcpy when not necessary. - if (filter & mozilla::gfx::FILTER_BILINEAR_V) { - if (yscale_fixed != kFractionMax && - source_y_fraction && ((source_y + 1) < source_height)) { - FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); - } else { - memcpy(ybuf, y0_ptr, source_width); - } - y_ptr = ybuf; - ybuf[source_width] = ybuf[source_width-1]; - int uv_source_width = (source_width + 1) / 2; - if (yscale_fixed != kFractionMax && - source_uv_fraction && - (((source_y >> y_shift) + 1) < (source_height >> y_shift))) { - FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction); - FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction); - } else { - memcpy(ubuf, u0_ptr, uv_source_width); - memcpy(vbuf, v0_ptr, uv_source_width); - } - u_ptr = ubuf; - v_ptr = vbuf; - ubuf[uv_source_width] = ubuf[uv_source_width - 1]; - vbuf[uv_source_width] = vbuf[uv_source_width - 1]; - } - if (source_dx == kFractionMax) { // Not scaled - FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, width); - } else if (filter & FILTER_BILINEAR_H) { - LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, width, source_dx); - } else { -// Specialized scalers and rotation. 
-#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_MSC_VER) && defined(_M_IX86) - if(mozilla::supports_sse()) { - if (width == (source_width * 2)) { - DoubleYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, - dest_pixel, width); - } else if ((source_dx & kFractionMask) == 0) { - // Scaling by integer scale factor. ie half. - ConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, - dest_pixel, width, - source_dx >> kFractionBits); - } else if (source_dx_uv == source_dx) { // Not rotated. - ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, width, source_dx); - } else { - RotateConvertYUVToRGB32Row_SSE(y_ptr, u_ptr, v_ptr, - dest_pixel, width, - source_dx >> kFractionBits, - source_dx_uv >> kFractionBits); - } - } - else { - ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr, - dest_pixel, width, source_dx); - } -#else - (void)source_dx_uv; - ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, - dest_pixel, width, source_dx); -#endif - } - } - // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms. - if (has_mmx) - EMMS(); -} - -} // namespace gfx -} // namespace mozilla diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h deleted file mode 100644 index 266a23d45..000000000 --- a/gfx/ycbcr/yuv_convert.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef MEDIA_BASE_YUV_CONVERT_H_ -#define MEDIA_BASE_YUV_CONVERT_H_ - -#include "chromium_types.h" -#include "ImageTypes.h" - -namespace mozilla { - -namespace gfx { - -// Type of YUV surface. -// The value of these enums matter as they are used to shift vertical indices. -enum YUVType { - YV12 = 0, // YV12 is half width and half height chroma channels. - YV16 = 1, // YV16 is half width and full height chroma channels. - YV24 = 2 // YV24 is full width and full height chroma channels. -}; - -// Mirror means flip the image horizontally, as in looking in a mirror. 
-// Rotate happens after mirroring. -enum Rotate { - ROTATE_0, // Rotation off. - ROTATE_90, // Rotate clockwise. - ROTATE_180, // Rotate upside down. - ROTATE_270, // Rotate counter clockwise. - MIRROR_ROTATE_0, // Mirror horizontally. - MIRROR_ROTATE_90, // Mirror then Rotate clockwise. - MIRROR_ROTATE_180, // Mirror vertically. - MIRROR_ROTATE_270 // Transpose. -}; - -// Filter affects how scaling looks. -enum ScaleFilter { - FILTER_NONE = 0, // No filter (point sampled). - FILTER_BILINEAR_H = 1, // Bilinear horizontal filter. - FILTER_BILINEAR_V = 2, // Bilinear vertical filter. - FILTER_BILINEAR = 3 // Bilinear filter. -}; - -YUVType TypeFromSize(int ywidth, int yheight, int cbcrwidth, int cbcrheight); - -// Convert a frame of YUV to 32 bit ARGB. -// Pass in YV16/YV12 depending on source format -void ConvertYCbCrToRGB32(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int pic_x, - int pic_y, - int pic_width, - int pic_height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type, - YUVColorSpace yuv_color_space); - -void ConvertYCbCrToRGB32_deprecated(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int pic_x, - int pic_y, - int pic_width, - int pic_height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type); - -// Scale a frame of YUV to 32 bit ARGB. -// Supports rotation and mirroring. 
-void ScaleYCbCrToRGB32(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int source_width, - int source_height, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type, - YUVColorSpace yuv_color_space, - ScaleFilter filter); - -void ScaleYCbCrToRGB32_deprecated(const uint8* yplane, - const uint8* uplane, - const uint8* vplane, - uint8* rgbframe, - int source_width, - int source_height, - int width, - int height, - int ystride, - int uvstride, - int rgbstride, - YUVType yuv_type, - Rotate view_rotate, - ScaleFilter filter); - -} // namespace gfx -} // namespace mozilla - -#endif // MEDIA_BASE_YUV_CONVERT_H_ diff --git a/gfx/ycbcr/yuv_convert_arm.cpp b/gfx/ycbcr/yuv_convert_arm.cpp deleted file mode 100644 index 081343b0b..000000000 --- a/gfx/ycbcr/yuv_convert_arm.cpp +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -// contributor Siarhei Siamashka <siarhei.siamashka@gmail.com> - -#include "yuv_convert.h" -#include "ycbcr_to_rgb565.h" - - - -#ifdef HAVE_YCBCR_TO_RGB565 - -namespace mozilla { - -namespace gfx { - -# if defined(MOZILLA_MAY_SUPPORT_NEON) -# if defined(__clang__) -void __attribute((noinline)) -# else -void __attribute((noinline,optimize("-fomit-frame-pointer"))) -# endif - yuv42x_to_rgb565_row_neon(uint16 *dst, - const uint8 *y, - const uint8 *u, - const uint8 *v, - int n, - int oddflag) -{ - static __attribute__((aligned(16))) uint16 acc_r[8] = { - 22840, 22840, 22840, 22840, 22840, 22840, 22840, 22840, - }; - static __attribute__((aligned(16))) uint16 acc_g[8] = { - 17312, 17312, 17312, 17312, 17312, 17312, 17312, 17312, - }; - static __attribute__((aligned(16))) uint16 acc_b[8] = { - 28832, 28832, 28832, 28832, 28832, 28832, 28832, 28832, - }; - /* - * Registers: - * q0, q1 : d0, d1, d2, d3 - are used for initial loading of YUV data - * q2 : d4, d5 - are used for storing converted RGB data - * q3 : d6, d7 - are used for temporary storage - * - * q4-q7 - reserved - * - * q8, q9 : d16, d17, d18, d19 - are used for expanded Y data - * q10 : d20, d21 - * q11 : d22, d23 - * q12 : d24, d25 - * q13 : d26, d27 - * q13, q14, q15 - various constants (#16, #149, #204, #50, #104, #154) - */ - asm volatile ( -".fpu neon\n" -/* Allow to build on targets not supporting neon, and force the object file - * target to avoid bumping the final binary target */ -".arch armv7-a\n" -".object_arch armv4t\n" -".macro convert_macroblock size\n" -/* load up to 16 source pixels */ - ".if \\size == 16\n" - "pld [%[y], #64]\n" - "pld [%[u], #64]\n" - "pld [%[v], #64]\n" - "vld1.8 {d1}, [%[y]]!\n" - "vld1.8 {d3}, [%[y]]!\n" - "vld1.8 {d0}, [%[u]]!\n" - "vld1.8 {d2}, [%[v]]!\n" - ".elseif \\size == 8\n" - "vld1.8 {d1}, [%[y]]!\n" - "vld1.8 {d0[0]}, [%[u]]!\n" - "vld1.8 {d0[1]}, [%[u]]!\n" - "vld1.8 {d0[2]}, [%[u]]!\n" - "vld1.8 {d0[3]}, [%[u]]!\n" - "vld1.8 {d2[0]}, [%[v]]!\n" - "vld1.8 
{d2[1]}, [%[v]]!\n" - "vld1.8 {d2[2]}, [%[v]]!\n" - "vld1.8 {d2[3]}, [%[v]]!\n" - ".elseif \\size == 4\n" - "vld1.8 {d1[0]}, [%[y]]!\n" - "vld1.8 {d1[1]}, [%[y]]!\n" - "vld1.8 {d1[2]}, [%[y]]!\n" - "vld1.8 {d1[3]}, [%[y]]!\n" - "vld1.8 {d0[0]}, [%[u]]!\n" - "vld1.8 {d0[1]}, [%[u]]!\n" - "vld1.8 {d2[0]}, [%[v]]!\n" - "vld1.8 {d2[1]}, [%[v]]!\n" - ".elseif \\size == 2\n" - "vld1.8 {d1[0]}, [%[y]]!\n" - "vld1.8 {d1[1]}, [%[y]]!\n" - "vld1.8 {d0[0]}, [%[u]]!\n" - "vld1.8 {d2[0]}, [%[v]]!\n" - ".elseif \\size == 1\n" - "vld1.8 {d1[0]}, [%[y]]!\n" - "vld1.8 {d0[0]}, [%[u]]!\n" - "vld1.8 {d2[0]}, [%[v]]!\n" - ".else\n" - ".error \"unsupported macroblock size\"\n" - ".endif\n" - - /* d1 - Y data (first 8 bytes) */ - /* d3 - Y data (next 8 bytes) */ - /* d0 - U data, d2 - V data */ - - /* split even and odd Y color components */ - "vuzp.8 d1, d3\n" /* d1 - evenY, d3 - oddY */ - /* clip upper and lower boundaries */ - "vqadd.u8 q0, q0, q4\n" - "vqadd.u8 q1, q1, q4\n" - "vqsub.u8 q0, q0, q5\n" - "vqsub.u8 q1, q1, q5\n" - - "vshr.u8 d4, d2, #1\n" /* d4 = V >> 1 */ - - "vmull.u8 q8, d1, d27\n" /* q8 = evenY * 149 */ - "vmull.u8 q9, d3, d27\n" /* q9 = oddY * 149 */ - - "vld1.16 {d20, d21}, [%[acc_r], :128]\n" /* q10 - initialize accumulator for red */ - "vsubw.u8 q10, q10, d4\n" /* red acc -= (V >> 1) */ - "vmlsl.u8 q10, d2, d28\n" /* red acc -= V * 204 */ - "vld1.16 {d22, d23}, [%[acc_g], :128]\n" /* q11 - initialize accumulator for green */ - "vmlsl.u8 q11, d2, d30\n" /* green acc -= V * 104 */ - "vmlsl.u8 q11, d0, d29\n" /* green acc -= U * 50 */ - "vld1.16 {d24, d25}, [%[acc_b], :128]\n" /* q12 - initialize accumulator for blue */ - "vmlsl.u8 q12, d0, d30\n" /* blue acc -= U * 104 */ - "vmlsl.u8 q12, d0, d31\n" /* blue acc -= U * 154 */ - - "vhsub.s16 q3, q8, q10\n" /* calculate even red components */ - "vhsub.s16 q10, q9, q10\n" /* calculate odd red components */ - "vqshrun.s16 d0, q3, #6\n" /* right shift, narrow and saturate even red components */ - "vqshrun.s16 d3, q10, 
#6\n" /* right shift, narrow and saturate odd red components */ - - "vhadd.s16 q3, q8, q11\n" /* calculate even green components */ - "vhadd.s16 q11, q9, q11\n" /* calculate odd green components */ - "vqshrun.s16 d1, q3, #6\n" /* right shift, narrow and saturate even green components */ - "vqshrun.s16 d4, q11, #6\n" /* right shift, narrow and saturate odd green components */ - - "vhsub.s16 q3, q8, q12\n" /* calculate even blue components */ - "vhsub.s16 q12, q9, q12\n" /* calculate odd blue components */ - "vqshrun.s16 d2, q3, #6\n" /* right shift, narrow and saturate even blue components */ - "vqshrun.s16 d5, q12, #6\n" /* right shift, narrow and saturate odd blue components */ - - "vzip.8 d0, d3\n" /* join even and odd red components */ - "vzip.8 d1, d4\n" /* join even and odd green components */ - "vzip.8 d2, d5\n" /* join even and odd blue components */ - - "vshll.u8 q3, d0, #8\n\t" - "vshll.u8 q8, d1, #8\n\t" - "vshll.u8 q9, d2, #8\n\t" - "vsri.u16 q3, q8, #5\t\n" - "vsri.u16 q3, q9, #11\t\n" - /* store pixel data to memory */ - ".if \\size == 16\n" - " vst1.16 {d6, d7}, [%[dst]]!\n" - " vshll.u8 q3, d3, #8\n\t" - " vshll.u8 q8, d4, #8\n\t" - " vshll.u8 q9, d5, #8\n\t" - " vsri.u16 q3, q8, #5\t\n" - " vsri.u16 q3, q9, #11\t\n" - " vst1.16 {d6, d7}, [%[dst]]!\n" - ".elseif \\size == 8\n" - " vst1.16 {d6, d7}, [%[dst]]!\n" - ".elseif \\size == 4\n" - " vst1.16 {d6}, [%[dst]]!\n" - ".elseif \\size == 2\n" - " vst1.16 {d6[0]}, [%[dst]]!\n" - " vst1.16 {d6[1]}, [%[dst]]!\n" - ".elseif \\size == 1\n" - " vst1.16 {d6[0]}, [%[dst]]!\n" - ".endif\n" - ".endm\n" - - "vmov.u8 d8, #15\n" /* add this to U/V to saturate upper boundary */ - "vmov.u8 d9, #20\n" /* add this to Y to saturate upper boundary */ - "vmov.u8 d10, #31\n" /* sub this from U/V to saturate lower boundary */ - "vmov.u8 d11, #36\n" /* sub this from Y to saturate lower boundary */ - - "vmov.u8 d26, #16\n" - "vmov.u8 d27, #149\n" - "vmov.u8 d28, #204\n" - "vmov.u8 d29, #50\n" - "vmov.u8 d30, #104\n" - 
"vmov.u8 d31, #154\n" - - "cmp %[oddflag], #0\n" - "beq 1f\n" - "convert_macroblock 1\n" - "sub %[n], %[n], #1\n" - "1:\n" - "subs %[n], %[n], #16\n" - "blt 2f\n" - "1:\n" - "convert_macroblock 16\n" - "subs %[n], %[n], #16\n" - "bge 1b\n" - "2:\n" - "tst %[n], #8\n" - "beq 3f\n" - "convert_macroblock 8\n" - "3:\n" - "tst %[n], #4\n" - "beq 4f\n" - "convert_macroblock 4\n" - "4:\n" - "tst %[n], #2\n" - "beq 5f\n" - "convert_macroblock 2\n" - "5:\n" - "tst %[n], #1\n" - "beq 6f\n" - "convert_macroblock 1\n" - "6:\n" - ".purgem convert_macroblock\n" - : [y] "+&r" (y), [u] "+&r" (u), [v] "+&r" (v), [dst] "+&r" (dst), [n] "+&r" (n) - : [acc_r] "r" (&acc_r[0]), [acc_g] "r" (&acc_g[0]), [acc_b] "r" (&acc_b[0]), - [oddflag] "r" (oddflag) - : "cc", "memory", - "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", - "d8", "d9", "d10", "d11", /* "d12", "d13", "d14", "d15", */ - "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", - "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" - ); -} -# endif // MOZILLA_MAY_SUPPORT_NEON - -} // namespace gfx - -} // namespace mozilla - -#endif // HAVE_YCBCR_TO_RGB565 diff --git a/gfx/ycbcr/yuv_convert_mmx.cpp b/gfx/ycbcr/yuv_convert_mmx.cpp deleted file mode 100644 index b5353e500..000000000 --- a/gfx/ycbcr/yuv_convert_mmx.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <mmintrin.h> -#include "yuv_row.h" - -namespace mozilla { -namespace gfx { - -// FilterRows combines two rows of the image using linear interpolation. -// MMX version does 8 pixels at a time. 
-void FilterRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction) { - __m64 zero = _mm_setzero_si64(); - __m64 y1_fraction = _mm_set1_pi16(source_y_fraction); - __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction); - - const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr); - const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr); - __m64* dest64 = reinterpret_cast<__m64*>(ybuf); - __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width); - - do { - __m64 y0 = *y0_ptr64++; - __m64 y1 = *y1_ptr64++; - __m64 y2 = _mm_unpackhi_pi8(y0, zero); - __m64 y3 = _mm_unpackhi_pi8(y1, zero); - y0 = _mm_unpacklo_pi8(y0, zero); - y1 = _mm_unpacklo_pi8(y1, zero); - y0 = _mm_mullo_pi16(y0, y0_fraction); - y1 = _mm_mullo_pi16(y1, y1_fraction); - y2 = _mm_mullo_pi16(y2, y0_fraction); - y3 = _mm_mullo_pi16(y3, y1_fraction); - y0 = _mm_add_pi16(y0, y1); - y2 = _mm_add_pi16(y2, y3); - y0 = _mm_srli_pi16(y0, 8); - y2 = _mm_srli_pi16(y2, 8); - y0 = _mm_packs_pu16(y0, y2); - *dest64++ = y0; - } while (dest64 < end64); -} - -} // namespace gfx -} // namespace mozilla diff --git a/gfx/ycbcr/yuv_convert_sse2.cpp b/gfx/ycbcr/yuv_convert_sse2.cpp deleted file mode 100644 index 25fe20639..000000000 --- a/gfx/ycbcr/yuv_convert_sse2.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <emmintrin.h> -#include "yuv_row.h" - -namespace mozilla { -namespace gfx { - -// FilterRows combines two rows of the image using linear interpolation. -// SSE2 version does 16 pixels at a time. 
-void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, - int source_width, int source_y_fraction) { - __m128i zero = _mm_setzero_si128(); - __m128i y1_fraction = _mm_set1_epi16(source_y_fraction); - __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction); - - const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr); - const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr); - __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf); - __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width); - - do { - __m128i y0 = _mm_loadu_si128(y0_ptr128); - __m128i y1 = _mm_loadu_si128(y1_ptr128); - __m128i y2 = _mm_unpackhi_epi8(y0, zero); - __m128i y3 = _mm_unpackhi_epi8(y1, zero); - y0 = _mm_unpacklo_epi8(y0, zero); - y1 = _mm_unpacklo_epi8(y1, zero); - y0 = _mm_mullo_epi16(y0, y0_fraction); - y1 = _mm_mullo_epi16(y1, y1_fraction); - y2 = _mm_mullo_epi16(y2, y0_fraction); - y3 = _mm_mullo_epi16(y3, y1_fraction); - y0 = _mm_add_epi16(y0, y1); - y2 = _mm_add_epi16(y2, y3); - y0 = _mm_srli_epi16(y0, 8); - y2 = _mm_srli_epi16(y2, 8); - y0 = _mm_packus_epi16(y0, y2); - *dest128++ = y0; - ++y0_ptr128; - ++y1_ptr128; - } while (dest128 < end128); -} - -} // namespace gfx -} // namespace mozilla diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h deleted file mode 100644 index c89f54b8f..000000000 --- a/gfx/ycbcr/yuv_row.h +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// yuv_row internal functions to handle YUV conversion and scaling to RGB. -// These functions are used from both yuv_convert.cc and yuv_scale.cc. - -// TODO(fbarchard): Write function that can handle rotation and scaling. - -#ifndef MEDIA_BASE_YUV_ROW_H_ -#define MEDIA_BASE_YUV_ROW_H_ - -#include "chromium_types.h" - -extern "C" { -// Can only do 1x. -// This is the second fastest of the scalers. 
-void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - -void FastConvertYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - unsigned int x_shift); - -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - -// Can do 1x, half size or any scale down by an integer amount. -// Step can be negative (mirroring, rotate 180). -// This is the third fastest of the scalers. -// Only defined on Windows x86-32. -void ConvertYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int step); - -// Rotate is like Convert, but applies different step to Y versus U and V. -// This allows rotation by 90 or 270, by stepping by stride. -// This is the forth fastest of the scalers. -// Only defined on Windows x86-32. -void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int ystep, - int uvstep); - -// Doubler does 4 pixels at a time. Each pixel is replicated. -// This is the fastest of the scalers. -// Only defined on Windows x86-32. -void DoubleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - -// Handles arbitrary scaling up or down. -// Mirroring is supported, but not 90 or 270 degree rotation. -// Chroma is under sampled every 2 pixels for performance. 
-void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void ScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -// Handles arbitrary scaling up or down with bilinear filtering. -// Mirroring is supported, but not 90 or 270 degree rotation. -// Chroma is under sampled every 2 pixels for performance. -// This is the slowest of the scalers. -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - -void LinearScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - - -#if defined(_MSC_VER) -#define SIMD_ALIGNED(var) __declspec(align(16)) var -#else -#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) -#endif -extern SIMD_ALIGNED(const int16 kCoefficientsRgbY[768][4]); - -// x64 uses MMX2 (SSE) so emms is not required. -// Warning C4799: function has no EMMS instruction. -// EMMS() is slow and should be called by the calling function once per image. 
-#if defined(ARCH_CPU_X86) && !defined(ARCH_CPU_X86_64) -#if defined(_MSC_VER) -#define EMMS() __asm emms -#pragma warning(disable: 4799) -#else -#define EMMS() asm("emms") -#endif -#else -#define EMMS() ((void)0) -#endif - -} // extern "C" - -#endif // MEDIA_BASE_YUV_ROW_H_ diff --git a/gfx/ycbcr/yuv_row_arm.s b/gfx/ycbcr/yuv_row_arm.s deleted file mode 100644 index 6a6c81bee..000000000 --- a/gfx/ycbcr/yuv_row_arm.s +++ /dev/null @@ -1,304 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - - .arch armv7-a - .fpu neon -/* Allow to build on targets not supporting neon, and force the object file - * target to avoid bumping the final binary target */ - .object_arch armv4t - .text - .align - - .balign 64 -YCbCr42xToRGB565_DITHER03_CONSTS_NEON: - .short -14240 - .short -14240+384 - .short 8672 - .short 8672+192 - .short -17696 - .short -17696+384 - .byte 102 - .byte 25 - .byte 52 - .byte 129 -YCbCr42xToRGB565_DITHER12_CONSTS_NEON: - .short -14240+128 - .short -14240+256 - .short 8672+64 - .short 8672+128 - .short -17696+128 - .short -17696+256 - .byte 102 - .byte 25 - .byte 52 - .byte 129 -YCbCr42xToRGB565_DITHER21_CONSTS_NEON: - .short -14240+256 - .short -14240+128 - .short 8672+128 - .short 8672+64 - .short -17696+256 - .short -17696+128 - .byte 102 - .byte 25 - .byte 52 - .byte 129 -YCbCr42xToRGB565_DITHER30_CONSTS_NEON: - .short -14240+384 - .short -14240 - .short 8672+192 - .short 8672 - .short -17696+384 - .short -17696 - .byte 102 - .byte 25 - .byte 52 - .byte 129 - -@ void ScaleYCbCr42xToRGB565_BilinearY_Row_NEON( -@ yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither); -@ -@ ctx = { -@ uint16_t *rgb_row; /*r0*/ -@ const uint8_t *y_row; /*r1*/ -@ const uint8_t *u_row; /*r2*/ -@ const uint8_t *v_row; /*r3*/ -@ int y_yweight; /*r4*/ -@ int y_pitch; /*r5*/ -@ int width; /*r6*/ -@ int source_x0_q16; 
/*r7*/ -@ int source_dx_q16; /*r8*/ -@ int source_uv_xoffs_q16; /*r9*/ -@ }; - .global ScaleYCbCr42xToRGB565_BilinearY_Row_NEON - .type ScaleYCbCr42xToRGB565_BilinearY_Row_NEON, %function - .balign 64 - .fnstart -ScaleYCbCr42xToRGB565_BilinearY_Row_NEON: - STMFD r13!,{r4-r9,r14} @ 8 words. - ADR r14,YCbCr42xToRGB565_DITHER03_CONSTS_NEON - VPUSH {Q4-Q7} @ 16 words. - ADD r14,r14,r1, LSL #4 @ Select the dither table to use - LDMIA r0, {r0-r9} - @ Set up image index registers. - ADD r12,r8, r8 - VMOV.I32 D16,#0 @ Q8 = < 2| 2| 0| 0>*source_dx_q16 - VDUP.32 D17,r12 - ADD r12,r12,r12 - VTRN.32 D16,D17 @ Q2 = < 2| 0| 2| 0>*source_dx_q16 - VDUP.32 D19,r12 @ Q9 = < 4| 4| ?| ?>*source_dx_q16 - ADD r12,r12,r12 - VDUP.32 Q0, r7 @ Q0 = < 1| 1| 1| 1>*source_x0_q16 - VADD.I32 D17,D17,D19 @ Q8 = < 6| 4| 2| 0>*source_dx_q16 - CMP r8, #0 @ If source_dx_q16 is negative... - VDUP.32 Q9, r12 @ Q9 = < 8| 8| 8| 8>*source_dx_q16 - ADDLT r7, r7, r8, LSL #4 @ Make r7 point to the end of the block - VADD.I32 Q0, Q0, Q8 @ Q0 = < 6| 4| 2| 0>*source_dx_q16+source_x0_q16 - SUBLT r7, r7, r8 @ (i.e., the lowest address we'll use) - VADD.I32 Q1, Q0, Q9 @ Q1 = <14|12|10| 8>*source_dx_q16+source_x0_q16 - VDUP.I32 Q9, r8 @ Q8 = < 1| 1| 1| 1>*source_dx_q16 - VADD.I32 Q2, Q0, Q9 @ Q2 = < 7| 5| 3| 1>*source_dx_q16+source_x0_q16 - VADD.I32 Q3, Q1, Q9 @ Q3 = <15|13|11| 9>*source_dx_q16+source_x0_q16 - VLD1.64 {D30,D31},[r14,:128] @ Load some constants - VMOV.I8 D28,#52 - VMOV.I8 D29,#129 - @ The basic idea here is to do aligned loads of a block of data and then - @ index into it using VTBL to extract the data from the source X - @ coordinate corresponding to each destination pixel. - @ This is significantly less code and significantly fewer cycles than doing - @ a series of single-lane loads, but it means that the X step between - @ pixels must be limited to 2.0 or less, otherwise we couldn't guarantee - @ that we could read 8 pixels from a single aligned 32-byte block of data. 
- @ Q0...Q3 contain the 16.16 fixed-point X coordinates of each pixel, - @ separated into even pixels and odd pixels to make extracting offsets and - @ weights easier. - @ We then pull out two bytes from the middle of each coordinate: the top - @ byte corresponds to the integer part of the X coordinate, and the bottom - @ byte corresponds to the weight to use for bilinear blending. - @ These are separated out into different registers with VTRN. - @ Then by subtracting the integer X coordinate of the first pixel in the - @ data block we loaded, we produce an index register suitable for use by - @ VTBL. -s42xbily_neon_loop: - @ Load the Y' data. - MOV r12,r7, ASR #16 - VRSHRN.S32 D16,Q0, #8 - AND r12,r12,#~15 @ Read 16-byte aligned blocks - VDUP.I8 D20,r12 - ADD r12,r1, r12 @ r12 = y_row+(source_x&~7) - VRSHRN.S32 D17,Q1, #8 - PLD [r12,#64] - VLD1.64 {D8, D9, D10,D11},[r12,:128],r5 @ Load Y' top row - ADD r14,r7, r8, LSL #3 - VRSHRN.S32 D18,Q2, #8 - MOV r14,r14,ASR #16 - VRSHRN.S32 D19,Q3, #8 - AND r14,r14,#~15 @ Read 16-byte aligned blocks - VLD1.64 {D12,D13,D14,D15},[r12,:128] @ Load Y' bottom row - PLD [r12,#64] - VDUP.I8 D21,r14 - ADD r14,r1, r14 @ r14 = y_row+(source_x&~7) - VMOV.I8 Q13,#1 - PLD [r14,#64] - VTRN.8 Q8, Q9 @ Q8 = <wFwEwDwCwBwAw9w8w7w6w5w4w3w2w1w0> - @ Q9 = <xFxExDxCxBxAx9x8x7x6x5x4x3x2x1x0> - VSUB.S8 Q9, Q9, Q10 @ Make offsets relative to the data we loaded. 
- @ First 8 Y' pixels - VTBL.8 D20,{D8, D9, D10,D11},D18 @ Index top row at source_x - VTBL.8 D24,{D12,D13,D14,D15},D18 @ Index bottom row at source_x - VADD.S8 Q13,Q9, Q13 @ Add 1 to source_x - VTBL.8 D22,{D8, D9, D10,D11},D26 @ Index top row at source_x+1 - VTBL.8 D26,{D12,D13,D14,D15},D26 @ Index bottom row at source_x+1 - @ Next 8 Y' pixels - VLD1.64 {D8, D9, D10,D11},[r14,:128],r5 @ Load Y' top row - VLD1.64 {D12,D13,D14,D15},[r14,:128] @ Load Y' bottom row - PLD [r14,#64] - VTBL.8 D21,{D8, D9, D10,D11},D19 @ Index top row at source_x - VTBL.8 D25,{D12,D13,D14,D15},D19 @ Index bottom row at source_x - VTBL.8 D23,{D8, D9, D10,D11},D27 @ Index top row at source_x+1 - VTBL.8 D27,{D12,D13,D14,D15},D27 @ Index bottom row at source_x+1 - @ Blend Y'. - VDUP.I16 Q9, r4 @ Load the y weights. - VSUBL.U8 Q4, D24,D20 @ Q5:Q4 = c-a - VSUBL.U8 Q5, D25,D21 - VSUBL.U8 Q6, D26,D22 @ Q7:Q6 = d-b - VSUBL.U8 Q7, D27,D23 - VMUL.S16 Q4, Q4, Q9 @ Q5:Q4 = (c-a)*yweight - VMUL.S16 Q5, Q5, Q9 - VMUL.S16 Q6, Q6, Q9 @ Q7:Q6 = (d-b)*yweight - VMUL.S16 Q7, Q7, Q9 - VMOVL.U8 Q12,D16 @ Promote the x weights to 16 bits. - VMOVL.U8 Q13,D17 @ Sadly, there's no VMULW. - VRSHRN.S16 D8, Q4, #8 @ Q4 = (c-a)*yweight+128>>8 - VRSHRN.S16 D9, Q5, #8 - VRSHRN.S16 D12,Q6, #8 @ Q6 = (d-b)*yweight+128>>8 - VRSHRN.S16 D13,Q7, #8 - VADD.I8 Q10,Q10,Q4 @ Q10 = a+((c-a)*yweight+128>>8) - VADD.I8 Q11,Q11,Q6 @ Q11 = b+((d-b)*yweight+128>>8) - VSUBL.U8 Q4, D22,D20 @ Q5:Q4 = b-a - VSUBL.U8 Q5, D23,D21 - VMUL.S16 Q4, Q4, Q12 @ Q5:Q4 = (b-a)*xweight - VMUL.S16 Q5, Q5, Q13 - VRSHRN.S16 D8, Q4, #8 @ Q4 = (b-a)*xweight+128>>8 - ADD r12,r7, r9 - VRSHRN.S16 D9, Q5, #8 - MOV r12,r12,ASR #17 - VADD.I8 Q8, Q10,Q4 @ Q8 = a+((b-a)*xweight+128>>8) - @ Start extracting the chroma x coordinates, and load Cb and Cr. 
- AND r12,r12,#~15 @ Read 16-byte aligned blocks - VDUP.I32 Q9, r9 @ Q9 = source_uv_xoffs_q16 x 4 - ADD r14,r2, r12 - VADD.I32 Q10,Q0, Q9 - VLD1.64 {D8, D9, D10,D11},[r14,:128] @ Load Cb - PLD [r14,#64] - VADD.I32 Q11,Q1, Q9 - ADD r14,r3, r12 - VADD.I32 Q12,Q2, Q9 - VLD1.64 {D12,D13,D14,D15},[r14,:128] @ Load Cr - PLD [r14,#64] - VADD.I32 Q13,Q3, Q9 - VRSHRN.S32 D20,Q10,#9 @ Q10 = <xEwExCwCxAwAx8w8x6w6x4w4x2w2x0w0> - VRSHRN.S32 D21,Q11,#9 - VDUP.I8 Q9, r12 - VRSHRN.S32 D22,Q12,#9 @ Q11 = <xFwFxDwDxBwBx9w9x7w7x5w5x3w3x1w1> - VRSHRN.S32 D23,Q13,#9 - @ We don't actually need the x weights, but we get them for free. - @ Free ALU slot - VTRN.8 Q10,Q11 @ Q10 = <wFwEwDwCwBwAw9w8w7w6w5w4w3w2w1w0> - @ Free ALU slot @ Q11 = <xFxExDxCxBxAx9x8x7x6x5x4x3x2x1x0> - VSUB.S8 Q11,Q11,Q9 @ Make offsets relative to the data we loaded. - VTBL.8 D18,{D8, D9, D10,D11},D22 @ Index Cb at source_x - VMOV.I8 D24,#74 - VTBL.8 D19,{D8, D9, D10,D11},D23 - VMOV.I8 D26,#102 - VTBL.8 D20,{D12,D13,D14,D15},D22 @ Index Cr at source_x - VMOV.I8 D27,#25 - VTBL.8 D21,{D12,D13,D14,D15},D23 - @ We now have Y' in Q8, Cb in Q9, and Cr in Q10 - @ We use VDUP to expand constants, because it's a permute instruction, so - @ it can dual issue on the A8. 
- SUBS r6, r6, #16 @ width -= 16 - VMULL.U8 Q4, D16,D24 @ Q5:Q4 = Y'*74 - VDUP.32 Q6, D30[1] @ Q7:Q6 = bias_G - VMULL.U8 Q5, D17,D24 - VDUP.32 Q7, D30[1] - VMLSL.U8 Q6, D18,D27 @ Q7:Q6 = -25*Cb+bias_G - VDUP.32 Q11,D30[0] @ Q12:Q11 = bias_R - VMLSL.U8 Q7, D19,D27 - VDUP.32 Q12,D30[0] - VMLAL.U8 Q11,D20,D26 @ Q12:Q11 = 102*Cr+bias_R - VDUP.32 Q8, D31[0] @ Q13:Q8 = bias_B - VMLAL.U8 Q12,D21,D26 - VDUP.32 Q13,D31[0] - VMLAL.U8 Q8, D18,D29 @ Q13:Q8 = 129*Cb+bias_B - VMLAL.U8 Q13,D19,D29 - VMLSL.U8 Q6, D20,D28 @ Q7:Q6 = -25*Cb-52*Cr+bias_G - VMLSL.U8 Q7, D21,D28 - VADD.S16 Q11,Q4, Q11 @ Q12:Q11 = 74*Y'+102*Cr+bias_R - VADD.S16 Q12,Q5, Q12 - VQADD.S16 Q8, Q4, Q8 @ Q13:Q8 = 74*Y'+129*Cr+bias_B - VQADD.S16 Q13,Q5, Q13 - VADD.S16 Q6, Q4, Q6 @ Q7:Q6 = 74*Y'-25*Cb-52*Cr+bias_G - VADD.S16 Q7, Q5, Q7 - @ Push each value to the top of its word and saturate it. - VQSHLU.S16 Q11,Q11,#2 - VQSHLU.S16 Q12,Q12,#2 - VQSHLU.S16 Q6, Q6, #2 - VQSHLU.S16 Q7, Q7, #2 - VQSHLU.S16 Q8, Q8, #2 - VQSHLU.S16 Q13,Q13,#2 - @ Merge G and B into R. - VSRI.U16 Q11,Q6, #5 - VSRI.U16 Q12,Q7, #5 - VSRI.U16 Q11,Q8, #11 - MOV r14,r8, LSL #4 - VSRI.U16 Q12,Q13,#11 - BLT s42xbily_neon_tail - VDUP.I32 Q13,r14 - @ Store the result. - VST1.16 {D22,D23,D24,D25},[r0]! - BEQ s42xbily_neon_done - @ Advance the x coordinates. - VADD.I32 Q0, Q0, Q13 - VADD.I32 Q1, Q1, Q13 - ADD r7, r14 - VADD.I32 Q2, Q2, Q13 - VADD.I32 Q3, Q3, Q13 - B s42xbily_neon_loop -s42xbily_neon_tail: - @ We have between 1 and 15 pixels left to write. - @ -r6 == the number of pixels we need to skip writing. - @ Adjust r0 to point to the last one we need to write, because we're going - @ to write them in reverse order. - ADD r0, r0, r6, LSL #1 - MOV r14,#-2 - ADD r0, r0, #30 - @ Skip past the ones we don't need to write. 
- SUB PC, PC, r6, LSL #2 - ORR r0, r0, r0 - VST1.16 {D25[3]},[r0,:16],r14 - VST1.16 {D25[2]},[r0,:16],r14 - VST1.16 {D25[1]},[r0,:16],r14 - VST1.16 {D25[0]},[r0,:16],r14 - VST1.16 {D24[3]},[r0,:16],r14 - VST1.16 {D24[2]},[r0,:16],r14 - VST1.16 {D24[1]},[r0,:16],r14 - VST1.16 {D24[0]},[r0,:16],r14 - VST1.16 {D23[3]},[r0,:16],r14 - VST1.16 {D23[2]},[r0,:16],r14 - VST1.16 {D23[1]},[r0,:16],r14 - VST1.16 {D23[0]},[r0,:16],r14 - VST1.16 {D22[3]},[r0,:16],r14 - VST1.16 {D22[2]},[r0,:16],r14 - VST1.16 {D22[1]},[r0,:16],r14 - VST1.16 {D22[0]},[r0,:16] -s42xbily_neon_done: - VPOP {Q4-Q7} @ 16 words. - LDMFD r13!,{r4-r9,PC} @ 8 words. - .fnend - .size ScaleYCbCr42xToRGB565_BilinearY_Row_NEON, .-ScaleYCbCr42xToRGB565_BilinearY_Row_NEON - -#if defined(__ELF__)&&defined(__linux__) - .section .note.GNU-stack,"",%progbits -#endif diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp deleted file mode 100644 index d327f854e..000000000 --- a/gfx/ycbcr/yuv_row_c.cpp +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "yuv_row.h" - -#define DCHECK(a) - -extern "C" { - -// C reference code that mimic the YUV assembly. -#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x))) -#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \ - (((x) + (y)) > 32767 ? 
32767 : ((x) + (y)))) - -static inline void YuvPixel(uint8 y, - uint8 u, - uint8 v, - uint8* rgb_buf) { - - int b = kCoefficientsRgbY[256+u][0]; - int g = kCoefficientsRgbY[256+u][1]; - int r = kCoefficientsRgbY[256+u][2]; - int a = kCoefficientsRgbY[256+u][3]; - - b = paddsw(b, kCoefficientsRgbY[512+v][0]); - g = paddsw(g, kCoefficientsRgbY[512+v][1]); - r = paddsw(r, kCoefficientsRgbY[512+v][2]); - a = paddsw(a, kCoefficientsRgbY[512+v][3]); - - b = paddsw(b, kCoefficientsRgbY[y][0]); - g = paddsw(g, kCoefficientsRgbY[y][1]); - r = paddsw(r, kCoefficientsRgbY[y][2]); - a = paddsw(a, kCoefficientsRgbY[y][3]); - - b >>= 6; - g >>= 6; - r >>= 6; - a >>= 6; - - *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) | - (packuswb(g) << 8) | - (packuswb(r) << 16) | - (packuswb(a) << 24); -} - -void FastConvertYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - unsigned int x_shift) { - for (int x = 0; x < width; x += 2) { - uint8 u = u_buf[x >> x_shift]; - uint8 v = v_buf[x >> x_shift]; - uint8 y0 = y_buf[x]; - YuvPixel(y0, u, v, rgb_buf); - if ((x + 1) < width) { - uint8 y1 = y_buf[x + 1]; - if (x_shift == 0) { - u = u_buf[x + 1]; - v = v_buf[x + 1]; - } - YuvPixel(y1, u, v, rgb_buf + 4); - } - rgb_buf += 8; // Advance 2 pixels. - } -} - -// 16.16 fixed point is used. A shift by 16 isolates the integer. -// A shift by 17 is used to further subsample the chrominence channels. -// & 0xffff isolates the fixed point fraction. >> 2 to get the upper 2 bits, -// for 1/65536 pixel accurate interpolation. 
-void ScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - int x = 0; - for (int i = 0; i < width; i += 2) { - int y = y_buf[x >> 16]; - int u = u_buf[(x >> 17)]; - int v = v_buf[(x >> 17)]; - YuvPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y = y_buf[x >> 16]; - YuvPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } -} - -void LinearScaleYUVToRGB32Row_C(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - int x = 0; - if (source_dx >= 0x20000) { - x = 32768; - } - for (int i = 0; i < width; i += 2) { - int y0 = y_buf[x >> 16]; - int y1 = y_buf[(x >> 16) + 1]; - int u0 = u_buf[(x >> 17)]; - int u1 = u_buf[(x >> 17) + 1]; - int v0 = v_buf[(x >> 17)]; - int v1 = v_buf[(x >> 17) + 1]; - int y_frac = (x & 65535); - int uv_frac = ((x >> 1) & 65535); - int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16; - int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16; - YuvPixel(y, u, v, rgb_buf); - x += source_dx; - if ((i + 1) < width) { - y0 = y_buf[x >> 16]; - y1 = y_buf[(x >> 16) + 1]; - y_frac = (x & 65535); - y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; - YuvPixel(y, u, v, rgb_buf+4); - x += source_dx; - } - rgb_buf += 8; - } -} - -} // extern "C" - diff --git a/gfx/ycbcr/yuv_row_other.cpp b/gfx/ycbcr/yuv_row_other.cpp deleted file mode 100644 index c351139f9..000000000 --- a/gfx/ycbcr/yuv_row_other.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2009 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "yuv_row.h" - -extern "C" { -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -} - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} - -} diff --git a/gfx/ycbcr/yuv_row_posix.cpp b/gfx/ycbcr/yuv_row_posix.cpp deleted file mode 100644 index 152bfc778..000000000 --- a/gfx/ycbcr/yuv_row_posix.cpp +++ /dev/null @@ -1,894 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Copyright (c) 2021 Moonchild Productions. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "yuv_row.h" -#include "mozilla/SSE.h" - -#define DCHECK(a) - -extern "C" { - -#if defined(ARCH_CPU_X86_64) - -// We don't need CPUID guards here, since x86-64 implies SSE2. - -// AMD64 ABI uses register paremters. 
-void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi - const uint8* u_buf, // rsi - const uint8* v_buf, // rdx - uint8* rgb_buf, // rcx - int width) { // r8 - asm( - "jmp 1f\n" -"0:" - "movzb (%1),%%r10\n" - "add $0x1,%1\n" - "movzb (%2),%%r11\n" - "add $0x1,%2\n" - "movq 2048(%5,%%r10,8),%%xmm0\n" - "movzb (%0),%%r10\n" - "movq 4096(%5,%%r11,8),%%xmm1\n" - "movzb 0x1(%0),%%r11\n" - "paddsw %%xmm1,%%xmm0\n" - "movq (%5,%%r10,8),%%xmm2\n" - "add $0x2,%0\n" - "movq (%5,%%r11,8),%%xmm3\n" - "paddsw %%xmm0,%%xmm2\n" - "paddsw %%xmm0,%%xmm3\n" - "shufps $0x44,%%xmm3,%%xmm2\n" - "psraw $0x6,%%xmm2\n" - "packuswb %%xmm2,%%xmm2\n" - "movq %%xmm2,0x0(%3)\n" - "add $0x8,%3\n" -"1:" - "sub $0x2,%4\n" - "jns 0b\n" - -"2:" - "add $0x1,%4\n" - "js 3f\n" - - "movzb (%1),%%r10\n" - "movq 2048(%5,%%r10,8),%%xmm0\n" - "movzb (%2),%%r10\n" - "movq 4096(%5,%%r10,8),%%xmm1\n" - "paddsw %%xmm1,%%xmm0\n" - "movzb (%0),%%r10\n" - "movq (%5,%%r10,8),%%xmm1\n" - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" -"3:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY) // %5 - : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3" -); -} - -void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi - const uint8* u_buf, // rsi - const uint8* v_buf, // rdx - uint8* rgb_buf, // rcx - int width, // r8 - int source_dx) { // r9 - asm( - "xor %%r11,%%r11\n" - "sub $0x2,%4\n" - "js 1f\n" - -"0:" - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - "movzb (%1,%%r10,1),%%rax\n" - "movq 2048(%5,%%rax,8),%%xmm0\n" - "movzb (%2,%%r10,1),%%rax\n" - "movq 4096(%5,%%rax,8),%%xmm1\n" - "lea (%%r11,%6),%%r10\n" - "sar $0x10,%%r11\n" - "movzb (%0,%%r11,1),%%rax\n" - "paddsw %%xmm1,%%xmm0\n" - "movq (%5,%%rax,8),%%xmm1\n" - "lea (%%r10,%6),%%r11\n" - "sar $0x10,%%r10\n" - "movzb (%0,%%r10,1),%%rax\n" - "movq (%5,%%rax,8),%%xmm2\n" - "paddsw %%xmm0,%%xmm1\n" - "paddsw %%xmm0,%%xmm2\n" 
- "shufps $0x44,%%xmm2,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movq %%xmm1,0x0(%3)\n" - "add $0x8,%3\n" - "sub $0x2,%4\n" - "jns 0b\n" - -"1:" - "add $0x1,%4\n" - "js 2f\n" - - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - "movzb (%1,%%r10,1),%%rax\n" - "movq 2048(%5,%%rax,8),%%xmm0\n" - "movzb (%2,%%r10,1),%%rax\n" - "movq 4096(%5,%%rax,8),%%xmm1\n" - "paddsw %%xmm1,%%xmm0\n" - "sar $0x10,%%r11\n" - "movzb (%0,%%r11,1),%%rax\n" - "movq (%5,%%rax,8),%%xmm1\n" - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" - -"2:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY), // %5 - "r"(static_cast<long>(source_dx)) // %6 - : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2" -); -} - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - asm( - "xor %%r11,%%r11\n" // x = 0 - "sub $0x2,%4\n" - "js 2f\n" - "cmp $0x20000,%6\n" // if source_dx >= 2.0 - "jl 0f\n" - "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less -"0:" - -"1:" - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - - "movzb (%1, %%r10, 1), %%r13 \n" - "movzb 1(%1, %%r10, 1), %%r14 \n" - "mov %%r11, %%rax \n" - "and $0x1fffe, %%rax \n" - "imul %%rax, %%r14 \n" - "xor $0x1fffe, %%rax \n" - "imul %%rax, %%r13 \n" - "add %%r14, %%r13 \n" - "shr $17, %%r13 \n" - "movq 2048(%5,%%r13,8), %%xmm0\n" - - "movzb (%2, %%r10, 1), %%r13 \n" - "movzb 1(%2, %%r10, 1), %%r14 \n" - "mov %%r11, %%rax \n" - "and $0x1fffe, %%rax \n" - "imul %%rax, %%r14 \n" - "xor $0x1fffe, %%rax \n" - "imul %%rax, %%r13 \n" - "add %%r14, %%r13 \n" - "shr $17, %%r13 \n" - "movq 4096(%5,%%r13,8), %%xmm1\n" - - "mov %%r11, %%rax \n" - "lea (%%r11,%6),%%r10\n" - "sar $0x10,%%r11\n" - "paddsw %%xmm1,%%xmm0\n" - - "movzb (%0, %%r11, 1), %%r13 \n" - "movzb 1(%0, %%r11, 1), %%r14 \n" - "and $0xffff, %%rax \n" - 
"imul %%rax, %%r14 \n" - "xor $0xffff, %%rax \n" - "imul %%rax, %%r13 \n" - "add %%r14, %%r13 \n" - "shr $16, %%r13 \n" - "movq (%5,%%r13,8),%%xmm1\n" - - "mov %%r10, %%rax \n" - "lea (%%r10,%6),%%r11\n" - "sar $0x10,%%r10\n" - - "movzb (%0,%%r10,1), %%r13 \n" - "movzb 1(%0,%%r10,1), %%r14 \n" - "and $0xffff, %%rax \n" - "imul %%rax, %%r14 \n" - "xor $0xffff, %%rax \n" - "imul %%rax, %%r13 \n" - "add %%r14, %%r13 \n" - "shr $16, %%r13 \n" - "movq (%5,%%r13,8),%%xmm2\n" - - "paddsw %%xmm0,%%xmm1\n" - "paddsw %%xmm0,%%xmm2\n" - "shufps $0x44,%%xmm2,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movq %%xmm1,0x0(%3)\n" - "add $0x8,%3\n" - "sub $0x2,%4\n" - "jns 1b\n" - -"2:" - "add $0x1,%4\n" - "js 3f\n" - - "mov %%r11,%%r10\n" - "sar $0x11,%%r10\n" - - "movzb (%1,%%r10,1), %%r13 \n" - "movq 2048(%5,%%r13,8),%%xmm0\n" - - "movzb (%2,%%r10,1), %%r13 \n" - "movq 4096(%5,%%r13,8),%%xmm1\n" - - "paddsw %%xmm1,%%xmm0\n" - "sar $0x10,%%r11\n" - - "movzb (%0,%%r11,1), %%r13 \n" - "movq (%5,%%r13,8),%%xmm1\n" - - "paddsw %%xmm0,%%xmm1\n" - "psraw $0x6,%%xmm1\n" - "packuswb %%xmm1,%%xmm1\n" - "movd %%xmm1,0x0(%3)\n" - -"3:" - : - : "r"(y_buf), // %0 - "r"(u_buf), // %1 - "r"(v_buf), // %2 - "r"(rgb_buf), // %3 - "r"(width), // %4 - "r" (kCoefficientsRgbY), // %5 - "r"(static_cast<long>(source_dx)) // %6 - : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2" -); -} - -#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__) - -// PIC version is slower because less registers are available, so -// non-PIC is used on platforms where it is possible. 
-void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width); - asm( - ".text\n" - ".global FastConvertYUVToRGB32Row_SSE\n" - ".type FastConvertYUVToRGB32Row_SSE, @function\n" -"FastConvertYUVToRGB32Row_SSE:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "jmp 1f\n" - -"0:" - "movzbl (%edi),%eax\n" - "add $0x1,%edi\n" - "movzbl (%esi),%ebx\n" - "add $0x1,%esi\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n" - "movzbl 0x1(%edx),%ebx\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "add $0x2,%edx\n" - "movq kCoefficientsRgbY(,%ebx,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"1:" - "sub $0x2,%ecx\n" - "jns 0b\n" - - "and $0x1,%ecx\n" - "je 2f\n" - - "movzbl (%edi),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "movzbl (%esi),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" -"2:" - "popa\n" - "ret\n" - ".previous\n" -); - -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) -{ - if (mozilla::supports_sse()) { - FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width); - return; - } - - FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -} - - -void ScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - asm( - ".text\n" - ".global ScaleYUVToRGB32Row_SSE\n" - ".type ScaleYUVToRGB32Row_SSE, @function\n" -"ScaleYUVToRGB32Row_SSE:\n" - "pusha\n" - 
"mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "xor %ebx,%ebx\n" - "jmp 1f\n" - -"0:" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"1:" - "sub $0x2,%ecx\n" - "jns 0b\n" - - "and $0x1,%ecx\n" - "je 2f\n" - - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq kCoefficientsRgbY(,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" - -"2:" - "popa\n" - "ret\n" - ".previous\n" -); - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) -{ - if (mozilla::supports_sse()) { - ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, - width, source_dx); - return; - } - - ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, - width, source_dx); -} - -void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx); - asm( - ".text\n" - ".global LinearScaleYUVToRGB32Row_SSE\n" - 
".type LinearScaleYUVToRGB32Row_SSE, @function\n" -"LinearScaleYUVToRGB32Row_SSE:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x30(%esp),%ebp\n" - - // source_width = width * source_dx + ebx - "mov 0x34(%esp), %ecx\n" - "imull 0x38(%esp), %ecx\n" - "mov %ecx, 0x34(%esp)\n" - - "mov 0x38(%esp), %ecx\n" - "xor %ebx,%ebx\n" // x = 0 - "cmp $0x20000,%ecx\n" // if source_dx >= 2.0 - "jl 1f\n" - "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less - "jmp 1f\n" - -"0:" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - - "movzbl (%edi,%eax,1),%ecx\n" - "movzbl 1(%edi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $17, %ecx \n" - "movq kCoefficientsRgbY+2048(,%ecx,8),%mm0\n" - - "mov 0x2c(%esp),%esi\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - - "movzbl (%esi,%eax,1),%ecx\n" - "movzbl 1(%esi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $17, %ecx \n" - "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq kCoefficientsRgbY(,%ecx,8),%mm1\n" - - "cmp 0x34(%esp), %ebx\n" - "jge 2f\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq kCoefficientsRgbY(,%ecx,8),%mm2\n" - - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb 
%mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" - -"1:" - "cmp 0x34(%esp), %ebx\n" - "jl 0b\n" - "popa\n" - "ret\n" - -"2:" - "paddsw %mm0, %mm1\n" - "psraw $6, %mm1\n" - "packuswb %mm1, %mm1\n" - "movd %mm1, (%ebp)\n" - "popa\n" - "ret\n" - ".previous\n" -); - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) -{ - if (mozilla::supports_sse()) { - LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, - width, source_dx); - return; - } - - LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, - width, source_dx); -} - -#elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && defined(__PIC__) - -void PICConvertYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - const int16 *kCoefficientsRgbY); - - asm( - ".text\n" - "PICConvertYUVToRGB32Row_SSE:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x38(%esp),%ecx\n" - - "jmp 1f\n" - -"0:" - "movzbl (%edi),%eax\n" - "add $0x1,%edi\n" - "movzbl (%esi),%ebx\n" - "add $0x1,%esi\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "paddsw 4096(%ecx,%ebx,8),%mm0\n" - "movzbl 0x1(%edx),%ebx\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "add $0x2,%edx\n" - "movq 0(%ecx,%ebx,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"1:" - "subl $0x2,0x34(%esp)\n" - "jns 0b\n" - - "andl $0x1,0x34(%esp)\n" - "je 2f\n" - - "movzbl (%edi),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "movzbl (%esi),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "movzbl (%edx),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" -"2:" - "popa\n" - "ret\n" - ".previous\n" -); - -void 
FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) -{ - if (mozilla::supports_sse()) { - PICConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, - &kCoefficientsRgbY[0][0]); - return; - } - - FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -} - -void PICScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx, - const int16 *kCoefficientsRgbY); - - asm( - ".text\n" - "PICScaleYUVToRGB32Row_SSE:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x28(%esp),%edi\n" - "mov 0x2c(%esp),%esi\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x3c(%esp),%ecx\n" - "xor %ebx,%ebx\n" - "jmp 1f\n" - -"0:" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm2\n" - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" -"1:" - "subl $0x2,0x34(%esp)\n" - "jns 0b\n" - - "andl $0x1,0x34(%esp)\n" - "je 2f\n" - - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%edi,%eax,1),%eax\n" - "movq 2048(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - "movzbl (%esi,%eax,1),%eax\n" - "paddsw 4096(%ecx,%eax,8),%mm0\n" - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%eax\n" - "movq 0(%ecx,%eax,8),%mm1\n" - "paddsw %mm0,%mm1\n" - "psraw $0x6,%mm1\n" - "packuswb %mm1,%mm1\n" - "movd %mm1,0x0(%ebp)\n" - -"2:" - "popa\n" - "ret\n" - ".previous\n" -); - -void ScaleYUVToRGB32Row(const uint8* y_buf, 
- const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) -{ - if (mozilla::supports_sse()) { - PICScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx, - &kCoefficientsRgbY[0][0]); - return; - } - - ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} - -void PICLinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx, - const int16 *kCoefficientsRgbY); - - asm( - ".text\n" - "PICLinearScaleYUVToRGB32Row_SSE:\n" - "pusha\n" - "mov 0x24(%esp),%edx\n" - "mov 0x30(%esp),%ebp\n" - "mov 0x34(%esp),%ecx\n" - "mov 0x3c(%esp),%edi\n" - "xor %ebx,%ebx\n" - - // source_width = width * source_dx + ebx - "mov 0x34(%esp), %ecx\n" - "imull 0x38(%esp), %ecx\n" - "mov %ecx, 0x34(%esp)\n" - - "mov 0x38(%esp), %ecx\n" - "xor %ebx,%ebx\n" // x = 0 - "cmp $0x20000,%ecx\n" // if source_dx >= 2.0 - "jl 1f\n" - "mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less - "jmp 1f\n" - -"0:" - "mov 0x28(%esp),%esi\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - - "movzbl (%esi,%eax,1),%ecx\n" - "movzbl 1(%esi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $17, %ecx \n" - "movq 2048(%edi,%ecx,8),%mm0\n" - - "mov 0x2c(%esp),%esi\n" - "mov %ebx,%eax\n" - "sar $0x11,%eax\n" - - "movzbl (%esi,%eax,1),%ecx\n" - "movzbl 1(%esi,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "andl $0x1fffe, %eax \n" - "imul %eax, %esi \n" - "xorl $0x1fffe, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $17, %ecx \n" - "paddsw 4096(%edi,%ecx,8),%mm0\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq 
(%edi,%ecx,8),%mm1\n" - - "cmp 0x34(%esp), %ebx\n" - "jge 2f\n" - - "mov %ebx,%eax\n" - "sar $0x10,%eax\n" - "movzbl (%edx,%eax,1),%ecx\n" - "movzbl 1(%edx,%eax,1),%esi\n" - "mov %ebx,%eax\n" - "add 0x38(%esp),%ebx\n" - "andl $0xffff, %eax \n" - "imul %eax, %esi \n" - "xorl $0xffff, %eax \n" - "imul %eax, %ecx \n" - "addl %esi, %ecx \n" - "shrl $16, %ecx \n" - "movq (%edi,%ecx,8),%mm2\n" - - "paddsw %mm0,%mm1\n" - "paddsw %mm0,%mm2\n" - "psraw $0x6,%mm1\n" - "psraw $0x6,%mm2\n" - "packuswb %mm2,%mm1\n" - "movntq %mm1,0x0(%ebp)\n" - "add $0x8,%ebp\n" - -"1:" - "cmp %ebx, 0x34(%esp)\n" - "jg 0b\n" - "popa\n" - "ret\n" - -"2:" - "paddsw %mm0, %mm1\n" - "psraw $6, %mm1\n" - "packuswb %mm1, %mm1\n" - "movd %mm1, (%ebp)\n" - "popa\n" - "ret\n" - ".previous\n" -); - - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) -{ - if (mozilla::supports_sse()) { - PICLinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, - source_dx, &kCoefficientsRgbY[0][0]); - return; - } - - LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} -#else -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -} - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} -#endif - -} diff --git a/gfx/ycbcr/yuv_row_table.cpp b/gfx/ycbcr/yuv_row_table.cpp deleted file mode 100644 index c531b60c2..000000000 --- a/gfx/ycbcr/yuv_row_table.cpp +++ /dev/null @@ 
-1,233 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "yuv_row.h" - -extern "C" { - -#define RGBY(i) { \ - static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ - static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ - static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \ - 0 \ -} - -#define RGBU(i) { \ - static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \ - static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \ - 0, \ - static_cast<int16>(256 * 64 - 1) \ -} - -#define RGBV(i) { \ - 0, \ - static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \ - static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \ - 0 \ -} - -SIMD_ALIGNED(const int16 kCoefficientsRgbY[256 * 3][4]) = { - RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03), - RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07), - RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B), - RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F), - RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13), - RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17), - RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B), - RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F), - RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23), - RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27), - RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B), - RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F), - RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33), - RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37), - RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B), - RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F), - RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43), - RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47), - RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B), - RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F), - RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53), - RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57), - RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B), - RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), 
RGBY(0x5F), - RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63), - RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67), - RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B), - RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F), - RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73), - RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77), - RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B), - RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F), - RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83), - RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87), - RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B), - RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F), - RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93), - RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97), - RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B), - RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F), - RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3), - RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7), - RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB), - RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF), - RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3), - RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7), - RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB), - RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF), - RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3), - RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7), - RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB), - RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF), - RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3), - RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7), - RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB), - RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF), - RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3), - RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7), - RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB), - RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF), - RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3), - RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7), - RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB), - RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), 
RGBY(0xFF), - - // Chroma U table. - RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03), - RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07), - RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B), - RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F), - RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13), - RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17), - RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B), - RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F), - RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23), - RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27), - RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B), - RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F), - RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33), - RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37), - RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B), - RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F), - RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43), - RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47), - RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B), - RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F), - RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53), - RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57), - RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B), - RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F), - RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63), - RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67), - RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B), - RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F), - RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73), - RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77), - RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B), - RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F), - RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83), - RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87), - RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B), - RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F), - RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93), - RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97), - RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B), - RGBU(0x9C), 
RGBU(0x9D), RGBU(0x9E), RGBU(0x9F), - RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3), - RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7), - RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB), - RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF), - RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3), - RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7), - RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB), - RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF), - RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3), - RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7), - RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB), - RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF), - RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3), - RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7), - RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB), - RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF), - RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3), - RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7), - RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB), - RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF), - RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3), - RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7), - RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB), - RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF), - - // Chroma V table. 
- RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03), - RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07), - RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B), - RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F), - RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13), - RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17), - RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B), - RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F), - RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23), - RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27), - RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B), - RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F), - RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33), - RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37), - RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B), - RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F), - RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43), - RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47), - RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B), - RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F), - RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53), - RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57), - RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B), - RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F), - RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63), - RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67), - RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B), - RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F), - RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73), - RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77), - RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B), - RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F), - RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83), - RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87), - RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B), - RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F), - RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93), - RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97), - RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B), - RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F), 
- RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3), - RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7), - RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB), - RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF), - RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3), - RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7), - RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB), - RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF), - RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3), - RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7), - RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB), - RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF), - RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3), - RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7), - RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB), - RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF), - RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3), - RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7), - RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB), - RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF), - RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3), - RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7), - RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB), - RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF), -}; - -#undef RGBY -#undef RGBU -#undef RGBV - -} // extern "C" diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp deleted file mode 100644 index 5cd931139..000000000 --- a/gfx/ycbcr/yuv_row_win.cpp +++ /dev/null @@ -1,498 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "yuv_row.h" -#include "mozilla/SSE.h" - -#define kCoefficientsRgbU kCoefficientsRgbY + 2048 -#define kCoefficientsRgbV kCoefficientsRgbY + 4096 - -extern "C" { - -#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) -__declspec(naked) -void FastConvertYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp convertend - - convertloop : - movzx eax, byte ptr [edi] - add edi, 1 - movzx ebx, byte ptr [esi] - add esi, 1 - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [edx] - paddsw mm0, [kCoefficientsRgbV + 8 * ebx] - movzx ebx, byte ptr [edx + 1] - movq mm1, [kCoefficientsRgbY + 8 * eax] - add edx, 2 - movq mm2, [kCoefficientsRgbY + 8 * ebx] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - convertend : - sub ecx, 2 - jns convertloop - - and ecx, 1 // odd number of pixels? 
- jz convertdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - convertdone : - - popad - ret - } -} - -__declspec(naked) -void ConvertYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int step) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - mov ebx, [esp + 32 + 24] // step - jmp wend - - wloop : - movzx eax, byte ptr [edi] - add edi, ebx - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - add esi, ebx - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm1, [kCoefficientsRgbY + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - wend : - sub ecx, 2 - jns wloop - - and ecx, 1 // odd number of pixels? 
- jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - wdone : - - popad - ret - } -} - -__declspec(naked) -void RotateConvertYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int ystep, - int uvstep) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp wend - - wloop : - movzx eax, byte ptr [edi] - mov ebx, [esp + 32 + 28] // uvstep - add edi, ebx - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - add esi, ebx - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - mov ebx, [esp + 32 + 24] // ystep - add edx, ebx - movq mm1, [kCoefficientsRgbY + 8 * eax] - movzx eax, byte ptr [edx] - add edx, ebx - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - wend : - sub ecx, 2 - jns wloop - - and ecx, 1 // odd number of pixels? 
- jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - wdone : - - popad - ret - } -} - -__declspec(naked) -void DoubleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - jmp wend - - wloop : - movzx eax, byte ptr [edi] - add edi, 1 - movzx ebx, byte ptr [esi] - add esi, 1 - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [edx] - paddsw mm0, [kCoefficientsRgbV + 8 * ebx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - punpckldq mm1, mm1 - movntq [ebp], mm1 - - movzx ebx, byte ptr [edx + 1] - add edx, 2 - paddsw mm0, [kCoefficientsRgbY + 8 * ebx] - psraw mm0, 6 - packuswb mm0, mm0 - punpckldq mm0, mm0 - movntq [ebp+8], mm0 - add ebp, 16 - wend : - sub ecx, 4 - jns wloop - - add ecx, 4 - jz wdone - - movzx eax, byte ptr [edi] - movq mm0, [kCoefficientsRgbU + 8 * eax] - movzx eax, byte ptr [esi] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - movzx eax, byte ptr [edx] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - jmp wend1 - - wloop1 : - movd [ebp], mm1 - add ebp, 4 - wend1 : - sub ecx, 1 - jns wloop1 - wdone : - popad - ret - } -} - -// This version does general purpose scaling by any amount, up or down. -// The only thing it cannot do is rotation by 90 or 270. -// For performance the chroma is under-sampled, reducing cost of a 3x -// 1080p scale from 8.4 ms to 5.4 ms. 
-__declspec(naked) -void ScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - mov esi, [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - xor ebx, ebx // x - jmp scaleend - - scaleloop : - mov eax, ebx - sar eax, 17 - movzx eax, byte ptr [edi + eax] - movq mm0, [kCoefficientsRgbU + 8 * eax] - mov eax, ebx - sar eax, 17 - movzx eax, byte ptr [esi + eax] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - mov eax, ebx - add ebx, [esp + 32 + 24] // x += source_dx - sar eax, 16 - movzx eax, byte ptr [edx + eax] - movq mm1, [kCoefficientsRgbY + 8 * eax] - mov eax, ebx - add ebx, [esp + 32 + 24] // x += source_dx - sar eax, 16 - movzx eax, byte ptr [edx + eax] - movq mm2, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 6 - psraw mm2, 6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 8 - scaleend : - sub ecx, 2 - jns scaleloop - - and ecx, 1 // odd number of pixels? 
- jz scaledone - - mov eax, ebx - sar eax, 17 - movzx eax, byte ptr [edi + eax] - movq mm0, [kCoefficientsRgbU + 8 * eax] - mov eax, ebx - sar eax, 17 - movzx eax, byte ptr [esi + eax] - paddsw mm0, [kCoefficientsRgbV + 8 * eax] - mov eax, ebx - sar eax, 16 - movzx eax, byte ptr [edx + eax] - movq mm1, [kCoefficientsRgbY + 8 * eax] - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - - scaledone : - popad - ret - } -} - -__declspec(naked) -void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - __asm { - pushad - mov edx, [esp + 32 + 4] // Y - mov edi, [esp + 32 + 8] // U - // [esp + 32 + 12] // V - mov ebp, [esp + 32 + 16] // rgb - mov ecx, [esp + 32 + 20] // width - imul ecx, [esp + 32 + 24] // source_dx - mov [esp + 32 + 20], ecx // source_width = width * source_dx - mov ecx, [esp + 32 + 24] // source_dx - xor ebx, ebx // x = 0 - cmp ecx, 0x20000 - jl lscaleend - mov ebx, 0x8000 // x = 0.5 for 1/2 or less - jmp lscaleend -lscaleloop: - mov eax, ebx - sar eax, 0x11 - - movzx ecx, byte ptr [edi + eax] - movzx esi, byte ptr [edi + eax + 1] - mov eax, ebx - and eax, 0x1fffe - imul esi, eax - xor eax, 0x1fffe - imul ecx, eax - add ecx, esi - shr ecx, 17 - movq mm0, [kCoefficientsRgbU + 8 * ecx] - - mov esi, [esp + 32 + 12] - mov eax, ebx - sar eax, 0x11 - - movzx ecx, byte ptr [esi + eax] - movzx esi, byte ptr [esi + eax + 1] - mov eax, ebx - and eax, 0x1fffe - imul esi, eax - xor eax, 0x1fffe - imul ecx, eax - add ecx, esi - shr ecx, 17 - paddsw mm0, [kCoefficientsRgbV + 8 * ecx] - - mov eax, ebx - sar eax, 0x10 - movzx ecx, byte ptr [edx + eax] - movzx esi, byte ptr [1 + edx + eax] - mov eax, ebx - add ebx, [esp + 32 + 24] - and eax, 0xffff - imul esi, eax - xor eax, 0xffff - imul ecx, eax - add ecx, esi - shr ecx, 16 - movq mm1, [kCoefficientsRgbY + 8 * ecx] - - cmp ebx, [esp + 32 + 20] - jge lscalelastpixel - - mov eax, ebx - sar eax, 0x10 - movzx 
ecx, byte ptr [edx + eax] - movzx esi, byte ptr [edx + eax + 1] - mov eax, ebx - add ebx, [esp + 32 + 24] - and eax, 0xffff - imul esi, eax - xor eax, 0xffff - imul ecx, eax - add ecx, esi - shr ecx, 16 - movq mm2, [kCoefficientsRgbY + 8 * ecx] - - paddsw mm1, mm0 - paddsw mm2, mm0 - psraw mm1, 0x6 - psraw mm2, 0x6 - packuswb mm1, mm2 - movntq [ebp], mm1 - add ebp, 0x8 - -lscaleend: - cmp ebx, [esp + 32 + 20] - jl lscaleloop - popad - ret - -lscalelastpixel: - paddsw mm1, mm0 - psraw mm1, 6 - packuswb mm1, mm1 - movd [ebp], mm1 - popad - ret - }; -} -#endif // if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) - -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { -#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) - if (mozilla::supports_sse()) { - FastConvertYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width); - return; - } -#endif - - FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1); -} - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - -#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) - if (mozilla::supports_sse()) { - ScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); - return; - } -#endif - - ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { -#if defined(MOZILLA_MAY_SUPPORT_SSE) && defined(_M_IX86) - if (mozilla::supports_sse()) { - LinearScaleYUVToRGB32Row_SSE(y_buf, u_buf, v_buf, rgb_buf, width, - source_dx); - return; - } -#endif - - LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} - -} // extern "C" diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp deleted file mode 100644 index 6a34f840a..000000000 --- 
a/gfx/ycbcr/yuv_row_win64.cpp +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "yuv_row.h" - -extern "C" { - -// x64 compiler doesn't support MMX and inline assembler. Use SSE2 intrinsics. - -#define kCoefficientsRgbU (reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 2048) -#define kCoefficientsRgbV (reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 4096) - -#include <emmintrin.h> - -static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __m128i xmm0, xmmY1, xmmY2; - __m128 xmmY; - - while (width >= 2) { - xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * *u_buf++)), - _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * *v_buf++))); - - xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * *y_buf++)); - xmmY1 = _mm_adds_epi16(xmmY1, xmm0); - - xmmY2 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * *y_buf++)); - xmmY2 = _mm_adds_epi16(xmmY2, xmm0); - - xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), - 0x44); - xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); - xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); - - _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); - rgb_buf += 8; - width -= 2; - } - - if (width) { - xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * *u_buf)), - _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * *v_buf))); - xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * *y_buf)); - xmmY1 = _mm_adds_epi16(xmmY1, xmm0); - xmmY1 = _mm_srai_epi16(xmmY1, 6); - xmmY1 
= _mm_packus_epi16(xmmY1, xmmY1); - *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); - } -} - -static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - __m128i xmm0, xmmY1, xmmY2; - __m128 xmmY; - uint8 u, v, y; - int x = 0; - - while (width >= 2) { - u = u_buf[x >> 17]; - v = v_buf[x >> 17]; - y = y_buf[x >> 16]; - x += source_dx; - - xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * u)), - _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * v))); - xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y)); - xmmY1 = _mm_adds_epi16(xmmY1, xmm0); - - y = y_buf[x >> 16]; - x += source_dx; - - xmmY2 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y)); - xmmY2 = _mm_adds_epi16(xmmY2, xmm0); - - xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), - 0x44); - xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); - xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); - - _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); - rgb_buf += 8; - width -= 2; - } - - if (width) { - u = u_buf[x >> 17]; - v = v_buf[x >> 17]; - y = y_buf[x >> 16]; - - xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * u)), - _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * v))); - xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y)); - xmmY1 = _mm_adds_epi16(xmmY1, xmm0); - xmmY1 = _mm_srai_epi16(xmmY1, 6); - xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); - *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); - } -} - -static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int 
source_dx) { - __m128i xmm0, xmmY1, xmmY2; - __m128 xmmY; - uint8 u0, u1, v0, v1, y0, y1; - uint32 uv_frac, y_frac, u, v, y; - int x = 0; - - if (source_dx >= 0x20000) { - x = 32768; - } - - while(width >= 2) { - u0 = u_buf[x >> 17]; - u1 = u_buf[(x >> 17) + 1]; - v0 = v_buf[x >> 17]; - v1 = v_buf[(x >> 17) + 1]; - y0 = y_buf[x >> 16]; - y1 = y_buf[(x >> 16) + 1]; - uv_frac = (x & 0x1fffe); - y_frac = (x & 0xffff); - u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17; - v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17; - y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16; - x += source_dx; - - xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * u)), - _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * v))); - xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y)); - xmmY1 = _mm_adds_epi16(xmmY1, xmm0); - - y0 = y_buf[x >> 16]; - y1 = y_buf[(x >> 16) + 1]; - y_frac = (x & 0xffff); - y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16; - x += source_dx; - - xmmY2 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y)); - xmmY2 = _mm_adds_epi16(xmmY2, xmm0); - - xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), - 0x44); - xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); - xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); - - _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); - rgb_buf += 8; - width -= 2; - } - - if (width) { - u = u_buf[x >> 17]; - v = v_buf[x >> 17]; - y = y_buf[x >> 16]; - - xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbU + 8 * u)), - _mm_loadl_epi64(reinterpret_cast<const __m128i*>(kCoefficientsRgbV + 8 * v))); - xmmY1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(reinterpret_cast<const uint8*>(kCoefficientsRgbY) + 8 * y)); - - xmmY1 = _mm_adds_epi16(xmmY1, xmm0); - xmmY1 = _mm_srai_epi16(xmmY1, 6); - 
xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); - *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); - } -} - -void FastConvertYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width); -} - -void ScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); -} - -void LinearScaleYUVToRGB32Row(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width, - int source_dx) { - LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, - source_dx); -} - -} // extern "C" |